In [2]:
import getpass
import json
import os
import shlex
import subprocess
import sys
import tarfile
from pathlib import Path

from curriculum_deeplab.utils import get_script_dir
# Base directory against which the tool and data paths in this notebook are resolved.
# NOTE(review): presumably the folder containing this notebook — confirm in curriculum_deeplab.utils.
THIS_SCRIPT_DIR = get_script_dir()

In [5]:
# Download slic3er:
# https://download.slicer.org/

URL_SLICER_LINUX = "https://slicer-packages.kitware.com/api/v1/item/60add706ae4540bf6a89bf98/download"
tool_path = Path(THIS_SCRIPT_DIR, "tmp/tools")
slicer_tar_path = Path(tool_path, "slicer.tar.gz")
slicer_tar_path.parent.mkdir(parents=True, exist_ok=True)
!wget {URL_SLICER_LINUX} -O {slicer_tar_path}

### Download TCIA data

In [20]:
# Download TCIA manifest file 
# https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70229053
TCIA_DATA_DIR = Path(THIS_SCRIPT_DIR, "tmp/tcia_data")
TCIA_DATA_CONVENIENT_DIR = Path(THIS_SCRIPT_DIR, "tmp/tcia_data_convenient")
TCIA_DATA_BASIC_PREPROCESSING_DIR = Path(THIS_SCRIPT_DIR, "tmp/tcia_data_preprocessed")
CROSSMODA_DATALOADER_DIR = Path(THIS_SCRIPT_DIR, "tmp/crossmoda_data_dataloader")

tcia_file_path = TCIA_DATA_DIR.joinpath("VS.tcia")
TCIA_DATA_DIR.mkdir(parents=True, exist_ok=True)
TCIA_DATA_CONVENIENT_DIR.mkdir(parents=True, exist_ok=True)
TCIA_DATA_BASIC_PREPROCESSING_DIR.mkdir(parents=True, exist_ok=True)
TCIA_DATALOADER_DIR.mkdir(parents=True, exist_ok=True)

URL_TCIA = "https://wiki.cancerimagingarchive.net/download/attachments/70229053/Vestibular-Schwannoma-SEG%20Feb%202021%20manifest.tcia?api=v2"

!wget {URL_TCIA} -O {tcia_file_path}

zsh:1: no matches found: https://wiki.cancerimagingarchive.net/download/attachments/70229053/Vestibular-Schwannoma-SEG%20Feb%202021%20manifest.tcia?api=v2


In [28]:
# https://wiki.cancerimagingarchive.net/display/Public/NBIA+Advanced+REST+API+Guide
import re
import zipfile
series_pattern = re.compile(r"^[0-9\.]+$")


def get_list_of_series(tcia_manifest_file_path):
    """Collect the series ids listed in a TCIA manifest file.

    A line counts as a series id when, after stripping surrounding
    whitespace, it consists solely of digits and dots.

    Args:
        tcia_manifest_file_path: Path of the manifest file to read.

    Returns:
        list[str]: The matching lines, in file order.
    """
    with open(tcia_manifest_file_path, 'r') as manifest:
        stripped_lines = (raw_line.strip() for raw_line in manifest)
        return [
            hit.group()
            for candidate in stripped_lines
            for hit in series_pattern.finditer(candidate)
        ]

def request_token(username, password, client_id, client_secret):
    """Request an OAuth access token from the TCIA NBIA token endpoint via curl.

    Args:
        username: TCIA user name.
        password: TCIA password.
        client_id: Client id sent as the ``client_id`` form field.
        client_secret: Client secret sent as the ``client_secret`` form field.

    Returns:
        The value of ``access_token`` in the JSON response.

    Raises:
        ValueError: If the response is not JSON or carries no ``access_token``.
        subprocess.CalledProcessError: If curl exits with a non-zero status.
    """
    token_url = "https://services.cancerimagingarchive.net/nbia-api/oauth/token"
    form_fields = {
        "username": username,
        "password": password,
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "password",
    }

    # Build argv directly instead of shlex-splitting a quoted string: a quote,
    # space or '&' inside a credential can then neither break the command line
    # nor inject extra form fields. --data-urlencode encodes each value.
    # NOTE(review): -k disables TLS certificate verification; kept to preserve
    # the existing behaviour, but consider dropping it.
    curl_cmd = ["curl", "-s", "-X", "POST", "-k"]
    for field_name, field_value in form_fields.items():
        curl_cmd += ["--data-urlencode", f"{field_name}={field_value}"]
    curl_cmd.append(token_url)

    query_output = subprocess.check_output(curl_cmd)
    try:
        token = json.loads(query_output)['access_token']
    except (json.JSONDecodeError, KeyError, TypeError) as err:
        raise ValueError("TCIA token could not be fetched. Please check your credentials.") from err

    return token

def get_metadata(token, series_id):
    """Fetch the metadata record of one series from the NBIA REST API via curl.

    Args:
        token: Access token. Currently not sent with the request; the
            parameter is kept so existing callers keep working.
            NOTE(review): confirm whether this endpoint needs the
            ``Authorization`` header for restricted collections.
        series_id: SeriesInstanceUID to look up.

    Returns:
        The first element of the JSON list returned by the service, or
        ``None`` when the response is not JSON, is not a list, or is empty.

    Raises:
        subprocess.CalledProcessError: If curl exits with a non-zero status.
    """
    rest_download_address = f"https://services.cancerimagingarchive.net/nbia-api/services/v1/getSeriesMetaData?SeriesInstanceUID={series_id}"
    # argv list: the URL is passed as one argument, no shell-style splitting involved.
    query_output = subprocess.check_output(["curl", "-s", "-k", rest_download_address])

    try:
        records = json.loads(query_output)
    except json.JSONDecodeError:
        return None

    # An empty list (unknown series) or a JSON object (error payload) carries
    # no metadata record; report that as None instead of raising.
    if not isinstance(records, list) or not records:
        return None

    return records[0]

def download_restricted_data(token, series_id, output_file_path):
    """Download the images of one series with curl and store them at ``output_file_path``.

    Args:
        token: Access token sent in the ``Authorization`` header.
        series_id: SeriesInstanceUID to download.
        output_file_path: Destination file; its parent directory is created
            if it does not exist.

    Returns:
        None. curl's exit status is not checked (best effort, as before).
    """
    rest_download_address = f"https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?SeriesInstanceUID={series_id}"
    output_file_path = Path(output_file_path)
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    authorization_header = f"Authorization:Bearer {token}"
    # Call curl with an argv list rather than through the shell: the '?' in the
    # URL is a glob character in zsh, and the output path may contain spaces.
    subprocess.call([
        "curl",
        "-H", authorization_header,
        "-k",
        rest_download_address,
        "--output", str(output_file_path),
    ])

# https://wiki.cancerimagingarchive.net/plugins/personalaccesstokens/usertokens.action
# Only the first series of the manifest is fetched here; slicing (instead of
# indexing) keeps an empty manifest from raising.
all_series_ids = get_list_of_series(tcia_file_path)[:1]

# Secrets are read with getpass so they are not echoed into the notebook output.
token = request_token(
    input("Enter TCIA username"),
    getpass.getpass("Enter TCIA password"),
    input("Enter TCIA client_id from helpdesk"),
    getpass.getpass("Enter TCIA client_secret from helpdesk"),
)

for sid in all_series_ids:
    metadata = get_metadata(token, sid)
    if not metadata:
        continue

    zip_data_path = TCIA_DATA_DIR.joinpath(metadata['Subject ID'] + ".zip")
    try:
        # Opening the archive is the validity check: a missing or corrupt file
        # raises and triggers a (re-)download, a readable one is left alone.
        with zipfile.ZipFile(zip_data_path):
            pass
    except (FileNotFoundError, zipfile.BadZipfile):
        download_restricted_data(token, sid, zip_data_path)
    

ValueError: TCIA token could not be fetched. Please check your credentials.

In [51]:
# Probe the NBIA OAuth token endpoint, sending only a guest username (no password or client credentials).
!curl -d "username=nbia_guest" -X POST -k https://services.cancerimagingarchive.net/nbia-api/oauth/token

{"error":"unauthorized","error_description":"An Authentication object was not found in the SecurityContext"}

### Preprocessing step 1: Run VS_Seg convenient filenames preprocessing

In [18]:
CONVENIENT_SCRIPT = Path(THIS_SCRIPT_DIR, "./tools/VS_Seg/preprocessing/TCIA_data_convert_into_convenient_folder_structure.py")
!{CONVENIENT_SCRIPT} --input {TCIA_DATA_DIR} --output {TCIA_DATA_CONVENIENT_DIR}

zsh:1: permission denied: /Users/christianweihsbach/code/curriculum_deeplab/curriculum_deeplab/preprocessing/tools/VS_Seg/preprocessing/TCIA_data_convert_into_convenient_folder_structure.py


### Preprocessing step 2: Run VS_Seg Slicer.org preprocessing

In [5]:
# Build the Slicer.org docker image: run build.sh with its own folder as working directory
build_dir = Path(THIS_SCRIPT_DIR) / "tools/SlicerDockers"
subprocess.call(build_dir / "build.sh", cwd=build_dir)

#1 [internal] load build definition from Dockerfile
#1 sha256:095a2df15bc08459aa6ccb273d84699c70689e2dab96546cfa81ba4bc2bff98e
#1 transferring dockerfile: 37B 0.0s done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:728344ba77264fc9f27da86c811ee5e48c0ff8331e62c778dda09c5e8401d6f1
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/stevepieper/x11:latest
#3 sha256:c0789341f0873bc22b92299d0986138851e1fdf9c0d4ad5eb04f3910bd2a78a2
#3 DONE 1.7s

#4 [ 1/23] FROM docker.io/stevepieper/x11:latest@sha256:bcf1410f89f11e3c8693fa2e8d6b352da6c1938bf520895cf1d6340188d37999
#4 sha256:de6b7259924fadff7cfb60df7a5b7074f93c5170c4a27b26f8682a1d0806eb21
#4 DONE 0.0s

#15 [internal] load build context
#15 sha256:95290becd431d80f48c6db54fb182a3fd541ffc7056a7c29383e9b27927078c2
#15 transferring context: 1.82kB done
#15 DONE 0.0s

#16 [12/23] COPY /etc /etc
#16 sha256:0233c2edfddb2a21ccba409e88196a3c8351e9ccaeb2a705f62566bbeba9f3d2
#16 CACHED

#21 [17/23] COPY /ho


Use 'docker scan' to run Snyk tests against images to find vulnerabilities and learn how to fix them


In [21]:
SHARED_DIR = Path(THIS_SCRIPT_DIR, "tools/docker_shared")
SHARED_INPUT_DIR = TCIA_DATA_CONVENIENT_DIR
SHARED_OUTPUT_DIR = TCIA_DATA_BASIC_PREPROCESSING_DIR

SLICER_PREPROCESSING_SCRIPT_PATH = Path(THIS_SCRIPT_DIR, "./tools/VS_Seg/preprocessing/data_conversion.py")
!cp {SLICER_PREPROCESSING_SCRIPT_PATH} {SHARED_DIR}

# Run built docker file
docker_cmd = f'docker run -v {shared_dir}:/tmp/shared -v {SHARED_INPUT_DIR}:/tmp/shared_input -v {SHARED_OUTPUT_DIR}:/tmp/shared_output -p 8080:8080 --rm -it deep_staple/slicer'
!{docker_cmd}

2022-04-03 21:35:25,843 CRIT Supervisor running as root (no user in config file)
2022-04-03 21:35:25,845 INFO Included extra file "/etc/supervisord.d/easydav.conf" during parsing
2022-04-03 21:35:25,845 INFO Included extra file "/etc/supervisord.d/gotty.conf" during parsing
2022-04-03 21:35:25,845 INFO Included extra file "/etc/supervisord.d/nginx.conf" during parsing
2022-04-03 21:35:25,846 INFO Included extra file "/etc/supervisord.d/websockify.conf" during parsing
2022-04-03 21:35:25,846 INFO Included extra file "/etc/supervisord.d/x11vnc.conf" during parsing
2022-04-03 21:35:25,846 INFO Included extra file "/etc/supervisord.d/xdummy.conf" during parsing
2022-04-03 21:35:25,846 INFO Included extra file "/etc/supervisord.d/xinitrc.conf" during parsing
2022-04-03 21:35:25,849 INFO Creating socket unix:///var/run/easydav.sock
2022-04-03 21:35:25,850 INFO Closing socket unix:///var/run/easydav.sock
2022-04-03 21:35:25,869 INFO RPC interface 'supervisor' initialized
2022-04-03 21:35:25,8

### Preprocessing step 3: Prepare dataloader file structure

In [None]:
TCIA_TO_CROSSMODA_SCRIPT = Path(THIS_SCRIPT_DIR, "./tools/dataloader_preparation/tcia_to_crossmoda.py")

!{TCIA_TO_CROSSMODA_SCRIPT} --input {TCIA_DATA_BASIC_PREPROCESSING_DIR} --output {CROSSMODA_DATALOADER_DIR}

### Preprocessing step 4: Add dataloader data levels l1 to l4

In [None]:
BUILD_LEVELS_SCRIPT = Path(THIS_SCIPTS_DIR, "build_levels.py")

!{BUILD_LEVELS_SCRIPT} --input {}