In [None]:
import getpass
import json
import os
import shlex
import stat
import subprocess
import tarfile
import zipfile
from pathlib import Path

from deep_staple.utils.common_utils import get_script_dir
# Base directory from which every other path in this notebook is derived.
# NOTE(review): presumably get_script_dir resolves the folder that contains the
# given notebook file — confirm in deep_staple.utils.common_utils.
THIS_SCRIPT_DIR = get_script_dir(Path("./fetch_dataset.ipynb"))

In [None]:
# Working directories: every intermediate result is written below THIS_SCRIPT_DIR/tmp.
TMP_DIR = Path(THIS_SCRIPT_DIR) / "tmp"
TCIA_DATA_DIR = Path(THIS_SCRIPT_DIR) / "tmp/tcia_data"
TCIA_DATA_CONVENIENT_DIR = Path(THIS_SCRIPT_DIR) / "tmp/tcia_data_convenient"
TCIA_DATA_BASIC_PREPROCESSING_DIR = Path(THIS_SCRIPT_DIR) / "tmp/tcia_data_preprocessed"
CROSSMODA_DATALOADER_DIR = Path(THIS_SCRIPT_DIR) / "tmp/crossmoda_data_dataloader"

# Download targets inside the TCIA data directory.
tcia_file_path = TCIA_DATA_DIR / "VS.tcia"
tcia_contours_path = TCIA_DATA_DIR / "Vestibular-Schwannoma-SEG_contours_Mar_2021.zip"
# tcia_reg_matrices_path = TCIA_DATA_DIR.joinpath("Vestibular-Schwannoma-SEG_matrices_Mar_2021.zip")

# Create all stage directories up front; existing directories are left alone.
for _stage_dir in (
    TCIA_DATA_DIR,
    TCIA_DATA_CONVENIENT_DIR,
    TCIA_DATA_BASIC_PREPROCESSING_DIR,
    CROSSMODA_DATALOADER_DIR,
):
    _stage_dir.mkdir(parents=True, exist_ok=True)

# Download URLs. The values contain literal double quotes and are interpolated
# as-is into the curl shell commands of the download cell.
URL_TCIA = '"https://wiki.cancerimagingarchive.net/download/attachments/70229053/Vestibular-Schwannoma-SEG%20Feb%202021%20manifest.tcia?api=v2"'
URL_TCIA_CONTOURS = '"https://wiki.cancerimagingarchive.net/download/attachments/70229053/Vestibular-Schwannoma-SEG%20contours%20Mar%202021.zip?api=v2"'
# URL_TCIA_REGISTION_MATRICES = '"https://wiki.cancerimagingarchive.net/download/attachments/70229053/Vestibular-Schwannoma-SEG_matrices%20Mar%202021.zip?api=v2"'

### Download TCIA data

In [None]:
# NBIA Data Retriever: download URL, local path of the .deb package, and the
# path at which the launcher binary is expected below the package directory.
URL_NBIA_DEB = "https://cbiit-download.nci.nih.gov/nbia/releases/ForTCIA/NBIADataRetriever_4.2/nbia-data-retriever-4.2.deb"
nbia_deb_path = Path(THIS_SCRIPT_DIR) / "tools/nbia/nbia-data-retriever-4.2.deb"
nbia_bin_path = nbia_deb_path.parent / "opt/nbia-data-retriever/nbia-data-retriever"

In [None]:
# Download and extract nbia data retriever

nbia_deb_path.parent.mkdir(parents=True, exist_ok=True)
!curl {URL_NBIA_DEB} -o {nbia_deb_path}

os.chdir(nbia_deb_path.parent)
!ar -x {nbia_deb_path}
!tar -xf data.tar.xz
os.chdir(THIS_SCRIPT_DIR)


In [None]:
# Download TCIA manifest file 
# https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70229053
tcia_file_path.parent.mkdir(exist_ok=True, parents=True)
!curl -k {URL_TCIA} -o {tcia_file_path}
!curl -k {URL_TCIA_CONTOURS} -o {tcia_contours_path}
# !curl {URL_TCIA_REGISTION_MATRICES} -o {tcia_reg_matrices_path}

In [None]:
# Download TCIA VS data 
# https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=70229053
nbia_libs_path = Path(nbia_deb_path.parent, "opt/nbia-data-retriever/runtime/lib")
if str(nbia_libs_path) not in os.environ['LD_LIBRARY_PATH']:
    os.environ['LD_LIBRARY_PATH'] = f"{os.environ['LD_LIBRARY_PATH']}:{nbia_libs_path}"
!echo "Y" | {nbia_bin_path} --cli {tcia_file_path} -d {TCIA_DATA_DIR} -u {input("username")} -p {input("password")}

### Preprocessing step 1: Run VS_Seg convenient filenames preprocessing

In [None]:
CONVENIENT_SCRIPT = Path(THIS_SCRIPT_DIR, "./tools/VS_Seg/preprocessing/TCIA_data_convert_into_convenient_folder_structure.py")
tcia_data_entrypoint = Path(TCIA_DATA_DIR, "VS/Vestibular-Schwannoma-SEG")
!python {CONVENIENT_SCRIPT} --input {tcia_data_entrypoint} --output {TCIA_DATA_CONVENIENT_DIR}

In [None]:
import zipfile

# Extract contours
with zipfile.ZipFile(tcia_contours_path, 'r') as contours_file:
    contours_file.extractall(tcia_contours_path.parent)

# Merge data and contours directories
contours_trailing_path = str(Path(tcia_contours_path.parent, "contours")) + "/"
tcia_convenient_data_trailing_path = str(Path(TCIA_DATA_CONVENIENT_DIR)) + "/"
!rsync -a {contours_trailing_path} {tcia_convenient_data_trailing_path}

### Preprocessing step 2: Run VS_Seg Slicer.org preprocessing

In [None]:
# Build the Slicer.org docker image by running tools/SlicerDockers/build.sh.
build_dir = Path(THIS_SCRIPT_DIR, "tools/SlicerDockers")
build_file = build_dir.joinpath("build.sh")

# Add the owner-execute (+x) permission so the script can be started directly.
st = os.stat(build_file)
os.chmod(build_file, st.st_mode | stat.S_IEXEC)

# check=True raises CalledProcessError when the build script exits with a
# non-zero status, so a failed image build stops the notebook here instead of
# surfacing later when the container is started.
build_result = subprocess.run([str(build_file)], cwd=build_dir, check=True)
build_result.returncode

In [89]:
SHARED_DIR = Path(THIS_SCRIPT_DIR, "tools/docker_shared")
SHARED_INPUT_DIR = TCIA_DATA_CONVENIENT_DIR
SHARED_OUTPUT_DIR = TCIA_DATA_BASIC_PREPROCESSING_DIR

SLICER_PREPROCESSING_SCRIPT_PATH = Path(THIS_SCRIPT_DIR, "./tools/VS_Seg/preprocessing/data_conversion.py")
!cp {SLICER_PREPROCESSING_SCRIPT_PATH} {SHARED_DIR}

# Run docker container and script. 
# Running this script will take time. Connect to http://localhost:8080/ to see process
# Run built docker file

docker_cmd = f'docker run --platform linux/x86_64 -v {SHARED_DIR}:/tmp/shared -v {SHARED_INPUT_DIR}:/tmp/shared_input -v {SHARED_OUTPUT_DIR}:/tmp/shared_output -p 8080:8080 --env SLICER_ARGUMENTS="--python-script /tmp/shared/data_conversion.py --input-folder /tmp/shared_input --output-folder /tmp/shared_output --export_all_structures" --rm -it deep_staple/slicer'
!{docker_cmd}

### Preprocessing step 3: Prepare dataloader file structure

In [None]:
TCIA_TO_CROSSMODA_SCRIPT = Path(THIS_SCRIPT_DIR, "./tools/dataloader_preparation/tcia_to_crossmoda.py")

run_cmd = f"{TCIA_TO_CROSSMODA_SCRIPT} --input {TCIA_DATA_BASIC_PREPROCESSING_DIR} --output {CROSSMODA_DATALOADER_DIR}"
!python {run_cmd}

### Preprocessing step 4: Add dataloader data levels L2 to L4

In [None]:
BUILD_LEVELS_SCRIPT = Path(THIS_SCRIPT_DIR, "tools/dataloader_preparation/build_levels.py")
COCHLEA_CENTERS_PATH = Path(THIS_SCRIPT_DIR, "tools/dataloader_preparation/l3_cochlea_centers.pth")

!python {BUILD_LEVELS_SCRIPT} --input {CROSSMODA_DATALOADER_DIR} --cochlea-centers {COCHLEA_CENTERS_PATH} --device cpu

### Step 5: Copy and clean

In [None]:
DATASET_TARGET = Path(THIS_SCRIPT_DIR, "../../data/crossmoda_dataset/").resolve()
DATASET_TARGET.mkdir(exist_ok=True, parents=True)
!cp -r {CROSSMODA_DATALOADER_DIR}/* {DATASET_TARGET}

In [None]:
# Display the temporary working directory so it can be checked before it is removed.
TMP_DIR
# Cleanup is disabled by default. Uncomment the next line only if you really
# want to delete the temporary directory and everything below it.
# !rm -r {TMP_DIR}