## IMPORTS

In [None]:
# Arbitrary Pyramid of Imports
import os
import PIL
import cv2
import time
import pydicom
import tarfile
import subprocess
import numpy as np
import pandas as pd
from glob import glob
import nibabel as nib
from PIL import Image
import multiprocessing
import matplotlib.pyplot as plt
from scipy import ndimage as ndi

## DATAFRAMES

In [None]:
# Load the training labels and the sample submission; each has a BraTS21ID column
# identifying the patients of the train and test splits respectively.
train_df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
ss_df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

# Sort the ids and left-pad them with zeros to a width of five characters.
# These padded strings are used later in the notebook as per-patient directory names.
train_brats_ids = [f"{x:>05}" for x in sorted(train_df.BraTS21ID.to_list())]
test_brats_ids = [f"{x:>05}" for x in sorted(ss_df.BraTS21ID.to_list())]

print("... FIRST TEN TRAIN BRATSIDS ...\n")
for x in train_brats_ids[:10]:
    print("\t-->", x)

# FIX: this loop previously iterated over `train_brats_ids`, so the test ids were never shown.
print("\n\n... FIRST TEN TEST BRATSIDS ...\n")
for x in test_brats_ids[:10]:
    print("\t-->", x)

## FUNCTION TO INSTALL CAPTK

In [None]:
def install_captk(package_input_dir="/kaggle/input/captk-181-installerbin", working_dir="/kaggle/working"):
    """ Function to Install the CaPTk Package
    
    Most of this code comes from the following excellent notebook
        --> https://www.kaggle.com/mpsampat/running-brats-pre-processing-pipeline
    
    Args:
        package_input_dir (str, optional): Directory containing the package bin file
        working_dir (str, optional): Working directory within the particular file system
    
    Returns:
        None; Installs the CaPTk package (prints at various stages)
        
    """
    # Install the `bc` package (requirement for CapTK)
    #     --> https://helpmanual.io/packages/apt/bc/
    print("\n\n... INSTALLING BC PACKAGE ...\n\n")
    os.system('apt install bc')
    
    print("\n\n... MOVING THE BIN PACKAGE TO WORKING DIRECTORY AND MODIFYING PERMISSIONS ...\n\n")
    !cp {os.path.join(package_input_dir, "CaPTk_1.8.1_Installer.bin")} {working_dir}
    !chmod +x {os.path.join(working_dir, "CaPTk_1.8.1_Installer.bin")}
    
    print("\n\n... INSTALLING CAPTK ...\n\n")
    !echo -e Y | {os.path.join(working_dir, "CaPTk_1.8.1_Installer.bin")}

    # The subprocess module provides a function named call. 
    #       - This function allows you to call another program
    #         wait for the command to complete 
    #         and then return the return code
    # This step is necessary because after the installer successfully finishes 
    # we will not be able to run CaPTk due to FUSE issues.
    # Therefore, we use the following command to extract the contents of the AppImage onto the hard drive
    print("\n\n... EXTRACT CONTENTS OF APPIMAGE ONTO HARD DRIVE ...\n\n")
    subprocess.call([os.path.join(working_dir, "CaPTk/1.8.1/captk"), "--appimage-extract"])
    
    # Add relevant directories to the respective paths
    print("\n\n... ADD RELEVANT PATHS TO SYSTEM PATHS ...\n\n")
    os.environ['PATH'] = os.path.join(working_dir, "squashfs-root/usr/lib:") + os.environ['PATH'] 
    os.environ['LD_LIBRARY_PATH'] = os.path.join(working_dir, "squashfs-root/usr/lib:") + os.environ['LD_LIBRARY_PATH'] 
    
    print("\n\n... SEE CAPTK COMMAND HELP [-h] ...\n\n")
    !{os.path.join(working_dir, "squashfs-root/usr/bin/BraTSPipeline")} -h
    
install_captk()

## FUNCTION TO RUN CAPTK

In [None]:
def _lowest_numbered_file(directory):
    """ Return the path of the file in `directory` with the smallest trailing index
    
    File names are expected to look like `<prefix>-<integer>.<ext>`; the integer
    between the last "-" and the following "." is used as the ordering key.
    
    Args:
        directory (str): Directory to search
    
    Returns:
        str: Full path to the file with the lowest index
    
    Raises:
        FileNotFoundError: If the directory is missing or contains no files
    
    """
    file_names = os.listdir(directory)
    if not file_names:
        raise FileNotFoundError(f"No files found in {directory}")
    
    def _index_of(file_name):
        return int(file_name.rsplit("-", 1)[1].split(".", 1)[0])
    
    # min() picks the same element as sorted(...)[0] without sorting the whole list
    return os.path.join(directory, min(file_names, key=_index_of))


def run_captk(brats_id, 
              cmd_dir="/kaggle/working/squashfs-root/usr/bin/BraTSPipeline", 
              input_dir="/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification",
              output_dir="/kaggle/working",
              ds_split="train",
              modalities=("T1wCE", "T1w", "T2w", "FLAIR"),
              flags=None):
    """ Function to trigger the BraTS CaPTk Preprocessing Pipeline
    
    For every modality the lowest-numbered file in 
    `<input_dir>/<ds_split>/<brats_id>/<modality>` is handed to the pipeline
    as the reference image for that modality.
    
    Args:
        brats_id (str or int): The particular BraTSID to run the pipeline on. 
            Anything shorter than five characters (including ints) is zero-padded.
        cmd_dir (str, optional): The location of the CaPTk executable we wish to run
        input_dir (str, optional): Path to the input directory containing [train|test] dicom files
        output_dir (str, optional): Path to the desired output directory
        ds_split (str, optional): Whether the BraTSID is found within the train or test split
        modalities (sequence of strs, optional): The way the modalities are spelled/capitalized.
            Must include "T1wCE", "T1w", "T2w" and "FLAIR", which are looked up by name.
        flags (dict, optional): Mapping to control the optional arguments 
            for the BraTS Preprocessing Pipeline ('-s', '-b', '-i', '-d'). 
            Any flag left out defaults to '0'; values are converted to strings.
    
    Returns:
        None; Saves the generated files to the specified output directory
    
    Raises:
        FileNotFoundError: If a modality directory is missing or empty
    
    """
    # Start Timing
    t1 = time.time()
    
    # Make sure brats_id is the correct format
    # (converting first means integer ids are handled instead of failing on len())
    brats_id = str(brats_id).zfill(5)
    print(f"\n... STARTING TO PREPROCESS BRATSID={brats_id} ...\n")
    
    # Fill in any flag the caller did not specify; built per call so no
    # default object is ever shared between calls
    pipeline_flags = {'-s': '0', '-b': '0', '-i': '0', '-d': '0'}
    if flags:
        pipeline_flags.update(flags)
    
    # Setup paths
    modality_base_dir = os.path.join(input_dir, ds_split, brats_id)
    output_base_dir = os.path.join(output_dir, ds_split, brats_id)
    modality_file_map = {m: _lowest_numbered_file(os.path.join(modality_base_dir, m))
                         for m in modalities}

    # Make output directory if it doesn't already exist
    os.makedirs(output_base_dir, exist_ok=True)
            
    exit_code = subprocess.call([
        cmd_dir,                            # Path to the CaPTk executable file
        '-t1c', modality_file_map["T1wCE"], # Input structural T1-weighted post-contrast image 
        '-t1', modality_file_map["T1w"],    # Input structural T1-weighted pre-contrast image
        '-t2', modality_file_map["T2w"],    # Input structural T2-weighted contrast image 
        '-fl', modality_file_map["FLAIR"],  # Input structural FLAIR contrast image
        '-o', output_base_dir,              # Output directory for final output
        '-s', str(pipeline_flags['-s']), # [DEFAULT=0]    # Flag whether to skull strip or not (0=NO, 1=YES)          
        '-b', str(pipeline_flags['-b']), # [DEFAULT=0]    # Flag whether to segment brain tumors or no (0=NO, 1=YES)
        '-i', str(pipeline_flags['-i']), # [DEFAULT=0]    # Flag whether to save intermediate files (0=NO, 1=YES)
        '-d', str(pipeline_flags['-d']), # [DEFAULT=0]    # Flag whether to print debugging information (0=NO, 1=YES)
    ])
    
    # Surface pipeline failures instead of silently discarding the return code
    if exit_code != 0:
        print(f"\n... WARNING: PIPELINE EXITED WITH CODE {exit_code} FOR BRATSID={brats_id} ...\n")
    print(f"\n... IT TOOK {time.time()-t1:.3f} SECONDS TO COMPLETE BRATSID={brats_id}...\n")

## RUN CAPTK ON SUBSAMPLE OF DATA

In [None]:
# Time the whole subsample run (train + test) end to end
t1 = time.time()

# Only a small subsample of each split is processed here
train_subset = train_brats_ids[:5]
test_subset = test_brats_ids[:5]

print("\n\n\n... STARTING TRAIN IMAGES ...\n\n\n")
# test to see if multiprocessing can benefit us here...
# NOTE: each patient runs in its own process, but start() is immediately followed
#       by join(), so patients are still handled one at a time (no parallelism).
for b_id in train_subset:
    KWARGS = dict(brats_id=b_id, ds_split="train", flags={'-s':'0', '-b':'0', '-i':'0', '-d':'0'})
    p = multiprocessing.Process(target=run_captk, kwargs=KWARGS)
    p.start()
    p.join()
print("\n\n\n... FINISHING TRAIN IMAGES ...\n\n\n")

print("\n\n\n... STARTING TEST IMAGES ...\n\n\n")
# Test patients are processed directly in the current process
for brats_id in test_subset:
    run_captk(brats_id, ds_split="test", flags={'-s':'0', '-b':'0', '-i':'0', '-d':'0'})
print("\n\n\n... FINISHING TEST IMAGES ...\n\n\n")

# Report the number of patients actually processed (previously hard-coded as 50)
n_patients = len(train_subset) + len(test_subset)
print(f"\n\n\n\n\nTIME TO FINISH {n_patients} PATIENTS IS :   {time.time()-t1:.4f} ...\n\n\n\n\n")

In [None]:
# Clean Up So We Can Use the Output For Dataset Creation
!rm -rf /kaggle/working/squashfs-root
!rm -rf /kaggle/working/CaPTk
!rm -rf /kaggle/working/CaPTk_1.8.1_Installer.bin