# WORC Baseline

Adaption of the WORC base tutorial for a baseline approach that uses only the first timepoint to predict the growth rate. 

In [8]:
# import neccesary packages
from WORC import BasicWORC
import os

# These packages are only used in analysing the results
import pandas as pd
import json
import fastr
import glob

# If you don't want to use your own data, we use the following example set,
# see also the next code block in this example.
# from WORC.exampledata.datadownloader import download_HeadAndNeck

# Define the folder this script is in, so we can easily find the example data
script_path = os.getcwd()
data_path = os.path.join(script_path, 'Data')

# NOTE: If on Google Colab, uncomment this line
# script_path = os.path.join(script_path, 'WORCTutorial')

# Determine whether you would like to use WORC for binary_classification,
# multiclass_classification or regression
modus = 'binary_classification'



---------------------------------------------------------------------------
Input
---------------------------------------------------------------------------

This part will first largely follow the same steps as the SimpleWORC tutorial.

Define the inputs of our network

In [9]:
# Identify our data structure: change the fields below accordingly
# if you use your own data.
imagedatadir = os.path.join(data_path, 'schwannoma')
image_file_name = 'image.nii.gz'
segmentation_file_name = 'mask.nii.gz'

# File in which the labels (i.e. outcome you want to predict) is stated
# Again, change this accordingly if you use your own data.
label_file = os.path.join(data_path, 'Examplefiles', 'WORCschwannoma.csv')

# Name of the label you want to predict
if modus == 'binary_classification':
    # Classification: predict a binary (0 or 1) label
    label_name = ['above_2mm']

elif modus == 'regression':
    # Regression: predict a continuous label
    label_name = ['growth']

elif modus == 'multiclass_classification':
    # Multiclass classification: predict several mutually exclusive binaru labels together
    label_name = ['imaginary_label_1', 'complement_label_1']

# Determine whether we want to do a coarse quick experiment, or a full lengthy
# one. Again, change this accordingly if you use your own data.
coarse = False

# Give your experiment a name
experiment_name = 'schwannoma_baseline_fine_00'

# Instead of the default tempdir, let's but the temporary output in a subfolder
# in the same folder as this script
tmpdir = os.path.join(script_path, 'WORC_' + experiment_name)
print(f"Temporary folder: {tmpdir}.")

Temporary folder: /home/lkeb-mgo1/WORCTutorial/WORC_schwannoma_baseline_fine_00.



---------------------------------------------------------------------------
The actual experiment
---------------------------------------------------------------------------

Here we will use BasicWORC. We could still use the ``..._from_this_directory`` SimpleWORC functions, but for
this tutorial we will instead directly provide the data to BasicWORC ourselves.
To this end, we need to create dictionaties, where the keys will be the sample
names (e.g. patient ID) and the values the filenames. The keys are used
to match segmentations to images, and match the files to the IDs provides in your
label file, so make sure everything corresponds.

In [10]:
# create data loader dictionaries
data_path = '/mnt/share/01_followup_cleanedup'


In [11]:
# Create a WORC object
experiment = BasicWORC(experiment_name)

# Get the image files and convert to dictionary with patient names as keys
images = glob.glob(os.path.join(imagedatadir, "*", image_file_name))
images = {f"{os.path.basename(os.path.dirname(image))}_0": image for image in images}

# We now append this dictionary to the images_train object. The
# images_from_this_directory function from SimpleWORC also appends to this object.
print(f"Images: {images}")
experiment.images_train.append(images)

# We do the same with the segmentations
segmentations = glob.glob(os.path.join(imagedatadir, "*", segmentation_file_name))
segmentations = {f"{os.path.basename(os.path.dirname(segmentation))}_0": segmentation for segmentation in segmentations} 
experiment.segmentations_train.append(segmentations)
print(f"Segmentations: {segmentations}")

print(f"Number of images: {len(images)} and segmentations: {len(segmentations)}")

experiment.labels_from_this_file(label_file)
experiment.predict_labels(label_name)


# Set the types of images WORC has to process. Used in fingerprinting
# Valid quantitative types are ['CT', 'PET', 'Thermography', 'ADC']
# Valid qualitative types are ['MRI', 'DWI', 'US']
experiment.set_image_types(['MRI'])

# Use the standard workflow for your specific modus
if modus == 'binary_classification':
    experiment.binary_classification(coarse=coarse)
elif modus == 'regression':
    experiment.regression(coarse=coarse)
elif modus == 'multiclass_classification':
    experiment.multiclass_classification(coarse=coarse)

# Set the temporary directory
experiment.set_tmpdir(tmpdir)

Debug detected: False.
BigrCluster detected: False.
SnelliusCluster detected: False.
Debug detected: False.
BigrCluster detected: False.
SnelliusCluster detected: False.
Images: {'id_20040212_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040212/image.nii.gz', 'id_20040045_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040045/image.nii.gz', 'id_20040372_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040372/image.nii.gz', 'id_20040192_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040192/image.nii.gz', 'id_20040103_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040103/image.nii.gz', 'id_00046941_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_00046941/image.nii.gz', 'id_20040355_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040355/image.nii.gz', 'id_20040123_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040123/image.nii.gz', 'id_20040273_0': '/home/lkeb-mgo1/WORCTutorial/Data/schwannoma/id_20040273/image.nii.gz', 'id_2004017

There are various other objects you can interact with, see https://worc.readthedocs.io/en/latest/static/user_manual.html#attributes-sources
for an overview and the function of each attribute.
    
Note: You can keep appending dictionaries to these objects here if you want to
use multiple images per patient, e.g. a T1 MRI and a T2 MRI. You should
provide matching segmentations for each of the images, as WORC extracts the features
per image-segmentation set. Except when you want to
use special workflows, e.g. use image registration, see the WORC readthedocs.


In [12]:
# The rest remains the same as in SimpleWORC
experiment.labels_from_this_file(label_file)
experiment.predict_labels(label_name)

# Set the types of images WORC has to process. Used in fingerprinting
# Valid quantitative types are ['CT', 'PET', 'Thermography', 'ADC']
# Valid qualitative types are ['MRI', 'DWI', 'US']
experiment.set_image_types(['MRI'])

# Use the standard workflow for your specific modus
if modus == 'binary_classification':
    experiment.binary_classification(coarse=coarse)
elif modus == 'regression':
    experiment.regression(coarse=coarse)
elif modus == 'multiclass_classification':
    experiment.multiclass_classification(coarse=coarse)

# Set the temporary directory
experiment.set_tmpdir(tmpdir)

Debug detected: False.


In [13]:
# adds run statistics, classification reports, ROC curves, etc.
experiment.add_evaluation()

In [14]:
# Run the experiment!
experiment.execute()

SimpleV validated: True.
MinSubjectsV validated: True.
Sample validated: True.
Label_type given is None, extracting all labels.
Label names to extract: Index(['growth', 'days', 'above_2mm'], dtype='object')
InvalidLabelsV validated: True.
Debug detected: False.
Building training network...
	 - Adding segmentix node for segmentation preprocessing.
	 - Adding preprocessing node for image preprocessing.
	 - Adding feature calculation node: predict/CalcFeatures:1.0.
	 - Adding feature calculation node: pyradiomics/Pyradiomics:1.0.
Debug detected: False.
 [INFO] networkrun:0103 >> Detected network scope: vfs://home/anaconda3/envs/WORC/lib/python3.7/site-packages/WORC
 [INFO] networkrun:0553 >> ####################################
 [INFO] networkrun:0554 >> #     network execution STARTED    #
 [INFO] networkrun:0555 >> ####################################
 [INFO] networkrun:0580 >> Running network via /home/lkeb-mgo1/anaconda3/envs/WORC/lib/python3.7/site-packages/WORC/WORC.py (last modifie

---------------------------------------------------------------------------
Analysis of results
---------------------------------------------------------------------------

There are two main outputs: the features for each patient/object, and the overall
performance. These are stored as .hdf5 and .json files, respectively. By
default, they are saved in the so-called "fastr output mount", in a subfolder
named after your experiment name.

In [67]:
# Locate output folder
outputfolder = fastr.config.mounts['output']
experiment_folder = os.path.join(outputfolder, 'WORC_' + experiment_name)

print(f"Your output is stored in {experiment_folder}.")

# Read the features for the first patient
# NOTE: we use the glob package for scanning a folder to find specific files
feature_files = glob.glob(os.path.join(experiment_folder,
                                       'Features',
                                       'features_*.hdf5'))
if len(feature_files) == 0:
    raise ValueError('No feature files found: your network has failed.')

feature_files.sort()
featurefile_p1 = feature_files[0]
features_p1 = pd.read_hdf(featurefile_p1)

# Read the overall peformance
performance_file = os.path.join(experiment_folder, 'performance_all_0.json')
if not os.path.exists(performance_file):
    raise ValueError(f'No performance file {performance_file} found: your network has failed.')
    
with open(performance_file, 'r') as fp:
    performance = json.load(fp)

# Print the feature values and names
print("Feature values from first patient:")
for v, l in zip(features_p1.feature_values, features_p1.feature_labels):
    print(f"\t {l} : {v}.")

# Print the output performance
print("\n Performance:")
stats = performance['Statistics']
for k, v in stats.items():
    print(f"\t {k} {v}.")

Your output is stored in /home/lkeb-mgo1/WORC/output/WORC_schwannoma_baseline_t2_00.


Feature values from first patient:
	 PREDICT_original_sf_compactness_avg_2.5D : 0.8765665771274658.
	 PREDICT_original_sf_compactness_std_2.5D : 0.0616710499734871.
	 PREDICT_original_sf_rad_dist_avg_2.5D : 10.666117154788761.
	 PREDICT_original_sf_rad_dist_std_2.5D : 1.075765469994343.
	 PREDICT_original_sf_roughness_avg_2.5D : 4.682288230978052.
	 PREDICT_original_sf_roughness_std_2.5D : 0.7659699362484158.
	 PREDICT_original_sf_convexity_avg_2.5D : 0.9793130164895125.
	 PREDICT_original_sf_convexity_std_2.5D : 0.030734101547315273.
	 PREDICT_original_sf_cvar_avg_2.5D : 0.012064687480414603.
	 PREDICT_original_sf_cvar_std_2.5D : 0.008045145700684512.
	 PREDICT_original_sf_prax_avg_2.5D : 0.6693416200136623.
	 PREDICT_original_sf_prax_std_2.5D : 0.15237120348410435.
	 PREDICT_original_sf_evar_avg_2.5D : 0.015263416678518339.
	 PREDICT_original_sf_evar_std_2.5D : 0.013931814355208393.
	 PREDICT_original_sf_solidity_avg_2.5D : 1.0289449037377532.
	 PREDICT_original_sf_solidity_std_2.5D 

In [68]:
experiment.add_evaluation()

In [69]:
experiment.execute()

SimpleV validated: True.
MinSubjectsV validated: True.
Sample validated: True.
Label_type given is None, extracting all labels.
Label names to extract: Index(['growth', 'days', 'above_2mm'], dtype='object')
InvalidLabelsV validated: True.
Debug detected: False.
Building training network...
	 - Adding segmentix node for segmentation preprocessing.
	 - Adding preprocessing node for image preprocessing.
	 - Adding feature calculation node: predict/CalcFeatures:1.0.


  Edge WORC_schwannoma_baseline_t2_00__calcfeatures_train_predict_CalcFeatures_1_0_MRI_0 -> WORC_schwannoma_baseline_t2_00__fingerprinter_MRI_0
Error: lost WORC_schwannoma_baseline_t2_00__fingerprinter_MRI_0 WORC_schwannoma_baseline_t2_00__calcfeatures_train_predict_CalcFeatures_1_0_MRI_0 edge


Debug detected: False.
 [INFO] networkrun:0103 >> Detected network scope: vfs://home/anaconda3/envs/WORC/lib/python3.7/site-packages/WORC
 [INFO] networkrun:0553 >> ####################################
 [INFO] networkrun:0554 >> #     network execution STARTED    #
 [INFO] networkrun:0555 >> ####################################
 [INFO] networkrun:0580 >> Running network via /home/lkeb-mgo1/anaconda3/envs/WORC/lib/python3.7/site-packages/WORC/WORC.py (last modified Thu Nov  9 19:10:56 2023)
 [INFO] networkrun:0581 >> FASTR loaded from /home/lkeb-mgo1/anaconda3/envs/WORC/lib/python3.7/site-packages/fastr
 [INFO] networkrun:0597 >> Network run tmpdir: /home/lkeb-mgo1/WORCTutorial/WORC_schwannoma_baseline_t2_00
 [INFO] networkrun:0629 >> Selecting ProcessPoolExecution as executor plugin
 [INFO] networkchunker:0146 >> Adding config_classification_sink to candidates (blocking False)
 [INFO] networkchunker:0146 >> Adding classification to candidates (blocking False)
 [INFO] networkchunker:014

**NOTE:** the performance is probably horrible, which is expected as we ran
the experiment on coarse settings. These settings are recommended to only
use for testing: see also below.
