# Script to create json file for from scratch training with our data (08.02.2024)

In [4]:
import os
import argparse
import sys
import json
import random
from collections import OrderedDict

In [5]:
class WriteTrainJSON:
    """
    Class for writing .json files to run from training from scratch, finetuning and/or the prediction of Choroid Plexus segmentations.

    """
    def __init__(self, dataroot: str=".", description=None, work_dir: str=".", train: str="."):
        """
        Initializes the class with the given parameters.

        :param dataroot: The path to the data directory. (/var/data/MONAI_Choroid_Plexus/dataset_monai)
        :param description: The description of the experiment.
        :param work_dir: The working directory. (/var/data/student_home/lia/thesis/monai_segmentation/monai_training)
        :param train: The path to the train directory.
        """
        self.dataroot = dataroot
        if description is None:
            self.description='Dataset for Choroid Plexus segmentation'
        elif isinstance(description, str):
            self.description=description
        self.work_dir = work_dir
        self.train = train
        self.file=[]

    def write_train_json(self, json_filename: str="train.json"):

        # Set path to output file
        output_folder = os.path.join(self.work_dir, 'JSON_dir')

        # Create output folder if it does not exist
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        image_dir = self.dataroot
        label_dir = os.path.join(self.dataroot, 'labels', 'final')

        filenames_image = os.listdir(image_dir)
        filenames_image.sort()
        print("Before removing labels direcotry from list",filenames_image)
        # Check if the label directory is in filenames_image and remove it from the list of filenames
        if 'labels' in filenames_image:
            filenames_image.remove('labels')
        print(filenames_image)

        filenames_label = os.listdir(label_dir)
        filenames_label.sort()   

        image_paths = [os.path.join(image_dir, filename) for filename in filenames_image]
        label_paths = [os.path.join(label_dir, filename) for filename in filenames_label]

        if len(filenames_image)!=len(filenames_label):
                raise ValueError("The number of images and the number of labels is different. Please, check image_Tr and label_Tr folders.")
        
        
        # create json file - manually set

        json_dict = OrderedDict()
        json_dict['name'] = "MRI Dataset - Choroid Plexus Segmentation" 
        json_dict['description'] = self.description
        json_dict['tensorImageSize'] = "3D"
        json_dict['modality'] = {
            "0": "MR"
        }
            
        json_dict['labels'] = {
            "0": "background",
            "1": "Choroid Plexus"
        }

        json_dict['numTraining'] = len(image_paths)

        json_dict['training'] = [{"fold": 0, "image": '%s' %i , "label": '%s' %j} for j, i in zip(label_paths, image_paths)]

        random.seed(42)
        random.shuffle(json_dict["training"])

        # Split training data into N random folds
        num_folds = 5
        fold_size = len(json_dict["training"]) // num_folds
        for i in range(num_folds):
            for j in range(fold_size):
                json_dict["training"][i * fold_size + j]["fold"] = i

        with open(os.path.join(output_folder, json_filename), 'w') as f:
                json.dump(json_dict, f, indent=4, sort_keys=True)





In [6]:
class MyParser(argparse.ArgumentParser):
    def error(self, message):
        sys.stderr.write('error: %s\n' % message)
        self.print_help()
        sys.exit(2)

# Main
if __name__ == '__main__':
    print('Starting launching_tool :)')

    # Initialize the parser
    parser = argparse.ArgumentParser(
        description="Pipeline for training selected model from scratch or finetuning with N subjects with selected pretrained models"
    )

    # Add the parameters positional/optional
    parser.add_argument('--dataroot', required=True, default="/var/data/MONAI_Choroid_Plexus/dataset_train_from_scratch_monai" , help="Data directory. Where the data is stored")
    parser.add_argument('--description', required=False, help="Data description")
    parser.add_argument('--work_dir', required=True, help="working directory")
    parser.add_argument('--training_dir', required=False, help="Working directory where to save trained models. If not specified, default folder name and locations will be used")
    parser.add_argument('--train_json', required=False, default="train.json", help="Name of the train.json file")
    # Parse the arguments
    args = parser.parse_args()
    print(args)
 
    print('Writing JSON file for training.....')
    json_file=WriteTrainJSON(args.dataroot, args.description, args.work_dir, args.training_dir).write_train_json(json_filename=args.train_json)



Starting launching_tool :)


usage: ipykernel_launcher.py [-h] --dataroot DATAROOT
                             [--description DESCRIPTION] --work_dir WORK_DIR
                             [--training_dir TRAINING_DIR]
                             [--train_json TRAIN_JSON]
ipykernel_launcher.py: error: the following arguments are required: --dataroot, --work_dir


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [7]:
json_file=WriteTrainJSON("/var/data/MONAI_Choroid_Plexus/dataset_train_from_scratch_monai", work_dir = "/var/data/student_home/lia/thesis/monai_segmentation/monai_training").write_train_json()

Before removing labels direcotry from list ['0_ChP.nii.gz', '10_ChP.nii.gz', '11_ChP.nii.gz', '12_ChP.nii.gz', '13_ChP.nii.gz', '14_ChP.nii.gz', '15_ChP.nii.gz', '16_ChP.nii.gz', '17_ChP.nii.gz', '18_ChP.nii.gz', '19_ChP.nii.gz', '1_ChP.nii.gz', '20_ChP.nii.gz', '21_ChP.nii.gz', '22_ChP.nii.gz', '23_ChP.nii.gz', '24_ChP.nii.gz', '25_ChP.nii.gz', '26_ChP.nii.gz', '28_ChP.nii.gz', '29_ChP.nii.gz', '2_ChP.nii.gz', '3_ChP.nii.gz', '4_ChP.nii.gz', '5_ChP.nii.gz', '6_ChP.nii.gz', '7_ChP.nii.gz', '8_ChP.nii.gz', '9_ChP.nii.gz', 'labels']
['0_ChP.nii.gz', '10_ChP.nii.gz', '11_ChP.nii.gz', '12_ChP.nii.gz', '13_ChP.nii.gz', '14_ChP.nii.gz', '15_ChP.nii.gz', '16_ChP.nii.gz', '17_ChP.nii.gz', '18_ChP.nii.gz', '19_ChP.nii.gz', '1_ChP.nii.gz', '20_ChP.nii.gz', '21_ChP.nii.gz', '22_ChP.nii.gz', '23_ChP.nii.gz', '24_ChP.nii.gz', '25_ChP.nii.gz', '26_ChP.nii.gz', '28_ChP.nii.gz', '29_ChP.nii.gz', '2_ChP.nii.gz', '3_ChP.nii.gz', '4_ChP.nii.gz', '5_ChP.nii.gz', '6_ChP.nii.gz', '7_ChP.nii.gz', '8_ChP.nii.

In [3]:
os.listdir('/var/data/MONAI_Choroid_Plexus/dataset_train_from_scratch_monai')

['2_ChP.nii.gz',
 '29_ChP.nii.gz',
 '20_ChP.nii.gz',
 '7_ChP.nii.gz',
 '14_ChP.nii.gz',
 '18_ChP.nii.gz',
 '3_ChP.nii.gz',
 'labels',
 '25_ChP.nii.gz',
 '22_ChP.nii.gz',
 '9_ChP.nii.gz',
 '12_ChP.nii.gz',
 '8_ChP.nii.gz',
 '13_ChP.nii.gz',
 '23_ChP.nii.gz',
 '17_ChP.nii.gz',
 '19_ChP.nii.gz',
 '16_ChP.nii.gz',
 '24_ChP.nii.gz',
 '0_ChP.nii.gz',
 '28_ChP.nii.gz',
 '4_ChP.nii.gz',
 '26_ChP.nii.gz',
 '11_ChP.nii.gz',
 '21_ChP.nii.gz',
 '15_ChP.nii.gz',
 '1_ChP.nii.gz',
 '5_ChP.nii.gz',
 '10_ChP.nii.gz',
 '6_ChP.nii.gz']