# Debugging the creation of the JSON datalist file

In [10]:
import json
import os 
from pathlib import Path
from sklearn.model_selection import train_test_split
import random

In [6]:
def split_atlas_data(data_directory: str, train_fraction: float = 0.8, seed: int = None) -> tuple:
    """Locate the ATLAS 2.0 scans and split them into training and validation lists.

    The directory is expected to be laid out as
    ``<data_directory>/Training/<record>/<subject>/ses-1/anat/``. The returned paths are built from the
    directory names only; the image and mask files themselves are not checked for existence.

    :param data_directory: path to the dataset root. For ATLAS should be /red/ruogu.fang/atlas/decrypt/ATLAS_2
    :param train_fraction: fraction of the subjects placed in the training list
    :param seed: random seed for splitting data. If None, data will be split differently each time.
    :returns: tuple of (training data, validation data). Each item in the tuple is a list of dictionaries
        with keys 'image' and 'label' holding file paths as strings, similar to how the BraTS data is treated
    """
    training_dir = Path(data_directory) / 'Training'

    training = []
    # Sort both levels: iterdir() yields entries in an arbitrary, filesystem-dependent order, which would
    # make the split differ between machines even when the same seed is used.
    for record_dir in sorted(training_dir.iterdir()):
        if not record_dir.is_dir():
            continue
        for subject_dir in sorted(record_dir.iterdir()):
            # Only directories can be subjects; this also skips the *.json files the records contain.
            if not subject_dir.is_dir():
                continue
            # iterdir() already returns paths that include their parent, so extend subject_dir directly
            # rather than joining it onto training_dir again (which breaks for relative roots).
            anat_dir = subject_dir / 'ses-1' / 'anat'
            prefix = f'{subject_dir.name}_ses-1_space-MNI152NLin2009aSym'
            training.append(
                {
                    'label': str(anat_dir / f'{prefix}_label-L_desc-T1lesion_mask.nii.gz'),
                    'image': str(anat_dir / f'{prefix}_T1w.nii.gz'),
                }
            )

    train, val = train_test_split(training, train_size=train_fraction, random_state=seed)
    return train, val

In [9]:
# Quick sanity check: run the split once and display the first training record.
train_records, _ = split_atlas_data('/red/ruogu.fang/atlas/decrypt/ATLAS_2/')
train_records[0]

{'label': '/red/ruogu.fang/atlas/decrypt/ATLAS_2/Training/R003/sub-r003s014/ses-1/anat/sub-r003s014_ses-1_space-MNI152NLin2009aSym_label-L_desc-T1lesion_mask.nii.gz',
 'image': '/red/ruogu.fang/atlas/decrypt/ATLAS_2/Training/R003/sub-r003s014/ses-1/anat/sub-r003s014_ses-1_space-MNI152NLin2009aSym_T1w.nii.gz'}

## Goal
Create a JSON file with the following format:
```json
{
    "training": [
        {
            "fold": 0,
            "image": [
                "TrainingData/BraTS2021_01146/BraTS2021_01146_flair.nii.gz",
                "TrainingData/BraTS2021_01146/BraTS2021_01146_t1ce.nii.gz",
                "TrainingData/BraTS2021_01146/BraTS2021_01146_t1.nii.gz",
                "TrainingData/BraTS2021_01146/BraTS2021_01146_t2.nii.gz"
            ],
            "label": "TrainingData/BraTS2021_01146/BraTS2021_01146_seg.nii.gz"
        },
        {
            "fold": 0,
            "image": [
                "TrainingData/BraTS2021_01419/BraTS2021_01419_flair.nii.gz",
                "TrainingData/BraTS2021_01419/BraTS2021_01419_t1ce.nii.gz",
                "TrainingData/BraTS2021_01419/BraTS2021_01419_t1.nii.gz",
                "TrainingData/BraTS2021_01419/BraTS2021_01419_t2.nii.gz"
            ],
            "label": "TrainingData/BraTS2021_01419/BraTS2021_01419_seg.nii.gz"
        },
        {
            "fold": 0,
            "image": [
                "TrainingData/BraTS2021_00483/BraTS2021_00483_flair.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t1ce.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t1.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t2.nii.gz"
            ],
            "label": "TrainingData/BraTS2021_00483/BraTS2021_00483_seg.nii.gz"
        }
    ]
}
```

In [15]:
def reasonable_format_to_bad_format(reasonable_format: str,
                                    base_dir: str = '/red/ruogu.fang/atlas/decrypt/ATLAS_2/') -> str:
    """Convert a full file path into the dataset-relative form used by the SwinUNETR json format.

    Note that doing this did not solve the issue that prompted this notebook; it only changes the path format.

    :param reasonable_format: file path containing ``base_dir``, e.g. a value produced by ``split_atlas_data``
    :param base_dir: dataset root to strip from the path. Defaults to the ATLAS 2.0 location.
    :returns: everything after the first occurrence of ``base_dir``, with any leading '/' removed so the
        result is always a relative path
    :raises ValueError: if ``base_dir`` does not occur in ``reasonable_format``
    """
    # partition() keeps the whole remainder, whereas split(base_dir)[1] would cut the result short if
    # base_dir happened to occur a second time further along the path.
    _, found, relative = reasonable_format.partition(base_dir)
    if not found:
        raise ValueError(f'{reasonable_format!r} does not contain the base directory {base_dir!r}')
    # Tolerate a base_dir given without its trailing slash.
    return relative.lstrip('/')

num_folds = 5

# Assign every case to exactly one fold. The SwinUNETR datalist loader (datafold_read) validates on the
# entries whose "fold" equals the selected fold and trains on all remaining entries, so each case must
# appear only once; listing the same case under several fold ids would leak validation cases into training.
seed = random.randint(0, 10_000_000)
train_part, val_part = split_atlas_data('/red/ruogu.fang/atlas/decrypt/ATLAS_2/', seed=seed)
# train_test_split shuffles by default, so the concatenation is a random ordering of all cases.
all_cases = train_part + val_part

training = []
for index, image_dict in enumerate(all_cases):
    image = reasonable_format_to_bad_format(image_dict['image'])
    label = reasonable_format_to_bad_format(image_dict['label'])
    images = [image, image]  # Duplicate the image for 2 channel input
    training.append(
        {
            "fold": index % num_folds,  # round-robin keeps the folds equal in size (to within one case)
            "image": images,
            "label": label,
        }
    )

In [18]:
# Destination of the generated datalist.
outdir = './jsons/'
outfile = 'atlas_2_folds_2channel.json'

# Ensure the target directory is present before writing into it.
Path(outdir).mkdir(parents=True, exist_ok=True)

# The datalist keeps all entries under the single "training" key.
output = {'training': training}
with (Path(outdir) / outfile).open('w') as json_file:
    json_file.write(json.dumps(output))
