## Creating the JSON file

In [1]:
import pathlib
from typing import Callable, List, Dict
import random
import json

BASE_PATH = pathlib.Path("/red/ruogu.fang/VALDO")


def get_label_filter_for_task(task: int) -> Callable[[pathlib.Path], bool]:
    """Return a predicate that recognises label files for the given task.

    Args:
        task: Task number. Tasks 1 and 2 have a dedicated file-name suffix;
            any other value falls back to a substring check.

    Returns:
        A callable taking a path and returning True when the file name
        (not the full path) identifies a label file:
        task 1 -> name ends with 'Seg.nii.gz',
        task 2 -> name ends with 'CMB.nii.gz',
        otherwise -> name contains 'Rater'.
    """
    label_suffixes = {1: 'Seg.nii.gz', 2: 'CMB.nii.gz'}
    suffix = label_suffixes.get(task)
    if suffix is None:
        # No suffix registered for this task: match on the rater tag instead.
        return lambda path: 'Rater' in path.name
    return lambda path: path.name.endswith(suffix)
        
def get_image_filter_for_task(task: int) -> Callable[[pathlib.Path], bool]:
    """Return a predicate that recognises image files for the given task.

    Args:
        task: Task number. Only task 1 has a restrictive filter.

    Returns:
        A callable taking a path and returning a bool. For task 1 it rejects
        files whose name contains 'Region' and files with a '.csv' suffix;
        for every other task it accepts any path.
    """
    def is_task1_image(path: pathlib.Path) -> bool:
        if 'Region' in path.name:
            return False
        return path.suffix != ".csv"

    def accept_everything(path: pathlib.Path) -> bool:
        return True

    return {1: is_task1_image}.get(task, accept_everything)

def create_fold_data(task: int) -> List[Dict[str, List[str]]]:
    """Collect image and label file paths for every patient folder of a task.

    Every sub-directory of ``BASE_PATH / f"Task{task}"`` is treated as one
    patient. Within a patient folder, files accepted by the task's label
    filter are recorded as labels; of the remaining files, those accepted by
    the task's image filter are recorded as images. Patients without any
    label file are left out of the result.

    Directory listings are sorted because ``Path.iterdir`` yields entries in
    arbitrary order. Sorting makes the output reproducible between runs and
    machines, and keeps the position of each label stable, so code that keeps
    only ``label[0]`` always selects the same file.

    Args:
        task: Task number used to build the task folder name and to select
            the label/image filters.

    Returns:
        One dict per patient with the keys 'image' and 'label', each mapping
        to a list of file paths as strings.
    """
    data = []
    task_folder = BASE_PATH / f"Task{task}"
    label_filter = get_label_filter_for_task(task)
    image_filter = get_image_filter_for_task(task)
    for patient_folder in sorted(task_folder.iterdir()):
        # Stray files next to the patient folders would make iterdir() below
        # raise NotADirectoryError, so only descend into directories.
        if not patient_folder.is_dir():
            continue
        labels = []
        images = []
        for file in sorted(patient_folder.iterdir()):
            # The label check takes precedence: a file is never both.
            if label_filter(file):
                labels.append(str(file))
            elif image_filter(file):
                images.append(str(file))
        if labels:
            data.append({'image': images, 'label': labels})
    return data


def create_cross_validation_data(n_folds: int = 5, seed=None) -> Dict[str, Dict[str, Dict[str, list]]]:
    """Build k-fold training/validation splits for tasks 1 to 3.

    For each task the samples returned by ``create_fold_data`` are shuffled
    once and partitioned into ``n_folds`` disjoint validation sets; the
    training set of a fold is every sample not in that fold's validation set.
    Each sample therefore appears in exactly one validation set, which is
    what distinguishes k-fold cross-validation from repeated random
    hold-out splits.

    Args:
        n_folds: Number of folds to generate per task. Must be at least 2.
            With the default of 5 each validation set holds roughly 20% of
            the samples.
        seed: Optional seed for the shuffle. When given, a private
            ``random.Random(seed)`` is used so the splits are reproducible.
            When None, the module-level ``random`` state is used.

    Returns:
        A dict keyed by 'Task_<n>'; each value is a dict keyed by the fold
        index as a string ('0', '1', ...) whose value is a dict with the keys
        'training' and 'validation', each a list of sample dicts. The same
        sample dict objects are shared between folds of a task.

    Raises:
        ValueError: If ``n_folds`` is smaller than 2.
    """
    if n_folds < 2:
        raise ValueError(f"n_folds must be at least 2, got {n_folds}")
    # Without a seed, keep using the global generator so that an earlier
    # random.seed(...) call in the notebook is still honoured.
    rng = random if seed is None else random.Random(seed)

    cross_validation_data = {}
    for task in range(1, 4):
        # Scan the dataset folders once per task instead of once per fold.
        samples = create_fold_data(task)
        rng.shuffle(samples)
        folds = {}
        for fold in range(n_folds):
            # Hold out every n_folds-th sample, offset by the fold index, so
            # the validation sets are disjoint and jointly cover all samples.
            validation_data = samples[fold::n_folds]
            training_data = [
                sample for position, sample in enumerate(samples)
                if position % n_folds != fold
            ]
            folds[str(fold)] = {
                'training': training_data,
                'validation': validation_data,
            }
        cross_validation_data[f'Task_{task}'] = folds
    return cross_validation_data

In [2]:
# Build the training/validation splits for every task; this walks the dataset
# folders on disk, so it needs the dataset to be reachable from this machine.
json_data = create_cross_validation_data()

In [3]:
# Persist the splits so later runs can reload exactly the same folds.
with open('valdo_data_folds.json', 'w') as f:
    f.write(json.dumps(json_data, indent=4))

print("JSON file created successfully.")

JSON file created successfully.


## Reading the data folds

In [22]:
def datafold_read(datalist: str, task: str, fold: str, first_reviewer_only: bool = True):
    """Load the training and validation subjects of one fold from a JSON file.

    Args:
        datalist: Path of the JSON file to read.
        task: Top-level key selecting the task inside the JSON document.
        fold: Key selecting the fold inside the task entry.
        first_reviewer_only: When truthy, both lists are passed through
            ``remove_alternative_reviewers`` before being returned.

    Returns:
        A ``(training, validation)`` tuple taken from the 'training' and
        'validation' entries of the selected fold.

    Raises:
        KeyError: If the task, the fold, or one of the two split keys is
            missing from the JSON document.
    """
    with open(datalist) as handle:
        splits = json.load(handle)[task][fold]

    training_subjects = splits['training']
    validation_subjects = splits['validation']
    if not first_reviewer_only:
        return training_subjects, validation_subjects
    return (
        remove_alternative_reviewers(training_subjects),
        remove_alternative_reviewers(validation_subjects),
    )


def remove_alternative_reviewers(dataset: list[dict]):
    """Keep only the first label of every subject.

    The subjects are modified in place: each subject's 'label' entry is
    replaced by a one-element list holding its former first element.

    Args:
        dataset: Subjects, each a dict with a non-empty, indexable 'label'
            entry.

    Returns:
        The same ``dataset`` object that was passed in.

    Raises:
        IndexError: If a subject's 'label' list is empty.
        KeyError: If a subject has no 'label' key.
    """
    for entry in dataset:
        primary_label = entry['label'][0]
        entry['label'] = [primary_label]
    return dataset

In [9]:
# NOTE(review): scratch cell -- looks unrelated to the fold-generation code in
# this notebook; candidate for removal before sharing.
test_list = [1, 2]

In [10]:
# pop() without an index removes and returns the last element, mutating the
# list in place, so re-running this cell gives a different result each time.
test_list.pop()

2

In [11]:
# Display the list to confirm that pop() shortened it in place.
test_list

[1]