In [7]:
import os
import json

def _case_identifier(filename):
    """Return the case identifier for an ``imagesTr`` file name.

    Removes the ``.nii.gz`` extension and, when present, the trailing
    four-digit channel suffix (``_0000``, ``_0001``, ...). Only the *last*
    underscore-separated token is treated as the channel suffix, so an
    identifier that happens to contain ``_000`` in the middle is kept intact.
    A name without a channel suffix is returned with just the extension removed.
    """
    stem = filename[: -len(".nii.gz")]
    base, sep, channel = stem.rpartition("_")
    if sep and len(channel) == 4 and channel.isdigit():
        return base
    return stem


def specific_split_json_file(
    dataset_name,
    raw_root="/home/sastocke/nnUNet/nnUNet_raw",
    preprocessed_root="/home/sastocke/nnUNet/nnUNet_preprocessed",
):
    """Write a leave-one-volunteer-out ``splits_final.json`` for a dataset.

    Case identifiers are collected from ``<raw_root>/<dataset_name>/imagesTr``
    (``.nii.gz`` files only). Five splits are produced, one per validation
    volunteer ('01', '02', '04', '05', '06'). Volunteer 03 is never used, in
    either training or validation.

    For each split:
      * ``train`` holds every case whose identifier contains
        ``Volunteer_<vv>_`` for any volunteer other than the validation
        volunteer (and other than 03).
      * ``val`` holds only the cases of the validation volunteer whose
        identifier starts with ``Hannum_Volunteer_`` and contains ``r0``.

    Args:
        dataset_name: Dataset folder name.
        raw_root: Directory that contains ``<dataset_name>/imagesTr``.
            Defaults to the path that was previously hard-coded.
        preprocessed_root: Directory under which
            ``<dataset_name>/splits_final.json`` is written (created if
            missing). Defaults to the path that was previously hard-coded.

    Returns:
        None. The splits are written to disk and the output path is printed.

    Raises:
        FileNotFoundError: If the ``imagesTr`` folder does not exist
            (raised by ``os.listdir``).
    """
    dataset_folder = os.path.join(raw_root, dataset_name, "imagesTr")

    # A set removes the duplicates that multi-channel cases would otherwise
    # produce (one file per channel); sorting makes the written JSON
    # reproducible, since os.listdir returns entries in arbitrary order.
    all_files = sorted(
        {_case_identifier(f) for f in os.listdir(dataset_folder) if f.endswith(".nii.gz")}
    )

    # Define all volunteers (excluding Volunteer 03)
    volunteers = [f"{i:02}" for i in range(1, 13) if i != 3]  # '01' to '12', excluding '03'

    # Select the first 5 volunteers for validation
    validation_volunteers = volunteers[:5]  # ['01', '02', '04', '05', '06']

    splits = []
    for val_vol in validation_volunteers:
        # Training set: all volunteers except the validation volunteer and Volunteer 03
        train_tags = tuple(f"Volunteer_{v}_" for v in volunteers if v != val_vol)
        train_ids = [f for f in all_files if any(tag in f for tag in train_tags)]

        # For disco and dirvsavg differences, only validate on dirvsaverages:
        # restrict validation to the "Hannum_Volunteer_" cases tagged "r0".
        val_tag = f"Volunteer_{val_vol}_"
        val_ids = [
            f for f in all_files
            if f.startswith("Hannum_Volunteer_") and val_tag in f and "r0" in f
        ]

        # Ensure filenames are clean
        # NOTE(review): collapsing "__" changes the identifier; if any file on
        # disk really contains a double underscore the entry will no longer
        # match it - confirm this is intended.
        train_ids = [f.replace("__", "_") for f in train_ids]
        val_ids = [f.replace("__", "_") for f in val_ids]

        splits.append({"train": train_ids, "val": val_ids})

    # Save the splits_final.json file
    output_path = os.path.join(preprocessed_root, dataset_name, "splits_final.json")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(output_path, "w") as f:
        json.dump(splits, f, indent=4)

    print(f"Splits saved to {output_path}")

# Example usage: write splits_final.json for the dataset named below.
dataset_name = "Dataset060_DiscoandDirvsAvgHannumLV"
specific_split_json_file(dataset_name=dataset_name)


Splits saved to /home/sastocke/nnUNet/nnUNet_preprocessed/Dataset060_DiscoandDirvsAvgHannumLV/splits_final.json


In [8]:
import json

# Location of the splits_final.json file to inspect.
split_file_path = "/home/sastocke/nnUNet/nnUNet_preprocessed/Dataset060_DiscoandDirvsAvgHannumLV/splits_final.json"

# Read the file back and decode the list of {"train": [...], "val": [...]} entries.
with open(split_file_path, "r") as f:
    splits = json.loads(f.read())

print(f"Number of splits: {len(splits)}")

# One three-line summary per split: header, training count, validation count.
for idx, split in enumerate(splits):
    print(
        f"Split {idx + 1}:",
        f"  Train: {len(split['train'])} samples",
        f"  Val: {len(split['val'])} samples",
        sep="\n",
    )


Number of splits: 5
Split 1:
  Train: 2976 samples
  Val: 92 samples
Split 2:
  Train: 3056 samples
  Val: 80 samples
Split 3:
  Train: 3104 samples
  Val: 64 samples
Split 4:
  Train: 3008 samples
  Val: 88 samples
Split 5:
  Train: 3136 samples
  Val: 52 samples


In [24]:
import json

# Paths to the two split files being compared
old_split_path = "/home/sastocke/nnUNet/nnUNet_preprocessed/Dataset050_DataAugAllSpecifcNormLVOnly/old_splits_final.json"  # Replace with the path of your 11-split file
new_split_path = "/home/sastocke/nnUNet/nnUNet_preprocessed/Dataset050_DataAugAllSpecifcNormLVOnly/splits_final.json"

# Load both split definitions
with open(old_split_path, "r") as f:
    old_splits = json.load(f)

with open(new_split_path, "r") as f:
    new_splits = json.load(f)

# Print the number of splits and the train/val sizes of each, old file first
print(f"Old splits: {len(old_splits)}")
for i, split in enumerate(old_splits):
    print(f"Old Split {i + 1}: Train {len(split['train'])}, Val {len(split['val'])}")

# The f prefix is required here; without it the braces are printed literally
# instead of the number of new splits.
print(f"\nNew splits: {len(new_splits)}")
for i, split in enumerate(new_splits):
    print(f"New Split {i + 1}: Train {len(split['train'])}, Val {len(split['val'])}")


Old splits: 11
Old Split 1: Train 2592, Val 368
Old Split 2: Train 2640, Val 320
Old Split 3: Train 2704, Val 256
Old Split 4: Train 2608, Val 352
Old Split 5: Train 2752, Val 208
Old Split 6: Train 2736, Val 224
Old Split 7: Train 2720, Val 240
Old Split 8: Train 2704, Val 256
Old Split 9: Train 2752, Val 208
Old Split 10: Train 2704, Val 256
Old Split 11: Train 2688, Val 272

New splits: {len(new_splits)}
New Split 1: Train 2592, Val 368
New Split 2: Train 2640, Val 320
New Split 3: Train 2704, Val 256
New Split 4: Train 2608, Val 352
New Split 5: Train 2752, Val 208
