# Imports

In [None]:
import numpy as np 
import pandas as pd 
import os
import shutil
import json
import glob


!pip install nnunetv2

# Environment Setup and Directory Creation

In [None]:
# Base locations used by the rest of the notebook for the raw and the
# preprocessed nnU-Net data.
os.environ['nnUNet_raw_data_base'] = '/kaggle/working/nnUNet_raw_data_base'
os.environ['nnUNet_preprocessed'] = '/kaggle/working/nnUNet_preprocessed'

# Create necessary directories.
# os.makedirs is used instead of `!mkdir -p $VAR/...` so the cell is plain
# Python: it does not depend on IPython shell magics or on the shell expanding
# the environment variables set above.
os.makedirs(
    os.path.join(os.environ['nnUNet_raw_data_base'], 'nnUNet_raw', 'Dataset001_FCDLesions'),
    exist_ok=True,
)
os.makedirs(os.environ['nnUNet_preprocessed'], exist_ok=True)

print("nnUNet environment setup complete.")

In [None]:

# Name of the uploaded Kaggle dataset holding the participants metadata, and
# the folder it is mounted under.
UPLOADED_DATASET_NAME = 'participants'
CSV_DATASET_ROOT = f'/kaggle/input/{UPLOADED_DATASET_NAME}'

print(f"Listing files in the root of your uploaded metadata dataset ({CSV_DATASET_ROOT}):")
if not os.path.exists(CSV_DATASET_ROOT):
    print(f"Error: The dataset root {CSV_DATASET_ROOT} was not found. Please verify the uploaded dataset name.")
else:
    # Shows the actual file name(s), e.g. ['participants.csv'] or ['participants.xlsx'].
    print(os.listdir(CSV_DATASET_ROOT))

# Data Conversion and dataset.json Generation

In [None]:

# --- Source locations ---
# Root of the organized MRI dataset (one folder per subject) and the Excel
# sheet describing the participants.
BASE_DIR = '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed'
UPLOADED_DATASET_NAME = 'participants'
EXCEL_FILE_NAME = 'participants.xlsx'
EXCEL_PATH = os.path.join('/kaggle/input', UPLOADED_DATASET_NAME, EXCEL_FILE_NAME)

# --- Destination (nnU-Net raw dataset folder) ---
TASK_ID = 1
TASK_NAME = 'Dataset001_FCDLesions'
NNUNET_RAW_DATA_DIR = os.path.join(os.environ['nnUNet_raw_data_base'], 'nnUNet_raw', TASK_NAME)

# Sub-folders for training images, training labels and test images.
IMAGES_TR_DIR = os.path.join(NNUNET_RAW_DATA_DIR, 'imagesTr')
LABELS_TR_DIR = os.path.join(NNUNET_RAW_DATA_DIR, 'labelsTr')
IMAGES_TS_DIR = os.path.join(NNUNET_RAW_DATA_DIR, 'imagesTs')

# Make sure every target folder exists before anything is copied into it.
for _target_dir in (IMAGES_TR_DIR, LABELS_TR_DIR, IMAGES_TS_DIR):
    os.makedirs(_target_dir, exist_ok=True)

# --- 1. Load and Filter the Excel Data ---
print(f"Attempting to read Excel file from: {EXCEL_PATH}")

try:
    participants_df = pd.read_excel(EXCEL_PATH, sheet_name='participants')
except ValueError:
    # read_excel reports a missing worksheet with ValueError: fall back to the
    # first sheet of the workbook. The context manager closes the workbook
    # handle once the sheet has been read.
    with pd.ExcelFile(EXCEL_PATH) as xls:
        print(f"Available sheets: {xls.sheet_names}")
        participants_df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
except Exception as e:
    print(f"FATAL ERROR: Could not read Excel file. Please verify the EXCEL_PATH: {EXCEL_PATH}. Error: {e}")
    raise

# Ensure required columns exist
required_columns = {'participant_id', 'group', 'split'}
missing_columns = required_columns - set(participants_df.columns)
if missing_columns:
    raise ValueError(f"Missing required columns in Excel file: {missing_columns}")

# Normalize the categorical columns once. Casting to str first keeps the
# comparison working when a column holds empty/NaN or non-text cells, and
# stripping tolerates stray spaces around the values.
_group_norm = participants_df['group'].astype(str).str.strip().str.lower()
_split_norm = participants_df['split'].astype(str).str.strip().str.lower()


def _fcd_subject_ids(split_name):
    """Return the unique participant IDs of FCD subjects in the given split."""
    selected = participants_df.loc[
        (_group_norm == 'fcd') & (_split_norm == split_name), 'participant_id'
    ]
    # Rows without an ID cannot be mapped to a folder; a repeated ID would be
    # listed twice in dataset.json although its files exist only once.
    return selected.dropna().astype(str).str.strip().drop_duplicates().tolist()


# Filter FCD subjects for training and testing
train_fcd_subjects = _fcd_subject_ids('train')
test_fcd_subjects = _fcd_subject_ids('test')

print(f"Total FCD subjects for TRAINING: {len(train_fcd_subjects)}")
print(f"Total FCD subjects for TESTING: {len(test_fcd_subjects)}")

# Filled by the processing step: dataset.json entries and (subject, reason) pairs.
training_files = []
test_files = []
skipped_subjects = []

# --- Helper Function to Process Subjects ---
def process_subject(subject_id, target_images_dir, is_training=True):
    """
    Copy one subject's T1w/FLAIR (and, for training, label) into the nnU-Net layout.

    Source files are looked up in ``BASE_DIR/<subject_id>/anat`` by the
    patterns ``<subject_id>*_T1w.nii``, ``<subject_id>*_FLAIR.nii`` and
    ``<subject_id>*_FLAIR_roi.nii``. The T1w is written as channel ``_0000``
    and the FLAIR as channel ``_0001`` in ``target_images_dir``; the label of
    a training subject goes to ``LABELS_TR_DIR``.

    Args:
        subject_id: Name of the subject folder / file prefix.
        target_images_dir: Folder receiving the two image channels.
        is_training: When True a label file is required and copied as well.

    Returns:
        ``(True, entry)`` on success, where ``entry`` is the dataset.json
        record for the subject, or ``(False, reason)`` with a short message
        when the subject is skipped. If copying fails part-way, the files
        written for this subject are removed again so no incomplete case is
        left in the target folders.
    """
    subject_path_with_anat = os.path.join(BASE_DIR, subject_id, 'anat')

    if not os.path.exists(subject_path_with_anat):
        return False, "'anat' folder missing"

    def _find(suffix):
        # All files of this subject ending in `_<suffix>.nii`.
        return glob.glob(os.path.join(subject_path_with_anat, f'{subject_id}*_{suffix}.nii'))

    # Look for relevant MRI and label files
    t1w_files = _find('T1w')
    flair_files = _find('FLAIR')
    label_files = _find('FLAIR_roi')

    # Validation checks: exactly one file per modality, otherwise the choice is ambiguous
    if len(t1w_files) != 1 or len(flair_files) != 1:
        return False, f"Image ambiguity: Found {len(t1w_files)} T1w, {len(flair_files)} FLAIR."

    if is_training and len(label_files) != 1:
        return False, f"Training subject missing label file (found {len(label_files)})."

    # (source, destination) pairs in the order they are copied
    copy_plan = [
        (t1w_files[0], os.path.join(target_images_dir, f'{subject_id}_0000.nii')),
        (flair_files[0], os.path.join(target_images_dir, f'{subject_id}_0001.nii')),
    ]
    if is_training:
        copy_plan.append((label_files[0], os.path.join(LABELS_TR_DIR, f'{subject_id}.nii')))

    attempted = []
    try:
        for source, destination in copy_plan:
            # Recorded before copying so a partially written file is cleaned up too.
            attempted.append(destination)
            shutil.copy(source, destination)
    except OSError as e:
        # Roll back: a subject reported as skipped must not leave images
        # (or a label) behind in the dataset folders.
        for destination in attempted:
            if os.path.exists(destination):
                os.remove(destination)
        return False, f"Copy error: {e}"

    if is_training:
        return True, {"image": f"./imagesTr/{subject_id}", "label": f"./labelsTr/{subject_id}.nii"}
    return True, {"image": f"./imagesTs/{subject_id}"}

# --- 2. Process Training and Test Subjects ---
# One pass per split: (banner, tag used in skip reasons, subject list,
# image target folder, training flag, list collecting the dataset.json entries).
_split_jobs = (
    ("\n--- Processing Training Subjects ---", "TRAIN", train_fcd_subjects,
     IMAGES_TR_DIR, True, training_files),
    ("\n--- Processing Test Subjects ---", "TEST", test_fcd_subjects,
     IMAGES_TS_DIR, False, test_files),
)

for _banner, _tag, _subjects, _images_dir, _training_flag, _collected in _split_jobs:
    print(_banner)
    for subject_id in _subjects:
        success, result = process_subject(subject_id, _images_dir, is_training=_training_flag)
        if not success:
            # `result` is the reason string when the subject was skipped.
            skipped_subjects.append((subject_id, f"{_tag} - {result}"))
            continue
        _collected.append(result)

# --- 3. Generate dataset.json ---
# Channel index -> modality name; must match the _0000/_0001 file suffixes.
_channel_names = {"0": "T1w", "1": "FLAIR"}
# Label name -> integer value in the segmentation files.
_label_map = {"background": 0, "lesion": 1}

dataset_json = {
    "name": "FCD Lesion Segmentation",
    "description": "Focal Cortical Dysplasia Lesion Segmentation Dataset (Pre-defined Splits)",
    "reference": "your/publication/link/here",
    "licence": "CC-BY-4.0",
    "release": "1.0",
    "channel_names": _channel_names,
    "labels": _label_map,
    "numTraining": len(training_files),
    "file_ending": ".nii",
    "training": training_files,
    "test": test_files,
}

# Serialize first, then write the text into the dataset folder.
_dataset_json_path = os.path.join(NNUNET_RAW_DATA_DIR, 'dataset.json')
with open(_dataset_json_path, 'w') as json_file:
    json_file.write(json.dumps(dataset_json, indent=4))

print("\n✅ Conversion complete!")
print(f"  Training subjects: {len(training_files)}")
print(f"  Test subjects: {len(test_files)}")

# List every subject that was left out together with the reason.
if skipped_subjects:
    print("\n--- ⚠️ Skipped Subjects Summary ---")
    print("\n".join(f"  {subj}: {reason}" for subj, reason in skipped_subjects))


# Registration

In [None]:
!pip install SimpleITK -q

import SimpleITK as sitk
from tqdm import tqdm

# Dataset folder produced by the conversion step and its three sub-folders.
DATASET_PATH = '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw/Dataset001_FCDLesions'
IMAGES_TR, LABELS_TR, IMAGES_TS = (
    os.path.join(DATASET_PATH, _subfolder)
    for _subfolder in ('imagesTr', 'labelsTr', 'imagesTs')
)

def register_flair_to_t1(subject_id, folder, is_training=True, transform_type='rigid'):
    """
    Align a subject's FLAIR volume to its T1 volume, overwriting the FLAIR file.

    The T1 (``<subject_id>_0000.nii``) is the fixed image and the FLAIR
    (``<subject_id>_0001.nii``) the moving image, both read from ``folder``.
    The resulting transform is used to resample the FLAIR onto the T1 grid;
    for training subjects the label in ``LABELS_TR`` is resampled with the
    same transform (nearest neighbour) and overwritten as well.

    ``transform_type`` selects a 3D Euler transform when it equals 'rigid';
    any other value selects a 3D affine transform.

    Returns False (after printing a warning) when the T1 or FLAIR file is
    missing, True otherwise. A missing label only produces a warning.
    """
    fixed_path = os.path.join(folder, f"{subject_id}_0000.nii")
    moving_path = os.path.join(folder, f"{subject_id}_0001.nii")

    if not os.path.exists(fixed_path) or not os.path.exists(moving_path):
        print(f"⚠️ Missing T1 or FLAIR for {subject_id}")
        return False

    # Both volumes are read as float32 for the registration.
    fixed_image = sitk.ReadImage(fixed_path, sitk.sitkFloat32)
    moving_image = sitk.ReadImage(moving_path, sitk.sitkFloat32)

    # Metric, sampling, interpolation and optimizer configuration.
    reg = sitk.ImageRegistrationMethod()
    reg.SetMetricAsMattesMutualInformation(numberOfHistogramBins=50)
    reg.SetMetricSamplingStrategy(reg.RANDOM)
    reg.SetMetricSamplingPercentage(0.2)
    reg.SetInterpolator(sitk.sitkLinear)
    reg.SetOptimizerAsGradientDescent(learningRate=1.0, numberOfIterations=100)
    reg.SetOptimizerScalesFromPhysicalShift()

    # Transform model, initialized from the two images.
    base_transform = sitk.Euler3DTransform() if transform_type == 'rigid' else sitk.AffineTransform(3)
    initial_transform = sitk.CenteredTransformInitializer(fixed_image, moving_image, base_transform)

    # Three-level pyramid: shrink by 4, 2, 1 with smoothing sigmas 2, 1, 0.
    reg.SetInitialTransform(initial_transform, inPlace=False)
    reg.SetShrinkFactorsPerLevel(shrinkFactors=[4,2,1])
    reg.SetSmoothingSigmasPerLevel(smoothingSigmas=[2,1,0])
    reg.SmoothingSigmasAreSpecifiedInPhysicalUnitsOn()

    final_transform = reg.Execute(fixed_image, moving_image)

    # Put the FLAIR on the T1 grid and replace the original file.
    flair_on_t1 = sitk.Resample(moving_image, fixed_image, final_transform,
                                sitk.sitkLinear, 0.0, moving_image.GetPixelID())
    sitk.WriteImage(flair_on_t1, moving_path)

    if not is_training:
        return True

    # The label is moved with the same transform; nearest neighbour keeps the
    # label values unchanged.
    label_path = os.path.join(LABELS_TR, f"{subject_id}.nii")
    if not os.path.exists(label_path):
        print(f"⚠️ Label missing for {subject_id}")
        return True

    label_image = sitk.ReadImage(label_path, sitk.sitkUInt8)
    label_on_t1 = sitk.Resample(label_image, fixed_image, final_transform,
                                sitk.sitkNearestNeighbor, 0.0, label_image.GetPixelID())
    sitk.WriteImage(label_on_t1, label_path)
    return True


# --- Apply to all subjects ---
def _subject_ids_in(folder, suffix='_0000.nii'):
    """Return the sorted subject IDs of every `<subject_id><suffix>` file in `folder`.

    Only the channel suffix is removed, so subject IDs that themselves contain
    an underscore are kept intact (splitting on the first '_' would cut them
    short and the subject's files would then not be found).
    """
    return sorted({name[:-len(suffix)] for name in os.listdir(folder) if name.endswith(suffix)})


# Subjects for which register_flair_to_t1 reported a failure.
failed_registrations = []

print(" Starting registration for training set...")
train_subjects = _subject_ids_in(IMAGES_TR)

for subj in tqdm(train_subjects):
    if not register_flair_to_t1(subj, IMAGES_TR, is_training=True, transform_type='rigid'):
        failed_registrations.append(subj)

print(" Starting registration for test set...")
test_subjects = _subject_ids_in(IMAGES_TS)

for subj in tqdm(test_subjects):
    if not register_flair_to_t1(subj, IMAGES_TS, is_training=False, transform_type='rigid'):
        failed_registrations.append(subj)

print(" Registration complete for all subjects!")

# Make failures visible instead of dropping the function's return value.
if failed_registrations:
    print(f"⚠️ Registration was not performed for {len(failed_registrations)} subject(s): {failed_registrations}")


# Planning and Preprocessing

In [None]:
import os

# Environment variables for nnUNetv2: raw data, preprocessed data and results.
_NNUNET_V2_PATHS = {
    'nnUNet_raw': '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw',
    'nnUNet_preprocessed': '/kaggle/working/nnUNet_preprocessed',
    'nnUNet_results': '/kaggle/working/nnUNet_results',
}
os.environ.update(_NNUNET_V2_PATHS)

# Print the values back to confirm they are set in this process.
for k in _NNUNET_V2_PATHS:
    print(f"{k} -> {os.environ[k]}")


In [None]:
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity