In [1]:
# !pip install pyradiomics SimpleITK tqdm pypickle Path flaml

In [2]:
import SimpleITK as sitk
from radiomics import featureextractor
import json
import glob
import pandas as pd
import re
import os
import logging
from tqdm.notebook import tqdm
from pathlib import Path
import pypickle

logging.getLogger("radiomics").setLevel(logging.ERROR)

USERNAME = "daniil.tikhonov"
MNI_TEMPLATE_PATH = Path("/home") / USERNAME / "fsl" / "data" / "standard" / "MNI152_T1_1mm_brain.nii.gz"

if not os.path.exists(MNI_TEMPLATE_PATH):
    raise FileNotFoundError(f"MNI Template not found at: {MNI_TEMPLATE_PATH}. Please update the path.")

# --- Directory and File Paths ---
data_dir = Path("/home") / USERNAME / "mri" / "data" / "Lumiere"
patients_file = os.path.join(data_dir, "patients.json")

# New directory for atlas-registered files
atlas_dir = Path(os.getcwd()) / "atlas_mapping"
atlas_scans_dir = atlas_dir / "scans"
atlas_segs_dir = atlas_dir / "segmentations"
os.makedirs(atlas_scans_dir, exist_ok=True)
os.makedirs(atlas_segs_dir, exist_ok=True)

In [3]:
def load_patients(patients_file):
    with open(patients_file) as f: return json.load(f)

In [4]:
def extract_week_number(week_str):
    match = re.match(r"week-(\d+)", week_str)
    if match: return int(match.group(1))
    else: raise ValueError(f"Invalid week format: {week_str}")

def get_week_from_path(path):
    match = re.search(r'week-(\d+)', path)
    if match: return int(match.group(1))
    else: raise ValueError(f"No valid week found in path: {path}")
 
def get_patient_id_from_path(path):
    match = re.search(r'Patient-(\d+)', path)
    if match: return f'patient_{match.group(1).zfill(3)}'
    else: raise ValueError(f"No valid patient ID found in path: {path}")

def load_json(path):
    with open(path, 'r') as f: return json.load(f)

def load_patients(patients_file):
    with open(patients_file) as f: return json.load(f)

def find_all_volume_paths(patient_dir):
    pattern = os.path.join(patient_dir, 'week-*', volume_rel_path)
    files = glob.glob(pattern)
    week_map = {}
    for fp in files:
        parts = fp.split('/')
        week = next((part for part in parts if part.startswith('week-')), None)
        if week == "week-000-1": continue
        week = extract_week_number(week)
        week_map[week] = fp
    return week_map

In [5]:
def register_to_atlas(fixed_image, moving_t1_image, moving_other_image, is_segmentation=False):
    """
    Registers an image to the atlas space using a transform derived from its corresponding T1 scan.
    
    Args:
        fixed_image (sitk.Image): The MNI atlas template image.
        moving_t1_image (sitk.Image): The patient's T1 scan (used to calculate the transform).
        moving_other_image (sitk.Image): The image to be transformed (can be T1c, T2, FLAIR, or seg).
        is_segmentation (bool): If True, use Nearest Neighbor interpolation.
        
    Returns:
        sitk.Image: The resampled/registered image.
    """
    # Use a robust registration method
    registration_method = sitk.ImageRegistrationMethod()
    
    # Similarity metric
    registration_method.SetMetricAsMattesMutualInformation(numberOfHistogramBins=50)
    registration_method.SetMetricSamplingStrategy(registration_method.RANDOM)
    registration_method.SetMetricSamplingPercentage(0.01)
    
    # Interpolator
    registration_method.SetInterpolator(sitk.sitkLinear)
    
    # Optimizer
    registration_method.SetOptimizerAsGradientDescent(learningRate=0.33, numberOfIterations=300, convergenceMinimumValue=1e-6, convergenceWindowSize=10)
    registration_method.SetOptimizerScalesFromPhysicalShift()
    
    # Setup for the transform
    initial_transform = sitk.CenteredTransformInitializer(fixed_image, 
                                                          moving_t1_image, 
                                                          sitk.Euler3DTransform(), 
                                                          sitk.CenteredTransformInitializerFilter.GEOMETRY)
    
    registration_method.SetInitialTransform(initial_transform, inPlace=False)
    
    # Execute the registration to find the transform
    final_transform = registration_method.Execute(sitk.Cast(fixed_image, sitk.sitkFloat32), 
                                                  sitk.Cast(moving_t1_image, sitk.sitkFloat32))

    # --- Apply the found transform to the 'moving_other_image' ---
    resampler = sitk.ResampleImageFilter()
    resampler.SetReferenceImage(fixed_image) # Resample to the same grid as the atlas
    resampler.SetTransform(final_transform)
    
    # Use appropriate interpolation
    if is_segmentation:
        resampler.SetInterpolator(sitk.sitkNearestNeighbor)
    else:
        resampler.SetInterpolator(sitk.sitkLinear)
        
    resampler.SetDefaultPixelValue(0) # Pad with black
    
    return resampler.Execute(moving_other_image)


# === Preprocessing Step to Create Atlas-Registered Files ===

def preprocess_and_register_all_scans(patients, data_dir):
    """
    Iterates through all scans, registers them to the MNI atlas, and saves them.
    This version correctly handles inconsistencies in patients.json.
    """
    print("--- Starting Preprocessing: Registering all scans to MNI atlas ---")
    fixed_image = sitk.ReadImage(MNI_TEMPLATE_PATH, sitk.sitkFloat32)
    total_cases = sum(len(cases) for cases in patients.values())
    pbar = tqdm(total=total_cases, desc="Registering cases to atlas", unit="case")

    # The outer `pid` from `patients.items()` is unreliable.
    for unreliable_pid, cases in patients.items():
        for cid, meta in cases.items():
            try:
                true_pid = get_patient_id_from_path(meta['baseline_registered'])

                # --- Process BASELINE scans ---
                baseline_t1_atlas_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_baseline_T1.nii.gz")
                if not os.path.exists(baseline_t1_atlas_path):
                    baseline_t1_path = f"{data_dir}/{meta['baseline_registered'].replace('./', '')}/{meta['baseline_registered'].replace('./images_registered/', '')}_0000.nii.gz"
                    moving_t1_baseline = sitk.ReadImage(baseline_t1_path, sitk.sitkFloat32)
                    
                    for mri_type_idx, mri_type_name in enumerate(['T1', 'T1CE', 'T2', 'FLAIR']):
                        image_path = f"{data_dir}/{meta['baseline_registered'].replace('./', '')}/{meta['baseline_registered'].replace('./images_registered/', '')}_{mri_type_idx:04d}.nii.gz"
                        moving_image = sitk.ReadImage(image_path, sitk.sitkFloat32)
                        registered_image = register_to_atlas(fixed_image, moving_t1_baseline, moving_image)
                        # Use 'true_pid' for saving the file.
                        output_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_baseline_{mri_type_name}.nii.gz")
                        sitk.WriteImage(registered_image, output_path)

                    seg_path = f"{data_dir}/{meta['baseline_seg_registered'].replace('./', '')}"
                    moving_seg = sitk.ReadImage(seg_path)
                    registered_seg = register_to_atlas(fixed_image, moving_t1_baseline, moving_seg, is_segmentation=True)
                    # Use 'true_pid' for saving the file.
                    output_seg_path = os.path.join(atlas_segs_dir, f"{true_pid}_{cid}_baseline_seg.nii.gz")
                    sitk.WriteImage(registered_seg, output_seg_path)

                # --- Process FOLLOWUP scans ---
                followup_t1_atlas_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_followup_T1.nii.gz")
                if not os.path.exists(followup_t1_atlas_path):
                    followup_t1_path = f"{data_dir}/{meta['followup_registered'].replace('./', '')}/{meta['followup_registered'].replace('./images_registered/', '')}_0000.nii.gz"
                    moving_t1_followup = sitk.ReadImage(followup_t1_path, sitk.sitkFloat32)

                    for mri_type_idx, mri_type_name in enumerate(['T1', 'T1CE', 'T2', 'FLAIR']):
                        image_path = f"{data_dir}/{meta['followup_registered'].replace('./', '')}/{meta['followup_registered'].replace('./images_registered/', '')}_{mri_type_idx:04d}.nii.gz"
                        moving_image = sitk.ReadImage(image_path, sitk.sitkFloat32)
                        registered_image = register_to_atlas(fixed_image, moving_t1_followup, moving_image)
                        output_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_followup_{mri_type_name}.nii.gz")
                        sitk.WriteImage(registered_image, output_path)

                    seg_path = f"{data_dir}/{meta['followup_seg_registered'].replace('./', '')}"
                    moving_seg = sitk.ReadImage(seg_path)
                    registered_seg = register_to_atlas(fixed_image, moving_t1_followup, moving_seg, is_segmentation=True)
                    output_seg_path = os.path.join(atlas_segs_dir, f"{true_pid}_{cid}_followup_seg.nii.gz")
                    sitk.WriteImage(registered_seg, output_seg_path)

            except Exception as e:
                print(f"ERROR processing case {unreliable_pid}/{cid}. True Patient: {get_patient_id_from_path(meta.get('baseline_registered', '')) if meta.get('baseline_registered') else 'Unknown'}. Error: {e}")
            finally:
                pbar.update(1)

    pbar.close()
    print("--- Preprocessing complete. All files saved to atlas_mapping directory. ---")

In [None]:
# Load patient data
patients = load_patients(patients_file)

# STEP 1: Run the one-time preprocessing to register all images to the atlddas
# You can comment this out after it has been run once.
preprocess_and_register_all_scans(patients, data_dir)

--- Starting Preprocessing: Registering all scans to MNI atlas ---


Registering cases to atlas:   0%|          | 0/361 [00:00<?, ?case/s]

In [None]:
import SimpleITK as sitk
from radiomics import featureextractor
import json
import glob
import pandas as pd
import re
import os
import logging
from tqdm.notebook import tqdm
from pathlib import Path
import pypickle

# --- Configuration & Setup ---

# Configure logging to be less verbose for radiomics
logging.getLogger("radiomics").setLevel(logging.ERROR)

MNI_TEMPLATE_PATH = "/home/daniil.tikhonov/fsl/data/standard/MNI152_T1_1mm_brain.nii.gz"

if not os.path.exists(MNI_TEMPLATE_PATH):
    raise FileNotFoundError(f"MNI Template not found at: {MNI_TEMPLATE_PATH}. Please update the path.")

# Load selected features for radiomics
with open('selected_features.json', 'r') as f:
    SELECTED_FEATS = json.load(f)

# --- Directory and File Paths ---
data_dir = "/home/matheus.scatolin/Documents/Project11/dataset/Lumiere"
patients_file = os.path.join(data_dir, "patients.json")
volume_rel_path = "DeepBraTumIA-segmentation/atlas/segmentation/measured_volumes_in_mm3.json"

# New directory for atlas-registered files
atlas_dir = os.path.join(data_dir, "atlas_mapping")
atlas_scans_dir = os.path.join(atlas_dir, "scans")
atlas_segs_dir = os.path.join(atlas_dir, "segmentations")
os.makedirs(atlas_scans_dir, exist_ok=True)
os.makedirs(atlas_segs_dir, exist_ok=True)


# === Helper Functions (Originals, mostly unchanged) ===
# def extract_week_number(week_str):
#     match = re.match(r"week-(\d+)", week_str)
#     if match: return int(match.group(1))
#     else: raise ValueError(f"Invalid week format: {week_str}")

# def get_week_from_path(path):
#     match = re.search(r'week-(\d+)', path)
#     if match: return int(match.group(1))
#     else: raise ValueError(f"No valid week found in path: {path}")

# def get_patient_id_from_path(path):
#     match = re.search(r'Patient-(\d+)', path)
#     if match: return f'patient_{match.group(1).zfill(3)}'
#     else: raise ValueError(f"No valid patient ID found in path: {path}")

# def load_json(path):
#     with open(path, 'r') as f: return json.load(f)

# def load_patients(patients_file):
#     with open(patients_file) as f: return json.load(f)

# def find_all_volume_paths(patient_dir):
#     pattern = os.path.join(patient_dir, 'week-*', volume_rel_path)
#     files = glob.glob(pattern)
#     week_map = {}
#     for fp in files:
#         parts = fp.split('/')
#         week = next((part for part in parts if part.startswith('week-')), None)
#         if week == "week-000-1": continue
#         week = extract_week_number(week)
#         week_map[week] = fp
#     return week_map

# === NEW: Registration Functions ===

def register_to_atlas(fixed_image, moving_t1_image, moving_other_image, is_segmentation=False):
    """
    Registers an image to the atlas space using a transform derived from its corresponding T1 scan.
    
    Args:
        fixed_image (sitk.Image): The MNI atlas template image.
        moving_t1_image (sitk.Image): The patient's T1 scan (used to calculate the transform).
        moving_other_image (sitk.Image): The image to be transformed (can be T1c, T2, FLAIR, or seg).
        is_segmentation (bool): If True, use Nearest Neighbor interpolation.
        
    Returns:
        sitk.Image: The resampled/registered image.
    """
    # Use a robust registration method
    registration_method = sitk.ImageRegistrationMethod()
    
    # Similarity metric
    registration_method.SetMetricAsMattesMutualInformation(numberOfHistogramBins=50)
    registration_method.SetMetricSamplingStrategy(registration_method.RANDOM)
    registration_method.SetMetricSamplingPercentage(0.01)
    
    # Interpolator
    registration_method.SetInterpolator(sitk.sitkLinear)
    
    # Optimizer
    registration_method.SetOptimizerAsGradientDescent(learningRate=1.0, numberOfIterations=100, convergenceMinimumValue=1e-6, convergenceWindowSize=10)
    registration_method.SetOptimizerScalesFromPhysicalShift()
    
    # Setup for the transform
    initial_transform = sitk.CenteredTransformInitializer(fixed_image, 
                                                          moving_t1_image, 
                                                          sitk.Euler3DTransform(), 
                                                          sitk.CenteredTransformInitializerFilter.GEOMETRY)
    
    registration_method.SetInitialTransform(initial_transform, inPlace=False)
    
    # Execute the registration to find the transform
    final_transform = registration_method.Execute(sitk.Cast(fixed_image, sitk.sitkFloat32), 
                                                  sitk.Cast(moving_t1_image, sitk.sitkFloat32))

    # --- Apply the found transform to the 'moving_other_image' ---
    resampler = sitk.ResampleImageFilter()
    resampler.SetReferenceImage(fixed_image) # Resample to the same grid as the atlas
    resampler.SetTransform(final_transform)
    
    # Use appropriate interpolation
    if is_segmentation:
        resampler.SetInterpolator(sitk.sitkNearestNeighbor)
    else:
        resampler.SetInterpolator(sitk.sitkLinear)
        
    resampler.SetDefaultPixelValue(0) # Pad with black
    
    return resampler.Execute(moving_other_image)


# === Preprocessing Step to Create Atlas-Registered Files ===

def preprocess_and_register_all_scans(patients, data_dir):
    """
    Iterates through all scans, registers them to the MNI atlas, and saves them.
    This version correctly handles inconsistencies in patients.json.
    """
    print("--- Starting Preprocessing: Registering all scans to MNI atlas ---")
    fixed_image = sitk.ReadImage(MNI_TEMPLATE_PATH, sitk.sitkFloat32)
    total_cases = sum(len(cases) for cases in patients.values())
    pbar = tqdm(total=total_cases, desc="Registering cases to atlas", unit="case")

    # The outer `pid` from `patients.items()` is unreliable.
    for unreliable_pid, cases in patients.items():
        for cid, meta in cases.items():
            try:
                true_pid = get_patient_id_from_path(meta['baseline_registered'])

                # --- Process BASELINE scans ---
                baseline_t1_atlas_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_baseline_T1.nii.gz")
                if not os.path.exists(baseline_t1_atlas_path):
                    baseline_t1_path = f"{data_dir}/{meta['baseline_registered'].replace('./', '')}/{meta['baseline_registered'].replace('./images_registered/', '')}_0000.nii.gz"
                    moving_t1_baseline = sitk.ReadImage(baseline_t1_path, sitk.sitkFloat32)
                    
                    for mri_type_idx, mri_type_name in enumerate(['T1', 'T1CE', 'T2', 'FLAIR']):
                        image_path = f"{data_dir}/{meta['baseline_registered'].replace('./', '')}/{meta['baseline_registered'].replace('./images_registered/', '')}_{mri_type_idx:04d}.nii.gz"
                        moving_image = sitk.ReadImage(image_path, sitk.sitkFloat32)
                        registered_image = register_to_atlas(fixed_image, moving_t1_baseline, moving_image)
                        # Use 'true_pid' for saving the file.
                        output_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_baseline_{mri_type_name}.nii.gz")
                        sitk.WriteImage(registered_image, output_path)

                    seg_path = f"{data_dir}/{meta['baseline_seg_registered'].replace('./', '')}"
                    moving_seg = sitk.ReadImage(seg_path)
                    registered_seg = register_to_atlas(fixed_image, moving_t1_baseline, moving_seg, is_segmentation=True)
                    # Use 'true_pid' for saving the file.
                    output_seg_path = os.path.join(atlas_segs_dir, f"{true_pid}_{cid}_baseline_seg.nii.gz")
                    sitk.WriteImage(registered_seg, output_seg_path)

                # --- Process FOLLOWUP scans ---
                followup_t1_atlas_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_followup_T1.nii.gz")
                if not os.path.exists(followup_t1_atlas_path):
                    followup_t1_path = f"{data_dir}/{meta['followup_registered'].replace('./', '')}/{meta['followup_registered'].replace('./images_registered/', '')}_0000.nii.gz"
                    moving_t1_followup = sitk.ReadImage(followup_t1_path, sitk.sitkFloat32)

                    for mri_type_idx, mri_type_name in enumerate(['T1', 'T1CE', 'T2', 'FLAIR']):
                        image_path = f"{data_dir}/{meta['followup_registered'].replace('./', '')}/{meta['followup_registered'].replace('./images_registered/', '')}_{mri_type_idx:04d}.nii.gz"
                        moving_image = sitk.ReadImage(image_path, sitk.sitkFloat32)
                        registered_image = register_to_atlas(fixed_image, moving_t1_followup, moving_image)
                        output_path = os.path.join(atlas_scans_dir, f"{true_pid}_{cid}_followup_{mri_type_name}.nii.gz")
                        sitk.WriteImage(registered_image, output_path)

                    seg_path = f"{data_dir}/{meta['followup_seg_registered'].replace('./', '')}"
                    moving_seg = sitk.ReadImage(seg_path)
                    registered_seg = register_to_atlas(fixed_image, moving_t1_followup, moving_seg, is_segmentation=True)
                    output_seg_path = os.path.join(atlas_segs_dir, f"{true_pid}_{cid}_followup_seg.nii.gz")
                    sitk.WriteImage(registered_seg, output_seg_path)

            except Exception as e:
                print(f"ERROR processing case {unreliable_pid}/{cid}. True Patient: {get_patient_id_from_path(meta.get('baseline_registered', '')) if meta.get('baseline_registered') else 'Unknown'}. Error: {e}")
            finally:
                pbar.update(1)

    pbar.close()
    print("--- Preprocessing complete. All files saved to atlas_mapping directory. ---")


# === UPDATED: Feature Extraction from Atlas-Registered Files ===

def add_radiomics_features_from_atlas(feats, pid, cid):
    """
    Extracts radiomics features from the pre-registered ATLAS files.
    """
    extractor = featureextractor.RadiomicsFeatureExtractor()
    extractor.enableAllFeatures()
    extractor.settings['force2D'] = False

    for mri_type_name in ['T1', 'T1CE', 'T2', 'FLAIR']:
        try:
            # Construct paths to the NEW atlas-registered files
            image_path_b = os.path.join(atlas_scans_dir, f"{pid}_{cid}_baseline_{mri_type_name}.nii.gz")
            mask_path_b = os.path.join(atlas_segs_dir, f"{pid}_{cid}_baseline_seg.nii.gz")
            
            image_path_f = os.path.join(atlas_scans_dir, f"{pid}_{cid}_followup_{mri_type_name}.nii.gz")
            mask_path_f = os.path.join(atlas_segs_dir, f"{pid}_{cid}_followup_seg.nii.gz")

            # Load images and masks
            image_b = sitk.ReadImage(image_path_b)
            mask_b = sitk.ReadImage(mask_path_b)
            image_f = sitk.ReadImage(image_path_f)
            mask_f = sitk.ReadImage(mask_path_f)

            # Extract features
            features_b = extractor.execute(image_b, mask_b)
            features_f = extractor.execute(image_f, mask_f)

        except RuntimeError as e:
            if "No labels found" in str(e):
                print(f"INFO: No labels found for patient {pid}, case {cid}, MRI type {mri_type_name}. Setting radiomics to 0.")
                features_b = {feature: 0 for feature in SELECTED_FEATS}
                features_f = {feature: 0 for feature in SELECTED_FEATS}
            else:
                logging.error(f"Runtime error for patient {pid}, case {cid}, MRI type {mri_type_name}: {e}")
                features_b = {feature: 0 for feature in SELECTED_FEATS}
                features_f = {feature: 0 for feature in SELECTED_FEATS}
        except Exception as e:
            logging.error(f"Unexpected error for patient {pid}, case {cid}, MRI type {mri_type_name}: {e}")
            features_b = {feature: 0 for feature in SELECTED_FEATS}
            features_f = {feature: 0 for feature in SELECTED_FEATS}
        
        # Add features to the feats dictionary
        for feature in SELECTED_FEATS:
            val_b = float(features_b.get(feature, 0))
            val_f = float(features_f.get(feature, 0))
            feats[f'{mri_type_name}_{feature}_b'] = val_b
            feats[f'{mri_type_name}_{feature}_f'] = val_f
            feats[f'{mri_type_name}_{feature}_rel'] = (val_f - val_b) / (val_b + 1e-6)

# === MAIN FEATURE BUILDING (using the new radiomics function) ===

def build_feature_table(patients, data_dir):
    records = []
    total_cases = sum(len(cases) for cases in patients.values())
    pbar = tqdm(total=total_cases, desc="Extracting features for each case", unit="case")

    # The outer `pid` is unreliable.
    for unreliable_pid, cases in patients.items():
        for cid, meta in cases.items():
            try:
                # Get the TRUE patient ID from the file path for this specific case.
                true_pid = get_patient_id_from_path(meta['baseline'])
                patient_id_num_str = true_pid.replace('patient_', '')
                patient_id_num = int(patient_id_num_str)
                
                patient_folder_name = f"Patient-{patient_id_num_str}"
                patient_folder = os.path.join(data_dir, 'Imaging', patient_folder_name)
                week_paths = find_all_volume_paths(patient_folder)
                all_vols = {wk: load_json(p) for wk, p in week_paths.items()}

                w0, wf = get_week_from_path(meta['baseline']), get_week_from_path(meta['followup'])
                vb, vf = all_vols.get(w0, {}), all_vols.get(wf, {})

                nadir = {}
                for comp in ['Necrotic_NonEnhancing', 'Enhancing_Core', 'Edema_Compartment']:
                    vals = [vol.get(comp, 0) for wk, vol in all_vols.items() if wk <= wf]
                    nadir[comp] = min(vals) if vals else 0

                total_vals = [sum(vol.values()) for wk, vol in all_vols.items() if wk <= wf]
                total_nadir = min(total_vals) if total_vals else 0

                feats = {
                    'patient_json_file': unreliable_pid,
                    'patient_image_path': true_pid,
                    'case': cid,
                    'response': meta['response'],
                    'time_gap_weeks': wf - w0,
                }

                for comp in nadir:
                    b, f, n = vb.get(comp, 0), vf.get(comp, 0), nadir[comp]
                    total_b, total_f = sum(vb.values()) + 1e-6, sum(vf.values()) + 1e-6
                    feats.update({
                        f'{comp}_b': b, f'{comp}_f': f, f'{comp}_rel': (f - b) / (b + 1e-6),
                        f'{comp}_pct_b': b / total_b, f'{comp}_pct_f': f / total_f,
                        f'{comp}_nadir': n, f'{comp}_chg_from_nadir': (f - n) / (n + 1e-6)
                    })
                vb_tot, vf_tot = sum(vb.values()), sum(vf.values())
                feats.update({
                    'total_b': vb_tot, 'total_f': vf_tot,
                    'total_rel': (vf_tot - vb_tot) / (vb_tot + 1e-6),
                    'total_nadir': total_nadir,
                    'total_chg_from_nadir': (vf_tot - total_nadir) / (total_nadir + 1e-6)
                })
                
                add_radiomics_features_from_atlas(feats, true_pid, cid)
                records.append(feats)
            except Exception as e:
                print(f"ERROR building features for case {unreliable_pid}/{cid}. Error: {e}")
            finally:
                pbar.update(1)

    pbar.close()
    df = pd.DataFrame(records)
    df.fillna(0, inplace=True)
    return df

In [4]:
# Load patient data
patients = load_patients(patients_file)

# STEP 1: Run the one-time preprocessing to register all images to the atlas
# You can comment this out after it has been run once.
preprocess_and_register_all_scans(patients, data_dir)

--- Starting Preprocessing: Registering all scans to MNI atlas ---




In [None]:
# STEP 2: Build the feature table using the newly created atlas-registered files
feature_df = build_feature_table(patients, data_dir)

# Display the first few rows of the final dataframe
print("\nFeature DataFrame created successfully!")
feature_df.head()

In [None]:
# Save the dataframe
feature_df.to_csv("features_with_atlas_radiomics.csv", index=False)
print("\nDataFrame saved to features_with_atlas_radiomics.csv")

In [None]:
split = pypickle.load(Path(os.getcwd()) / "5_fold_split.pkl")
folds = {k: v for k, v in split.items() if 'patient_ids_json_file' in k}

# calculate the fold column
def calculate_fold_column(patient_id_json):
    id = int(patient_id_json.replace("patient_", ""))
    for fold, patient_ids in folds.items():
        if id in patient_ids:
            return int(fold.split('_')[1])  # Extract the fold number from the key

# delete the train column
if 'train' in df.columns:
    df = df.drop(columns=['train'])

# put this column as the 5th column
df.insert(4, 'fold', None)

df['fold'] = df['patient_json_file'].apply(calculate_fold_column)

print("Data shape after loading and fold assignment:", df.shape)
df.head()

In [None]:
# SAVE TO CSV AGAIN AFTER ADDING FOLD COLUMN

Extracting features:  22%|██▏       | 78/361 [42:48<8:05:37, 102.96s/case] ERROR:root:Unexpected error for patient patient_023, case case_01, MRI type T1: No labels found in this mask (i.e. nothing is segmented)!
ERROR:root:Unexpected error for patient patient_023, case case_01, MRI type T1CE: No labels found in this mask (i.e. nothing is segmented)!
ERROR:root:Unexpected error for patient patient_023, case case_01, MRI type T2: No labels found in this mask (i.e. nothing is segmented)!
ERROR:root:Unexpected error for patient patient_023, case case_01, MRI type FLAIR: No labels found in this mask (i.e. nothing is segmented)!
Extracting features: 100%|██████████| 361/361 [3:08:10<00:00, 31.28s/case]   

Index(['patient', 'case', 'response', 'time_gap_weeks', 'train',
       'Necrotic_NonEnhancing_b', 'Necrotic_NonEnhancing_f',
       'Necrotic_NonEnhancing_rel', 'Necrotic_NonEnhancing_pct_b',
       'Necrotic_NonEnhancing_pct_f',
       ...
       'FLAIR_original_ngtdm_Coarseness_rel',
       'FLAIR_original_ngtdm_Complexity_b',
       'FLAIR_original_ngtdm_Complexity_f',
       'FLAIR_original_ngtdm_Complexity_rel',
       'FLAIR_original_ngtdm_Contrast_b', 'FLAIR_original_ngtdm_Contrast_f',
       'FLAIR_original_ngtdm_Contrast_rel', 'FLAIR_original_ngtdm_Strength_b',
       'FLAIR_original_ngtdm_Strength_f', 'FLAIR_original_ngtdm_Strength_rel'],
      dtype='object', length=1339)





Unnamed: 0,patient,case,response,time_gap_weeks,train,Necrotic_NonEnhancing_b,Necrotic_NonEnhancing_f,Necrotic_NonEnhancing_rel,Necrotic_NonEnhancing_pct_b,Necrotic_NonEnhancing_pct_f,...,FLAIR_original_ngtdm_Coarseness_rel,FLAIR_original_ngtdm_Complexity_b,FLAIR_original_ngtdm_Complexity_f,FLAIR_original_ngtdm_Complexity_rel,FLAIR_original_ngtdm_Contrast_b,FLAIR_original_ngtdm_Contrast_f,FLAIR_original_ngtdm_Contrast_rel,FLAIR_original_ngtdm_Strength_b,FLAIR_original_ngtdm_Strength_f,FLAIR_original_ngtdm_Strength_rel
0,patient_001,case_01,2,44,1,4557.0,18615.0,3.084924,0.103894,0.497727,...,0.204067,328.63498435744964,7.377117388278558,-0.977552,0.039901817979397125,0.03126270291075473,-0.216504,0.08868813035420528,0.004872398907481225,-0.945051
1,patient_001,case_02,3,56,1,4557.0,0.0,-1.000000,0.103894,0.000000,...,53.909944,328.63498435744964,27.148232533834317,-0.917391,0.039901817979397125,0.013890268362665133,-0.651872,0.08868813035420528,1.6750438774954945,17.886699
2,patient_002,case_01,2,18,0,24931.0,982.0,-0.960611,0.339877,0.080209,...,0.530624,122.08733989264294,103.38878610967622,-0.153157,0.018866205324224047,0.016224767792842622,-0.140002,0.13569818605779374,0.17155102186875318,0.264208
3,patient_002,case_02,3,34,0,24931.0,70.0,-0.997192,0.339877,0.001394,...,-0.237836,122.08733989264294,84.26896983389348,-0.309765,0.018866205324224047,0.01814366351411647,-0.038296,0.13569818605779374,0.08163552462562457,-0.398401
4,patient_002,case_03,3,7,0,3177.0,9135.0,1.875354,0.113720,0.158676,...,-0.401922,78.42975298048647,100.22153638964184,0.277851,0.01280380563984138,0.011459915211096383,-0.104952,0.1527105523670362,0.11458967349431017,-0.249627
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,patient_090,case_01,3,29,1,278.0,6113.0,20.989209,0.004857,0.061901,...,-0.215032,301.57176872835845,96.93571583349284,-0.678565,0.008198533474177538,0.01792649436444592,1.186404,0.772652828509451,0.1181473954916159,-0.847088
357,patient_091,case_01,3,13,1,8494.0,1009.0,-0.881210,0.129751,0.047671,...,1.134864,1095.3465781969296,62.20718258671151,-0.943208,0.005627504003435554,0.003652710898959829,-0.350856,0.1815749632097702,0.03850510190178739,-0.787934
358,patient_091,case_02,3,25,1,8494.0,411.0,-0.951613,0.129751,0.014033,...,0.994825,1095.3465781969296,315.3835495587823,-0.712070,0.005627504003435554,0.006031538937842868,0.071784,0.1815749632097702,0.10381760127820133,-0.428236
359,patient_091,case_03,3,35,1,8494.0,663.0,-0.921945,0.129751,0.028721,...,0.914529,1095.3465781969296,39.549209612066164,-0.963893,0.005627504003435554,0.01144570925124568,1.033704,0.1815749632097702,0.013837128126819165,-0.923789
