In [1]:
import os
import glob
import numpy as np
import nibabel as nib
import json
from scipy.ndimage import binary_erosion, generate_binary_structure, rotate
from skimage.transform import downscale_local_mean
import time

# ==========================================
# 1. BULK CONFIGURATION (nnU-Net v2)
# ==========================================
# Source root (clean AIBL scans)
INPUT_ROOT_DIR = "/media/PORT-DISK/Practicas/MicroBleeds_Generation/CSIRO_dataset/NoCMBSubject/data"

# Destination: IMPORTANT, the folder must follow the DatasetXXX_Name format.
# Change 'Dataset501_SyntheticCMB' to whichever ID you want (e.g. 001, 500, ...).
DATASET_NAME = "Dataset501_SyntheticCMB"
NNUNET_RAW_DIR = "/media/PORT-DISK/Practicas/MicroBleeds_Generation/nnUNet_raw"
OUTPUT_DIR = os.path.join(NNUNET_RAW_DIR, DATASET_NAME)

# Parameters
NUM_LESIONS_PER_SCAN = 20
RANDOM_SEED = 42

# Create the nnU-Net v2 folder layout.
# The sub-folders are created unconditionally: guarding on the existence of
# OUTPUT_DIR alone would skip them whenever the dataset folder is already
# there but empty or incomplete, and every later save would then fail.
os.makedirs(os.path.join(OUTPUT_DIR, "imagesTr"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, "labelsTr"), exist_ok=True)

# ==========================================
# 2. MOTOR MATEMÁTICO (Momeni Validado)
# ==========================================
def generate_momeni_gaussian(target_volume_mm3, voxel_size_mm, oversample=10):
    """Build a randomly shaped and rotated 3-D Gaussian blob.

    The blob is rendered on a grid ``oversample`` times finer than the scan
    voxels, rotated by a random angle in [-30, 30] degrees about each of the
    three axis planes, averaged back down to scan resolution and finally
    rescaled so that its maximum is 1.

    Args:
        target_volume_mm3: Volume used to derive the base sigma. The base
            sigma is the radius of a sphere of this volume divided by the
            constant ``K``.
        voxel_size_mm: Per-axis voxel size in mm; the first three entries
            are used.
        oversample: Integer super-sampling factor of the rendering grid.

    Returns:
        A cubic 3-D float array. It is normalised to a peak of 1 whenever
        its maximum is positive.

    Note:
        Random numbers come from the global ``np.random`` state, in this
        order: x ratio, y ratio, then the three rotation angles.
    """
    K = 1.175  # scale between sphere radius and sigma (value taken as given)
    sigma_t_mm = np.cbrt((3 * target_volume_mm3) / (4 * np.pi * (K**3)))

    # x and y sigmas are a random fraction (0.5-0.9) of the base sigma;
    # z is chosen so that the product of the three sigmas stays sigma_t**3.
    lo, hi = 0.5, 0.9
    sigma_mm = [
        sigma_t_mm * ((hi - lo) * np.random.uniform(0, 1) + lo)
        for _ in range(2)
    ]
    sigma_mm.append((sigma_t_mm**3) / (sigma_mm[0] * sigma_mm[1]))

    # Express the sigmas in voxels of the oversampled grid.
    fine_voxel_mm = np.array(voxel_size_mm) / oversample
    sx, sy, sz = (s / fine_voxel_mm[i] for i, s in enumerate(sigma_mm))

    # Cubic grid about six times the largest sigma wide, forced to an odd
    # side length so that there is a single central sample.
    side = int(max(sx, sy, sz) * 6) + 1
    side += 1 - side % 2
    offsets = np.arange(side) - side // 2
    gx, gy, gz = np.meshgrid(offsets, offsets, offsets, indexing='ij')

    blob = np.exp(
        -((gx**2) / (2 * sx**2) + (gy**2) / (2 * sy**2) + (gz**2) / (2 * sz**2))
    )

    # Draw all three angles first, then rotate plane by plane.
    angles = [np.random.uniform(-30, 30) for _ in range(3)]
    planes = [(0, 1), (1, 2), (0, 2)]
    for angle, plane in zip(angles, planes):
        blob = rotate(blob, angle, axes=plane, reshape=False, order=1)

    # Block-average back to scan resolution, then normalise the peak to 1.
    coarse = downscale_local_mean(blob, (oversample, oversample, oversample))
    peak = coarse.max()
    if peak > 0:
        coarse /= peak

    return coarse

def get_brain_safety_mask(img_data, threshold=20, erosion_iters=6):
    """Return a boolean mask of voxels lying well inside the bright region.

    Voxels strictly brighter than ``threshold`` form the foreground, which
    is then shrunk by ``erosion_iters`` passes of binary erosion with a
    face-connected (connectivity 1) 3-D structuring element.

    Args:
        img_data: 3-D intensity array.
        threshold: Intensity a voxel must exceed to count as foreground.
        erosion_iters: Number of erosion passes.

    Returns:
        Boolean array with the same shape as ``img_data``.
    """
    cross = generate_binary_structure(3, 1)
    foreground = img_data > threshold
    return binary_erosion(foreground, structure=cross, iterations=erosion_iters)

def implant_and_label(image_data, label_data, center, volume_mm3, voxel_dims, strength):
    """Implant one synthetic lesion into a scan and mark it in the label map.

    A lesion pattern is generated with ``generate_momeni_gaussian`` and
    placed so that its middle voxel sits on ``center``. Inside that patch the
    image is multiplied by ``1 - pattern * strength`` (darkening it), and
    every voxel where the pattern is at least 0.5 is set to 1 in the label
    map (existing larger label values are kept).

    Args:
        image_data: 3-D intensity array; modified in place.
        label_data: 3-D label array of the same shape; modified in place.
        center: Three integer voxel indices ``(x, y, z)``.
        volume_mm3: Target lesion volume passed to the pattern generator.
        voxel_dims: Per-axis voxel size in mm passed to the pattern generator.
        strength: Factor applied to the pattern before it is subtracted
            from 1 to form the intensity multiplier.

    Returns:
        The tuple ``(image_data, label_data)``. If the patch does not fit
        entirely inside the image, both arrays are returned untouched.
    """
    x, y, z = center
    lesion_pattern = generate_momeni_gaussian(volume_mm3, voxel_dims, oversample=10)

    # Half-open [start, stop) bounds of the patch along each axis.
    bounds = [
        (c - n // 2, c - n // 2 + n)
        for c, n in zip((x, y, z), lesion_pattern.shape)
    ]

    # Skip lesions that would stick out of the volume. Because of this check
    # the patch always has exactly the pattern's shape, so no cropping of the
    # pattern is needed afterwards.
    for (start, stop), dim in zip(bounds, image_data.shape[:3]):
        if start < 0 or stop > dim:
            return image_data, label_data

    region = tuple(slice(start, stop) for start, stop in bounds)

    # Attenuate the image: a pattern value of 1 removes `strength` of the signal.
    image_data[region] = image_data[region] * (1 - (lesion_pattern * strength))

    # Binarise at half maximum, in the label array's own dtype, and merge
    # with whatever is already labelled there.
    lesion_mask = (lesion_pattern >= 0.5).astype(label_data.dtype)
    label_data[region] = np.maximum(label_data[region], lesion_mask)

    return image_data, label_data

# ==========================================
# 3. BULK PIPELINE EXECUTION
# ==========================================
print("Iniciando Pipeline Masivo...")
# glob returns paths in arbitrary order. Sorting keeps the mapping between
# source files and CSIRO_xxx IDs (and the random stream consumed per scan)
# identical from run to run, which is what RANDOM_SEED is meant to ensure.
nifti_files = sorted(
    glob.glob(os.path.join(INPUT_ROOT_DIR, "**/*.nii.gz"), recursive=True)
)
print(f"Archivos encontrados: {len(nifti_files)}")

np.random.seed(RANDOM_SEED)

# Number of cases actually written; used later for dataset.json.
train_files_count = 0

for idx, file_path in enumerate(nifti_files):
    start_time = time.time()
    filename = os.path.basename(file_path)
    # Simple nnU-Net case ID: CSIRO_001, CSIRO_002...
    subject_id = f"CSIRO_{idx+1:03d}"

    print(f"\n[{idx+1}/{len(nifti_files)}] {filename} -> {subject_id}")

    try:
        nii = nib.load(file_path)
        data_img = nii.get_fdata().astype(float)
        data_label = np.zeros(data_img.shape, dtype=np.uint8)
        voxel_dims = nii.header.get_zooms()

        # Candidate lesion centres: voxels well inside the bright region.
        safety_mask = get_brain_safety_mask(data_img)
        valid_coords = np.argwhere(safety_mask)

        if len(valid_coords) < NUM_LESIONS_PER_SCAN:
            print("⚠️  SKIP: Cerebro muy pequeño.")
            continue

        # Pick distinct centres (sampling without replacement).
        random_indices = np.random.choice(
            len(valid_coords), size=NUM_LESIONS_PER_SCAN, replace=False
        )
        selected_coords = valid_coords[random_indices]

        for coord in selected_coords:
            vol_rnd = np.random.uniform(0.8, 15.0)
            str_rnd = np.random.uniform(0.85, 1.0)
            data_img, data_label = implant_and_label(
                data_img, data_label, coord, vol_rnd, voxel_dims, str_rnd
            )

        # --- NNUNET SAVE ---
        out_name_img = f"{subject_id}_0000.nii.gz"  # channel 0
        out_name_lbl = f"{subject_id}.nii.gz"       # label

        path_img = os.path.join(OUTPUT_DIR, "imagesTr", out_name_img)
        path_lbl = os.path.join(OUTPUT_DIR, "labelsTr", out_name_lbl)

        nib.save(nib.Nifti1Image(data_img, nii.affine, nii.header), path_img)

        # A header passed to Nifti1Image keeps its own on-disk dtype, so the
        # label map would otherwise be stored with the source image's dtype.
        # Use a copy of the header that declares uint8 instead.
        label_header = nii.header.copy()
        label_header.set_data_dtype(np.uint8)
        nib.save(nib.Nifti1Image(data_label, nii.affine, label_header), path_lbl)

        train_files_count += 1
        elapsed = time.time() - start_time
        print(f"Ok ({elapsed:.2f}s)")

    except Exception as e:
        # Best effort: report the failure and carry on with the next scan.
        print(f"ERROR: {e}")

# ==========================================
# 4. AUTOMATIC DATASET.JSON GENERATION (v2)
# ==========================================
print('-' * 50)
print("Generando dataset.json para nnU-Net v2...")

# Built key by key; insertion order is the order written to the file.
json_dict = {}
json_dict["channel_names"] = {"0": "SWI"}  # or whichever modality applies (T2*, T1, ...)
json_dict["labels"] = {"background": 0, "CMB": 1}
json_dict["numTraining"] = train_files_count
json_dict["file_ending"] = ".nii.gz"
json_dict["name"] = DATASET_NAME
json_dict["reference"] = "Synthetic Data generated via Momeni method"
json_dict["release"] = "1.0"
json_dict["description"] = "Synthetic Microbleeds dataset for TFM"
json_dict["overwrite_image_reader_writer"] = "SimpleITKIO"  # optional, sometimes helps

json_path = os.path.join(OUTPUT_DIR, "dataset.json")
serialized = json.dumps(json_dict, indent=4)
with open(json_path, 'w') as f:
    f.write(serialized)

print("dataset.json creado en: " + json_path)
print('-' * 50)
print("TODO LISTO. Dataset preparado en: " + OUTPUT_DIR)
print(" -> Siguiente paso: nnUNetv2_plan_and_preprocess")

Iniciando Pipeline Masivo...
Archivos encontrados: 313

[1/313] 2_T2_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_001
Ok (3.66s)

[2/313] 13_T2_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_002
Ok (3.36s)

[3/313] 13_T3_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_003
Ok (1.48s)

[4/313] 14_T2_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_004
Ok (1.43s)

[5/313] 14_T3_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_005
Ok (2.66s)

[6/313] 14_T4_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_006
Ok (2.64s)

[7/313] 14_T5_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_007
Ok (3.09s)

[8/313] 14_T6_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_008
Ok (2.58s)

[9/313] 14_T7_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_009
Ok (2.35s)

[10/313] 18_T2_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_010
Ok (2.47s)

[11/313] 18_T3_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_011
Ok (2.20s)

[12/313] 22_T2_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_012
Ok (1.85s)

[13/313] 22_T3_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_013
Ok (1.93s)

[14/313] 22_T4_MRI_SWI_BFC_50mm_HM.nii.gz -> CSIRO_014
Ok (2.51s)

[15/313] 22_T5_M