# Steps

1. **Data Splitting**
   - Train:Test:Validation = 60:20:20

2. **Normalization**
   - Apply Z-score normalization to each cropped image volume, using that volume's own mean and standard deviation (masks are not normalized).

3. **Data Format Conversion**
   - Convert data from .nii.gz to .npy format.

In [1]:
# Import necessary modules
import os
import re
import cv2
import glob
import shutil
import numpy as np
import nilearn
import nibabel as nib
from scipy import ndimage
import matplotlib.pyplot as plt
import nilearn.plotting as plotting
from sklearn.model_selection import train_test_split

In [2]:
# Data path
# Root of the dataset: it is searched recursively for the *_T1w.nii.gz scans and
# their lesion masks, and the train/test/val folders are created underneath it.
data_folder = "/ssd_scratch/ATLAS_2/train_filter_3d"

In [3]:
# Image and Mask files
image_files = sorted(glob.glob(os.path.join(data_folder, "**/*_T1w.nii.gz"), recursive=True))
mask_files = sorted(glob.glob(os.path.join(data_folder, "**/*_label-L_desc-T1lesion_mask.nii.gz"), recursive=True))

# 60:20:20 split: hold out 40% first, then halve the hold-out into test and validation.
image_train_files, image_test_val_files = train_test_split(image_files, test_size=0.4, random_state=42)
image_test_files, image_val_files = train_test_split(image_test_val_files, test_size=0.5, random_state=42)
# NOTE: the mask lists are split with the same sizes and seed, but the processing
# below never reads them -- each mask is located from its image path instead.
# They are kept so that any other cell relying on these names keeps working.
mask_train_files, mask_test_val_files = train_test_split(mask_files, test_size=0.4, random_state=42)
mask_test_files, mask_val_files = train_test_split(mask_test_val_files, test_size=0.5, random_state=42)

# Train, test, and validation files
train_dir = os.path.join(data_folder, 'train')
test_dir = os.path.join(data_folder, 'test')
val_dir = os.path.join(data_folder, 'val')

# Define the crop size
desired_size = (144, 172, 128)

# File-name suffixes used to derive a mask path from its image path.
_IMAGE_SUFFIX = '_T1w.nii.gz'
_MASK_SUFFIX = '_label-L_desc-T1lesion_mask.nii.gz'


def _lesion_center(mask_data):
    """Return the mask's center of mass, or the volume center for an empty mask.

    ``ndimage.center_of_mass`` normalizes by the sum of its input, so a mask
    that sums to zero yields NaN coordinates and the later ``int()`` conversion
    raises ``ValueError``. Falling back to the geometric center keeps such a
    volume in the dataset instead of aborting the whole run.
    """
    if mask_data.sum() == 0:
        return tuple(dim / 2 for dim in mask_data.shape)
    return ndimage.center_of_mass(mask_data)


def _crop_bounds(center, volume_shape, crop_size):
    """Return one slice per axis: a crop_size window around center, clamped to the volume."""
    window = []
    for coord, dim, size in zip(center, volume_shape, crop_size):
        start = int(coord - size // 2)
        stop = start + size
        # Clamping can shrink the window; the missing part is zero-padded later.
        window.append(slice(max(0, start), min(dim, stop)))
    return tuple(window)


def _crop_to_size(volume, window, crop_size):
    """Copy volume[window] into the low corner of a zero array of shape crop_size."""
    cropped = np.zeros(crop_size)
    extent = tuple(slice(0, axis.stop - axis.start) for axis in window)
    cropped[extent] = volume[window]
    return cropped


def _zscore(volume):
    """Z-score normalize volume using its own mean and sample (ddof=1) standard deviation.

    A constant volume has zero standard deviation; it is returned mean-centered
    (all zeros) rather than divided by zero into NaNs.
    """
    centered = volume - np.mean(volume)
    std = np.std(volume, ddof=1)
    if std == 0:
        return centered
    return centered / std


def _save_npy(volume, source_path, split_dir, tag):
    """Save volume as '<source stem><tag>.npy' inside a folder named '<source stem><tag>'.

    ``os.path.splitext`` strips only '.gz', so the stem still ends in '.nii'.
    The folder is joined onto split_dir from the full source stem; because
    ``os.path.join`` discards earlier components when a later one is absolute,
    an absolute source path places the folder beside the source file.
    """
    stem = os.path.splitext(source_path)[0]
    out_dir = os.path.join(split_dir, stem + tag)
    os.makedirs(out_dir, exist_ok=True)
    np.save(os.path.join(out_dir, os.path.basename(stem) + tag + '.npy'), volume)


def _preprocess_split(image_paths, split_dir, split_name, crop_size=desired_size):
    """Crop, normalize and save every image/mask pair of one split.

    For each image path: load the image and its lesion mask, crop both to
    crop_size around the lesion center, Z-score normalize the image only, and
    write both arrays as .npy files tagged with split_name.
    """
    for image_path in image_paths:
        # The image path already contains its directory, so swapping the suffix
        # gives the complete mask path; re-joining it with the directory would
        # duplicate that directory for relative paths.
        mask_path = image_path.replace(_IMAGE_SUFFIX, _MASK_SUFFIX)

        image_data = nib.load(image_path).get_fdata()
        mask_data = nib.load(mask_path).get_fdata()

        # The same window is used for image and mask so they stay aligned.
        window = _crop_bounds(_lesion_center(mask_data), image_data.shape, crop_size)
        cropped_image = _crop_to_size(image_data, window, crop_size)
        cropped_mask = _crop_to_size(mask_data, window, crop_size)

        _save_npy(_zscore(cropped_image), image_path, split_dir, '_' + split_name + '_image')
        _save_npy(cropped_mask, mask_path, split_dir, '_' + split_name + '_mask')


# Train, test, and validation files go through the same pipeline.
for split_name, split_files, split_dir in (
    ('train', image_train_files, train_dir),
    ('test', image_test_files, test_dir),
    ('val', image_val_files, val_dir),
):
    _preprocess_split(split_files, split_dir, split_name)

In [6]:
# Folders that the later consolidation cell copies into (<split>/images and
# <split>/masks). The recursive globs below would also match the image copies
# stored there, so a re-run would list every image twice while the mask list
# stays unchanged, breaking the index-based image/mask pairing.
_consolidated_dirs = {
    os.path.normpath(os.path.join(data_folder, split, kind))
    for split in ("train", "test", "val")
    for kind in ("images", "masks")
}


def _find_npy(pattern):
    """Return the sorted .npy paths under data_folder matching pattern.

    Files that sit directly inside a consolidated <split>/images or
    <split>/masks folder are skipped.
    """
    matches = glob.glob(os.path.join(data_folder, pattern), recursive=True)
    return sorted(
        path for path in matches
        if os.path.normpath(os.path.dirname(path)) not in _consolidated_dirs
    )


image_npy_files_train = _find_npy("**/*_T1w.nii_train_image.npy")
mask_npy_files_train = _find_npy("**/*_mask.nii_train_mask.npy")

image_npy_files_test = _find_npy("**/*_T1w.nii_test_image.npy")
mask_npy_files_test = _find_npy("**/*_mask.nii_test_mask.npy")

image_npy_files_val = _find_npy("**/*_T1w.nii_val_image.npy")
mask_npy_files_val = _find_npy("**/*_mask.nii_val_mask.npy")

# Counts are printed in train, test, val order (image count, then mask count).
print("Image .npy files:", len(image_npy_files_train))
print("Mask .npy files:", len(mask_npy_files_train))

print("Image .npy files:", len(image_npy_files_test))
print("Mask .npy files:", len(mask_npy_files_test))

print("Image .npy files:", len(image_npy_files_val))
print("Mask .npy files:", len(mask_npy_files_val))

Image .npy files: 393
Mask .npy files: 393
Image .npy files: 131
Mask .npy files: 131
Image .npy files: 131
Mask .npy files: 131


In [8]:
def _copy_pairs(image_paths, mask_paths, image_dir, mask_dir):
    """Copy index-aligned image/mask .npy files into image_dir and mask_dir.

    Each mask is stored under its image's file name, so a pair shares one name
    across the two folders. Pairing is purely positional, so the two lists must
    have the same length.

    Raises:
        ValueError: if the lists differ in length. Checked before anything is
            copied, instead of failing midway or silently skipping surplus masks.
    """
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            "Cannot pair %d images with %d masks" % (len(image_paths), len(mask_paths))
        )

    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    for image_path, mask_path in zip(image_paths, mask_paths):
        file_name = os.path.basename(image_path)
        shutil.copy(image_path, os.path.join(image_dir, file_name))
        shutil.copy(mask_path, os.path.join(mask_dir, file_name))


# Train
train_image_folder = os.path.join(data_folder, "train", "images")
train_mask_folder = os.path.join(data_folder, "train", "masks")

# train/images and train/masks
_copy_pairs(image_npy_files_train, mask_npy_files_train, train_image_folder, train_mask_folder)

# Test
test_image_folder = os.path.join(data_folder, "test", "images")
test_mask_folder = os.path.join(data_folder, "test", "masks")

# test/images and test/masks
_copy_pairs(image_npy_files_test, mask_npy_files_test, test_image_folder, test_mask_folder)

# Validation
val_image_folder = os.path.join(data_folder, "val", "images")
val_mask_folder = os.path.join(data_folder, "val", "masks")

# val/images and val/masks
_copy_pairs(image_npy_files_val, mask_npy_files_val, val_image_folder, val_mask_folder)

In [9]:
def _report_counts(image_dir, mask_dir, image_label, mask_label):
    """Print, then return, the number of directory entries in image_dir and mask_dir."""
    image_count = len(os.listdir(image_dir))
    mask_count = len(os.listdir(mask_dir))
    print(image_label, image_count)
    print(mask_label, mask_count)
    return image_count, mask_count


# Train folder
train_image_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/train/images/"
train_mask_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/train/masks/"

num_images, num_masks = _report_counts(
    train_image_folder,
    train_mask_folder,
    "Number of images in train/images:",
    "Number of masks in train/masks:",
)

# Test Folder
test_image_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/test/images/"
test_mask_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/test/masks/"

num_images, num_masks = _report_counts(
    test_image_folder,
    test_mask_folder,
    "Number of images in test/images:",
    "Number of masks in test/masks:",
)

# Validation Folder
val_image_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/val/images/"
val_mask_folder = "/ssd_scratch/ATLAS_2/train_filter_3d/val/masks/"

num_images, num_masks = _report_counts(
    val_image_folder,
    val_mask_folder,
    "Number of images in val/images:",
    "Number of masks in val/masks:",
)

Number of images in train/images: 393
Number of masks in train/masks: 393
Number of images in test/images: 131
Number of masks in test/masks: 131
Number of images in val/images: 131
Number of masks in val/masks: 131
