In [5]:
import pandas as pd

from aidream_registration import constants
from aidream_registration.dataloaders import AtlasImagingNiftiLoader
from aidream_registration.utils.cohort_utils import get_perfusion_patients
from sklearn.utils import resample

from torch.optim.lr_scheduler import ReduceLROnPlateau

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.model_selection import train_test_split


import numpy as np
import ants
import rt_utils


In [6]:
torch.cuda.empty_cache()

In [7]:
list_patients = get_perfusion_patients()
print("Number of patients:", len(list_patients))


Number of patients: 186


In [8]:
# Load the atlas imaging nifti loader :
atlas_loader = AtlasImagingNiftiLoader(source_mri="PIPELINE_SS")


In [9]:
# First, load the hammersmith atlas, and create a mask :
path_hammersmith = constants.DIR_DATA / "hammersmith" / "T1w_ICBM_skullstripped.nii.gz"
ants_hammersmith = ants.image_read(str(path_hammersmith))

mask_hammersmith = ants_hammersmith > 0
print(f"Hammersmith mask volume: {mask_hammersmith.sum() / 1e3:.2f} cm3")


Hammersmith mask volume: 1886.57 cm3


In [10]:
# Load all segmentations in a dictionary :
dict_segmentations = {
    path_seg.name.removesuffix(".nii.gz"): ants.image_read(str(path_seg)) > 0
    for path_seg in (constants.DIR_DATA / "hammersmith").glob("*.nii.gz")
    if path_seg.name != "T1w_ICBM_skullstripped.nii.gz"
}

print("Number of segmentations:", len(dict_segmentations))

for segmentation, mask_segmentation in dict_segmentations.items():
    print(fr"{segmentation} volume: {mask_segmentation.sum() / 1e3:.2f} cm3")


Number of segmentations: 21
L.Ventricles volume: 6.72 cm3
R_insula volume: 16.48 cm3
R.Ventricles volume: 6.11 cm3
L.Temporal_lobe volume: 115.72 cm3
R.Occipital_lobe volume: 76.46 cm3
L.Grey_nuclei volume: 18.22 cm3
R.Grey_nuclei volume: 18.20 cm3
R.Limbic_lobe volume: 24.28 cm3
Brainstem volume: 26.94 cm3
L.frontal_lobe volume: 225.46 cm3
third_ventricle volume: 0.63 cm3
L.Post_fossea volume: 90.63 cm3
L.Parietal_lobe volume: 142.35 cm3
Corpus_callosum volume: 21.05 cm3
R.Temporal_lobe volume: 121.76 cm3
L.Occipital_lobe volume: 75.31 cm3
R.frontal_lobe volume: 227.50 cm3
R.Post_fossea volume: 88.57 cm3
R.Parietal_lobe volume: 141.59 cm3
L.Limbic_lobe volume: 24.96 cm3
L_insula volume: 16.62 cm3


In [11]:
# Create mask_1 for the ventricle voxels :
mask_1 = dict_segmentations["L.Ventricles"] + dict_segmentations["R.Ventricles"] + dict_segmentations["third_ventricle"]
mask_1 = mask_1 > 0

print(f"Ventricles volume: {mask_1.sum() / 1e3:.2f} cm3")


Ventricles volume: 13.47 cm3


In [39]:
# Create mask82 for the segmented voxels that are not ventricles :
mask_2 = None
for seg, mask_segmentation in dict_segmentations.items():

    if seg not in ["L.Ventricles", "R.Ventricles", "third_ventricle"]:
        if mask_2 is None:
            mask_2 = mask_segmentation
        else:
            mask_2 += mask_segmentation

mask_2 = mask_2 > 0

print(f"Brain volume: {mask_2.sum() / 1e3:.2f} cm3")

mask_2.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "mask_2.nii.gz"))


Brain volume: 1472.11 cm3


In [40]:
# Create mask_unknown for the voxels that are not segmented :
mask_unknown = mask_hammersmith * (1 - mask_1) * (1 - mask_2)
mask_unknown = mask_unknown > 0

print(f"Unknown volume: {mask_unknown.sum() / 1e3:.2f} cm3")

mask_unknown.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "unknown.nii.gz"))


Unknown volume: 405.38 cm3


In [41]:
# Create two bounding boxes for the ventricles , a small one and a big one :

seg_idx = np.array(np.nonzero(mask_1.numpy()))

bbox_min, bbox_max = np.min(seg_idx, axis=1), np.max(seg_idx, axis=1)

bbox_min_1, bbox_max_1 = bbox_min - 3, bbox_max + 3
bbox_min_2, bbox_max_2 = bbox_min - 8, bbox_max + 8

np_bbox_1 = np.zeros(mask_1.shape)
np_bbox_1[bbox_min_1[0]: bbox_max_1[0], bbox_min_1[1]: bbox_max_1[1], bbox_min_1[2]: bbox_max_1[2]] = 1

np_bbox_2 = np.zeros(mask_1.shape)
np_bbox_2[bbox_min_2[0]: bbox_max_2[0], bbox_min_2[1]: bbox_max_2[1], bbox_min_2[2]: bbox_max_2[2]] = 1

mask_small = (ants.from_numpy(np_bbox_1, origin=mask_1.origin, spacing=mask_1.spacing, direction=mask_1.direction) > 0) * mask_hammersmith
mask_big = (ants.from_numpy(np_bbox_2, origin=mask_1.origin, spacing=mask_1.spacing, direction=mask_1.direction) > 0) * mask_hammersmith


In [42]:
# all voxels inside mask_small that are also in mask_unknown are to be predicted :
mask_pred = mask_small * mask_unknown

print(f"Prediction volume: {mask_pred.sum() / 1e3:.2f} cm3")

# all voxels inside mask_big and not in mask_small that are also in mask_unknown are csf voxels :
mask_csf = mask_big * (1 - mask_small) * mask_unknown

print(f"CSF volume: {mask_csf.sum() / 1e3:.2f} cm3")

# for brain voxels, keep only the voxels that are in mask_big :
mask_2 = mask_2 * mask_big

print(f"new Brain volume: {mask_2.sum() / 1e3:.2f} cm3")



Prediction volume: 152.20 cm3
CSF volume: 34.17 cm3
new Brain volume: 460.25 cm3


In [43]:
mask_4 = mask_2 + mask_csf
mask_4 = mask_4 > 0

In [44]:
# Generate spatial coordinate images :
x,y,z = ants_hammersmith.shape

x_coords = np.linspace(0, 1, x)[:, None, None]
y_coords = np.linspace(0, 1, y)[None, :, None]
z_coords = np.linspace(0, 1, z)[None, None, :]

x_map = np.broadcast_to(x_coords, (x, y, z))
y_map = np.broadcast_to(y_coords, (x, y, z))
z_map = np.broadcast_to(z_coords, (x, y, z))

ants_x_map = ants.from_numpy(x_map, origin=ants_hammersmith.origin, spacing=ants_hammersmith.spacing, direction=ants_hammersmith.direction)
ants_y_map = ants.from_numpy(y_map, origin=ants_hammersmith.origin, spacing=ants_hammersmith.spacing, direction=ants_hammersmith.direction)
ants_z_map = ants.from_numpy(z_map, origin=ants_hammersmith.origin, spacing=ants_hammersmith.spacing, direction=ants_hammersmith.direction)



In [45]:
mask_1.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "ventricles.nii.gz"))
mask_2.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "brain.nii.gz"))
mask_csf.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "csf.nii.gz"))
mask_pred.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "prediction.nii.gz"))

In [46]:
# Now, let's treat the problem as a Machine Learning classification problem :
# The training data will be mask_1 and mask_2 and mask_csf :

# First, create the training data :
# add mask_1 :
ventricles_idx = np.array(np.nonzero(mask_1.numpy()))
ventricles_intensity = ants_hammersmith.numpy()[ventricles_idx[0], ventricles_idx[1], ventricles_idx[2]]
df_ventricles = pd.DataFrame({"x": ventricles_idx[0], "y": ventricles_idx[1], "z": ventricles_idx[2], "intensity": ventricles_intensity, "label": 0})

# # now add mask_2 :
# brain_idx = np.array(np.nonzero(mask_2.numpy()))
# brain_intensity = ants_hammersmith.numpy()[brain_idx[0], brain_idx[1], brain_idx[2]]
# df_brain = pd.DataFrame({"x": brain_idx[0], "y": brain_idx[1], "z": brain_idx[2], "intensity": brain_intensity, "label": 1})
#
# # now add mask_csf :
# csf_idx = np.array(np.nonzero(mask_csf.numpy()))
# csf_intensity = ants_hammersmith.numpy()[csf_idx[0], csf_idx[1], csf_idx[2]]
# df_csf = pd.DataFrame({"x": csf_idx[0], "y": csf_idx[1], "z": csf_idx[2], "intensity": csf_intensity, "label": 2})

# mask_4 :
brain_idx = np.array(np.nonzero(mask_4.numpy()))
brain_intensity = ants_hammersmith.numpy()[brain_idx[0], brain_idx[1], brain_idx[2]]
df_brain = pd.DataFrame({"x": brain_idx[0], "y": brain_idx[1], "z": brain_idx[2], "intensity": brain_intensity, "label": 1})


# Concatenate the dataframes :
# df_train = pd.concat([df_ventricles, df_brain, df_csf], ignore_index=True)
df_train = pd.concat([df_ventricles, df_brain], ignore_index=True)

print(fr"len(df_train): {len(df_train)}")

X_train, y_train = df_train[["x", "y", "z", "intensity"]].values, df_train["label"].values

print(f"Number of training samples: {len(X_train)}")
print(f"percentage of ventricles in the training set: {np.mean(y_train == 0):.4f}")
print(f"percentage of brain in the training set: {np.mean(y_train == 1):.4f}")
# print(f"percentage of CSF in the training set: {np.mean(y_train == 2):.2f}")



len(df_train): 507885
Number of training samples: 507885
percentage of ventricles in the training set: 0.0265
percentage of brain in the training set: 0.9735


In [54]:
# # Let's resample the training data to balance the classes :
ventricles = df_train[df_train["label"] == 0]
brain = df_train[df_train["label"] == 1]

# # the desired distribution is 1:1
desired_ventricles_size  = int(len(df_train) * 0.5)
desired_brain_size = len(df_train) - desired_ventricles_size
#
# # Oversampling the ventricles  :
ventricles_resampled = resample(ventricles, n_samples=desired_ventricles_size, replace=True, random_state=42)
# csf_resampled = resample(csf, n_samples=desired_csf_size, replace=True, random_state=42)
#
# # Downsampling the brain :
brain_resampled = resample(brain, n_samples=desired_brain_size, replace=False, random_state=42)
#
# # Concatenate the resampled dataframes :
df_train_resampled = pd.concat([ventricles_resampled, brain_resampled], ignore_index=True)
#
X_train, y_train = df_train_resampled[["x", "y", "z", "intensity"]].values, df_train_resampled["label"].values
#
# print(f"Number of training samples: {len(X_train)}")
# print(f"Number of validation samples: {len(X_val)}")
#
# print(f"percentage of ventricles in the training set: {np.mean(y_train == 0):.2f}, and in the validation set: {np.mean(y_val == 0):.2f}")
# print(f"percentage of CSF in the training set: {np.mean(y_train == 1):.2f}, and in the validation set: {np.mean(y_val == 1):.2f}")
# print(f"percentage of brain in the training set: {np.mean(y_train == 2):.2f}, and in the validation set: {np.mean(y_val == 2):.2f}")


In [55]:
# Build the neural network classifier :
class VoxelClassifier(nn.Module):
    def __init__(self):
        super(VoxelClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(4, 4),  # Input: 4 features, first hidden layer: 8 neurons
            nn.ReLU(),
            nn.Linear(4, 2)    # Output: 3 classes
        )

    def forward(self, x):
        return self.model(x)


In [56]:
model = VoxelClassifier()

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Move model to GPU
model.to(device)

cuda


VoxelClassifier(
  (model): Sequential(
    (0): Linear(in_features=4, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=2, bias=True)
  )
)

In [57]:
# Convert your training data into PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)  # Features
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)    # Labels


In [58]:
batch_size = len(X_train) // 3

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


In [59]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Training and validation loop
num_epochs = 1000
for epoch in range(num_epochs):

    print(fr"-------------------- Epoch", epoch+1, "--------------------")
    # Training phase
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()           # Zero gradients
        outputs = model(X_batch)        # Forward pass
        loss = criterion(outputs, y_batch)  # Compute loss
        loss.backward()                 # Backward pass
        optimizer.step()                # Update weights

        running_loss += loss.item()

    train_loss = running_loss / len(train_loader)

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in train_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()

            # Accuracy calculation
            predicted = outputs.argmax(dim=1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

    val_loss /= len(train_loader)
    val_accuracy = correct / total

    # Log metrics
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Training Loss: {train_loss:.4f}")
    print(f"  Validation Loss: {val_loss:.4f}")
    print(f"  Validation Accuracy: {val_accuracy:.4f}")

    # Optional: Log GPU memory usage
    print(f"  GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"  GPU Memory Reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")


-------------------- Epoch 1 --------------------
Epoch 1/1000:
  Training Loss: 0.7283
  Validation Loss: 0.6410
  Validation Accuracy: 0.6749
  GPU Memory Allocated: 0.05 GB
  GPU Memory Reserved: 0.33 GB
-------------------- Epoch 2 --------------------
Epoch 2/1000:
  Training Loss: 0.6203
  Validation Loss: 0.5916
  Validation Accuracy: 0.7562
  GPU Memory Allocated: 0.05 GB
  GPU Memory Reserved: 0.33 GB
-------------------- Epoch 3 --------------------
Epoch 3/1000:
  Training Loss: 0.5817
  Validation Loss: 0.5583
  Validation Accuracy: 0.7725
  GPU Memory Allocated: 0.05 GB
  GPU Memory Reserved: 0.33 GB
-------------------- Epoch 4 --------------------
Epoch 4/1000:
  Training Loss: 0.5424
  Validation Loss: 0.5089
  Validation Accuracy: 0.7849
  GPU Memory Allocated: 0.05 GB
  GPU Memory Reserved: 0.33 GB
-------------------- Epoch 5 --------------------
Epoch 5/1000:
  Training Loss: 0.4929
  Validation Loss: 0.4643
  Validation Accuracy: 0.7949
  GPU Memory Allocated: 0.05

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7ff4149f1e10>>
Traceback (most recent call last):
  File "/home/maichi/anaconda3/envs/env_aidream_registration/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


In [99]:
# Let's now do the prediction on mask_pred :

pred_idx = np.array(np.nonzero(mask_pred.numpy()))
pred_intensity = ants_hammersmith.numpy()[pred_idx[0], pred_idx[1], pred_idx[2]]

df_pred = pd.DataFrame({"x": pred_idx[0], "y": pred_idx[1], "z": pred_idx[2], "intensity": pred_intensity})
X_pred = df_pred[["x", "y", "z", "intensity"]].values

X_pred_tensor = torch.tensor(X_pred, dtype=torch.float32).to(device)

model.eval()  # Set the model to evaluation mode

# Perform predictions
with torch.no_grad():
    outputs = model(X_pred_tensor)  # Raw model outputs (logits)
    predictions = outputs.argmax(dim=1)  # Predicted class labels

# Convert predictions to NumPy if needed
predictions_np = predictions.cpu().numpy() + 1


In [100]:
ants_prediction = ants.from_numpy(np.zeros(mask_1.shape), origin=mask_1.origin, spacing=mask_1.spacing, direction=mask_1.direction)

ants_prediction[pred_idx[0], pred_idx[1], pred_idx[2]] = predictions_np
ants_prediction[mask_1 == 1] = 1


In [102]:
path_prediction = constants.DIR_DATA / "hammersmith" / "custom" / "ventricle_prediction.nii.gz"
path_prediction.parent.mkdir(parents=True, exist_ok=True)
ants_prediction.to_file(str(path_prediction))

In [None]:
# # Let's do the prediction set on the voxels inside the ventricles' neighborhood  that are not segmented as ventricles :
# mask_pred = mask_ventricles_neighbors * (1 - mask_ventricles)
#
# pred_idx = np.array(np.nonzero(mask_pred.numpy()))
# pred_intensity = ants_hammersmith.numpy()[pred_idx[0], pred_idx[1], pred_idx[2]]
# df_pred = pd.DataFrame({"x": pred_idx[0], "y": pred_idx[1], "z": pred_idx[2], "intensity": pred_intensity})
#
# X_pred = df_pred[["x", "y", "z", "intensity"]].values
# print(f"Nuber of prediction samples: {len(X_pred)} / {mask_ventricles_neighbors.sum()}")

In [None]:
# # Now, let's treat the problem as a Machine Learning classification problem :
# # The training data will be the ventricles' segmentation and the voxels outside the ventricles' neighborhood.
# # The prediction will be done on the voxels inside the ventricles' neighborhood, that are not segmented as ventricles.
#
# # First, create the training data :
# # Add the ventricles' segmentation :
# ventricles_idx = np.array(np.nonzero(mask_ventricles.numpy()))
# ventricles_intensity = ants_hammersmith.numpy()[ventricles_idx[0], ventricles_idx[1], ventricles_idx[2]]
# df_ventricles = pd.DataFrame({"x": ventricles_idx[0], "y": ventricles_idx[1], "z": ventricles_idx[2],
#                               "intensity": ventricles_intensity, "label": 0})
#
# # now Add the voxels outside the ventricles' neighborhood that are CSF :
# csf_no_ventricles_neighbors_idx = np.array(np.nonzero(mask_csf_no_ventricles_neighbors.numpy()))
# csf_no_ventricles_neighbors_intensity = ants_hammersmith.numpy()[csf_no_ventricles_neighbors_idx[0], csf_no_ventricles_neighbors_idx[1], csf_no_ventricles_neighbors_idx[2]]
# df_csf_no_ventricles_neighbors = pd.DataFrame({"x": csf_no_ventricles_neighbors_idx[0], "y": csf_no_ventricles_neighbors_idx[1], "z": csf_no_ventricles_neighbors_idx[2],
#                                                "intensity": csf_no_ventricles_neighbors_intensity, "label": 1})
#
# # Now, add the voxels outside the ventricles' neighborhood that are not CSF :
# no_csf_no_ventricles_neighbors_idx = np.array(np.nonzero(mask_no_csf_no_ventricles_neighbors.numpy()))
# no_csf_no_ventricles_neighbors_intensity = ants_hammersmith.numpy()[no_csf_no_ventricles_neighbors_idx[0], no_csf_no_ventricles_neighbors_idx[1], no_csf_no_ventricles_neighbors_idx[2]]
# df_no_csf_no_ventricles_neighbors = pd.DataFrame({"x": no_csf_no_ventricles_neighbors_idx[0], "y": no_csf_no_ventricles_neighbors_idx[1], "z": no_csf_no_ventricles_neighbors_idx[2],
#                                                   "intensity": no_csf_no_ventricles_neighbors_intensity, "label": 2})
#
# # Concatenate the dataframes :
# df_train = pd.concat([df_ventricles, df_csf_no_ventricles_neighbors, df_no_csf_no_ventricles_neighbors], ignore_index=True)
# print(fr"len(df_train): {len(df_train)}")
#
# X_train, y_train = df_train[["x", "y", "z", "intensity"]].values, df_train["label"].values
#
# print(f"Number of training samples: {len(X_train)}")
#
# print(f"percentage of ventricles in the training set: {np.mean(y_train == 0):.2f}")
# print(f"percentage of CSF in the training set: {np.mean(y_train == 1):.2f}")
# print(f"percentage of brain in the training set: {np.mean(y_train == 2):.2f}")


In [56]:
# # Create a custom ventricles mask by combining the lateral ventricles and the third ventricle :
# mask_ventricles = dict_segmentations["L.Ventricles"] + dict_segmentations["R.Ventricles"] + dict_segmentations["third_ventricle"]
# mask_ventricles = mask_ventricles > 0
#
# print(f"Ventricles volume: {mask_ventricles.sum() / 1e3:.2f} cm3")
#
# path_ventricles = constants.DIR_DATA / "hammersmith" / "custom" / "total_ventricles" / "ventricles.nii.gz"
# path_ventricles.parent.mkdir(parents=True, exist_ok=True)
# mask_ventricles.to_file(str(path_ventricles))


Ventricles volume: 13.47 cm3


In [57]:
# # Create a bigger bounding box for the ventricles' neighborhood :
# seg_idx = np.array(np.nonzero(mask_ventricles.numpy()))
#
# bbox_min, bbox_max = np.min(seg_idx, axis=1), np.max(seg_idx, axis=1)
# bbox_min, bbox_max = bbox_min - 5, bbox_max + 5
#
# np_bbox = np.zeros(mask_ventricles.shape)
# np_bbox[bbox_min[0]: bbox_max[0], bbox_min[1]: bbox_max[1], bbox_min[2]: bbox_max[2]] = 1
#
# mask_big_ventricles_neighbors = (ants.from_numpy(np_bbox, origin=mask_ventricles.origin, spacing=mask_ventricles.spacing, direction=mask_ventricles.direction) > 0) * mask_hammersmith
#
# mask_hammersmith = (mask_big_ventricles_neighbors > 0) * mask_hammersmith
#
# print(fr"New Hammersmith mask volume: {mask_hammersmith.sum() / 1e3:.2f} cm3")

New Hammersmith mask volume: 540.69 cm3


In [58]:
# # Create a mask for the neighbors of the ventricles :
# dict_ventricles_neighbors = {}
#
# for seg in ["L.Ventricles", "R.Ventricles", "third_ventricle"]:
#
#     # Load the segmentation mask (prior) :
#     mask_segmentation = dict_segmentations[seg]
#     seg_idx = np.array(np.nonzero(mask_segmentation.numpy()))
#     # Bounding box :
#     bbox_min, bbox_max = np.min(seg_idx, axis=1), np.max(seg_idx, axis=1)
#     # Dilate by 3 voxels :
#     bbox_min, bbox_max = bbox_min - 3, bbox_max + 3
#     np_bbox = np.zeros(mask_segmentation.shape)
#     np_bbox[bbox_min[0]: bbox_max[0], bbox_min[1]: bbox_max[1], bbox_min[2]: bbox_max[2]] = 1
#
#     dict_ventricles_neighbors[seg] = (ants.from_numpy(np_bbox, origin=mask_segmentation.origin, spacing=mask_segmentation.spacing, direction=mask_segmentation.direction) > 0) * mask_hammersmith
#
# # Create mask_ventricles_neighborhood :
# mask_ventricles_neighbors = dict_ventricles_neighbors["L.Ventricles"] + dict_ventricles_neighbors["R.Ventricles"] + dict_ventricles_neighbors["third_ventricle"]
# mask_ventricles_neighbors = mask_ventricles_neighbors > 0
#
# print(f"Ventricles neighborhood volume: {mask_ventricles_neighbors.sum() / 1e3:.2f} cm3")
#
# # Create mask for brain without the ventricles' neighborhood :
# mask_brain = None
#
# for seg, mask_segmentation in dict_segmentations.items():
#
#     if seg not in ["L.Ventricles", "R.Ventricles", "third_ventricle"]:
#         if mask_brain is None:
#             mask_brain = mask_segmentation
#         else:
#             mask_brain += mask_segmentation
#
# mask_brain = (mask_brain > 0) * mask_hammersmith
#
# print(f"Brain without ventricles neighborhood volume: {mask_brain.sum() / 1e3:.2f} cm3")


Ventricles neighborhood volume: 440.61 cm3
Brain without ventricles neighborhood volume: 361.43 cm3


In [24]:
# # Create a CSF segmentation for the voxels outside the ventricles that are not segmented :
# # Create a CSF segmentation for the voxels outside the ventricles that are not segmented :
# mask_csf_no_ventricles_neighbors = mask_no_ventricles_neighbors
#
# for seg, mask_segmentation in dict_segmentations.items():
#
#     if seg not in ["L.Ventricles", "R.Ventricles", "third_ventricle"]:
#         mask_csf_no_ventricles_neighbors = mask_csf_no_ventricles_neighbors * (1 - mask_segmentation)
#
# assert set(np.unique(mask_csf_no_ventricles_neighbors.numpy())) == {0, 1}
#
# print(f"CSF volume outside of the ventricles neighborhood: {mask_csf_no_ventricles_neighbors.sum() / 1e3:.2f} cm3")
#
# # Create a mask for the segmentations excluding the ventricles and the CSF :
# mask_no_csf_no_ventricles_neighbors = mask_no_ventricles_neighbors * (1 - mask_csf_no_ventricles_neighbors)
#
# print(f"Brain without ventricles and CSF volume: {mask_no_csf_no_ventricles_neighbors.sum() / 1e3:.2f} cm3")


CSF volume outside of the ventricles neighborhood: 19.44 cm3
Brain without ventricles and CSF volume: 80.63 cm3


In [27]:
# radius = 1
# beta = 0.2
#
# mrf = f"[{beta}, {radius}x{radius}x{radius}]"
#
# print(fr"Applying Atropos with MRF = {mrf} ...")
#
# atropos_results = ants.atropos(a=[ants_hammersmith],
#                                x=mask_hammersmith,
#                                m=mrf,
#                                c="[10,0]",
#                                i=[mask_ventricles, mask_csf_no_ventricles_neighbors, mask_no_csf_no_ventricles_neighbors],
#                                verbose=1)
#
# ants_ventricles_atropos = (atropos_results["segmentation"] == 1) * mask_ventricles_neighbors
#
# path_ventricles_atropos = constants.DIR_DATA / "hammersmith" / "custom" / f"ventricle_atropos_{mrf}.nii.gz"
# path_ventricles_atropos.parent.mkdir(parents=True, exist_ok=True)
# ants_ventricles_atropos.to_file(str(path_ventricles_atropos))


In [28]:
# # Apply atropos to improve the quality fo the ventricles segmentation using the ventricles mask as a prior :
#
# radius = 1
# beta = 0.2
#
# mrf = f"[{beta}, {radius}x{radius}x{radius}]"
#
# print(fr"Applying Atropos with MRF = {mrf} ...")
#
# atropos_results = ants.atropos(a=[ants_hammersmith, ants_x_map, ants_y_map],
#                                x=mask_hammersmith,
#                                m=mrf,
#                                c="[10,0]",
#                                i=[mask_ventricles, mask_csf_no_ventricles_neighbors, mask_no_csf_no_ventricles_neighbors],
#                                verbose=1)
#
# ants_ventricles_atropos = (atropos_results["segmentation"] == 1) * mask_ventricles_neighbors
#
# path_ventricles_atropos = constants.DIR_DATA / "hammersmith" / "custom" / f"ventricle_atropos_{mrf}_coords.nii.gz"
# path_ventricles_atropos.parent.mkdir(parents=True, exist_ok=True)
# ants_ventricles_atropos.to_file(str(path_ventricles_atropos))

In [29]:
# # Now, let's treat the problem as a Machine Learning classification problem :
# # The training data will be the ventricles' segmentation and the voxels outside the ventricles' neighborhood.
# # The prediction will be done on the voxels inside the ventricles' neighborhood, that are not segmented as ventricles.
#
# # First, create the training data :
# # Add the ventricles' segmentation :
# ventricles_idx = np.array(np.nonzero(mask_ventricles.numpy()))
# ventricles_intensity = ants_hammersmith.numpy()[ventricles_idx[0], ventricles_idx[1], ventricles_idx[2]]
# df_ventricles = pd.DataFrame({"x": ventricles_idx[0], "y": ventricles_idx[1], "z": ventricles_idx[2],
#                               "intensity": ventricles_intensity, "label": 0})
#
# # now Add the voxels outside the ventricles' neighborhood that are CSF :
# csf_no_ventricles_neighbors_idx = np.array(np.nonzero(mask_csf_no_ventricles_neighbors.numpy()))
# csf_no_ventricles_neighbors_intensity = ants_hammersmith.numpy()[csf_no_ventricles_neighbors_idx[0], csf_no_ventricles_neighbors_idx[1], csf_no_ventricles_neighbors_idx[2]]
# df_csf_no_ventricles_neighbors = pd.DataFrame({"x": csf_no_ventricles_neighbors_idx[0], "y": csf_no_ventricles_neighbors_idx[1], "z": csf_no_ventricles_neighbors_idx[2],
#                                                "intensity": csf_no_ventricles_neighbors_intensity, "label": 1})
#
# # Now, add the voxels outside the ventricles' neighborhood that are not CSF :
# no_csf_no_ventricles_neighbors_idx = np.array(np.nonzero(mask_no_csf_no_ventricles_neighbors.numpy()))
# no_csf_no_ventricles_neighbors_intensity = ants_hammersmith.numpy()[no_csf_no_ventricles_neighbors_idx[0], no_csf_no_ventricles_neighbors_idx[1], no_csf_no_ventricles_neighbors_idx[2]]
# df_no_csf_no_ventricles_neighbors = pd.DataFrame({"x": no_csf_no_ventricles_neighbors_idx[0], "y": no_csf_no_ventricles_neighbors_idx[1], "z": no_csf_no_ventricles_neighbors_idx[2],
#                                                   "intensity": no_csf_no_ventricles_neighbors_intensity, "label": 2})
#
# # Concatenate the dataframes :
# df_train = pd.concat([df_ventricles, df_csf_no_ventricles_neighbors, df_no_csf_no_ventricles_neighbors], ignore_index=True)
# print(fr"len(df_train): {len(df_train)}")
#
# X_train, y_train = df_train[["x", "y", "z", "intensity"]].values, df_train["label"].values
#
# print(f"Number of training samples: {len(X_train)}")
#
# print(f"percentage of ventricles in the training set: {np.mean(y_train == 0):.2f}")
# print(f"percentage of CSF in the training set: {np.mean(y_train == 1):.2f}")
# print(f"percentage of brain in the training set: {np.mean(y_train == 2):.2f}")


len(df_train): 113541
Number of training samples: 113541
percentage of ventricles in the training set: 0.12
percentage of CSF in the training set: 0.17
percentage of brain in the training set: 0.71


In [30]:
# # Let's resample the training data to balance the classes :
# ventricles = df_train[df_train["label"] == 0]
# csf = df_train[df_train["label"] == 1]
# brain = df_train[df_train["label"] == 2]
#
# # the desired distribution is 3:3:4
# desired_ventricles_size = desired_csf_size = int(len(df_train) * 0.3)
# desired_brain_size = len(df_train) - desired_ventricles_size - desired_csf_size
#
# # Oversampling the ventricles and the CSF :
# ventricles_resampled = resample(ventricles, n_samples=desired_ventricles_size, replace=True, random_state=42)
# csf_resampled = resample(csf, n_samples=desired_csf_size, replace=True, random_state=42)
#
# # Downsampling the brain :
# brain_resampled = resample(brain, n_samples=desired_brain_size, replace=False, random_state=42)
#
# # Concatenate the resampled dataframes :
# df_train_resampled = pd.concat([ventricles_resampled, csf_resampled, brain_resampled], ignore_index=True)
#
# X_train, X_val, y_train, y_val = train_test_split(df_train_resampled[["x", "y", "z", "intensity"]].values, df_train_resampled["label"].values, test_size=0.2, random_state=42)
#
# print(f"Number of training samples: {len(X_train)}")
# print(f"Number of validation samples: {len(X_val)}")
#
# print(f"percentage of ventricles in the training set: {np.mean(y_train == 0):.2f}, and in the validation set: {np.mean(y_val == 0):.2f}")
# print(f"percentage of CSF in the training set: {np.mean(y_train == 1):.2f}, and in the validation set: {np.mean(y_val == 1):.2f}")
# print(f"percentage of brain in the training set: {np.mean(y_train == 2):.2f}, and in the validation set: {np.mean(y_val == 2):.2f}")


Number of training samples: 90832
Number of validation samples: 22709
percentage of ventricles in the training set: 0.30, and in the validation set: 0.30
percentage of CSF in the training set: 0.30, and in the validation set: 0.30
percentage of brain in the training set: 0.40, and in the validation set: 0.40


In [31]:
# # Let's do the prediction set on the voxels inside the ventricles' neighborhood  that are not segmented as ventricles :
# mask_pred = mask_ventricles_neighbors * (1 - mask_ventricles)
#
# pred_idx = np.array(np.nonzero(mask_pred.numpy()))
# pred_intensity = ants_hammersmith.numpy()[pred_idx[0], pred_idx[1], pred_idx[2]]
# df_pred = pd.DataFrame({"x": pred_idx[0], "y": pred_idx[1], "z": pred_idx[2], "intensity": pred_intensity})
#
# X_pred = df_pred[["x", "y", "z", "intensity"]].values
# print(f"Nuber of prediction samples: {len(X_pred)} / {mask_ventricles_neighbors.sum()}")


Nuber of prediction samples: 427148 / 440611


In [39]:
# # Build the neural network classifier :
# class VoxelClassifier(nn.Module):
#     def __init__(self):
#         super(VoxelClassifier, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(4, 4),  # Input: 4 features, first hidden layer: 8 neurons
#             nn.ReLU(),
#             nn.Linear(4, 3)    # Output: 3 classes
#         )
#
#     def forward(self, x):
#         return self.model(x)


In [40]:
# model = VoxelClassifier()
#
# # Check if GPU is available
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print(device)
#
# # Move model to GPU
# model.to(device)


cuda


VoxelClassifier(
  (model): Sequential(
    (0): Linear(in_features=4, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=3, bias=True)
  )
)

In [41]:
# Convert your training data into PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)  # Features
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)    # Labels

# Convert your validation data into PyTorch tensors
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)  # Features
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(device)    # Labels


In [42]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

batch_size = 2000

train_loader = DataLoader(train_dataset, batch_size=90832, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=22709, shuffle=False)

# Verify DataLoaders by iterating through one batch
for X_batch, y_batch in train_loader:
    print("Training Batch:")
    print("  Features Shape:", X_batch.shape)
    print("  Labels Shape:", y_batch.shape)
    print("  Label Counts:", torch.bincount(y_batch))
    break  # Check only the first batch

for X_batch, y_batch in val_loader:
    print("Validation Batch:")
    print("  Features Shape:", X_batch.shape)
    print("  Labels Shape:", y_batch.shape)
    print("  Label Counts:", torch.bincount(y_batch))
    break  # Check only the first batch


Training Batch:
  Features Shape: torch.Size([90832, 4])
  Labels Shape: torch.Size([90832])
  Label Counts: tensor([27210, 27191, 36431], device='cuda:0')
Validation Batch:
  Features Shape: torch.Size([22709, 4])
  Labels Shape: torch.Size([22709])
  Label Counts: tensor([6852, 6871, 8986], device='cuda:0')


In [43]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [44]:
# Training and validation loop
num_epochs = 10000
for epoch in range(num_epochs):

    print(fr"-------------------- Epoch", epoch+1, "--------------------")
    # Training phase
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()           # Zero gradients
        outputs = model(X_batch)        # Forward pass
        loss = criterion(outputs, y_batch)  # Compute loss
        loss.backward()                 # Backward pass
        optimizer.step()                # Update weights

        running_loss += loss.item()

    train_loss = running_loss / len(train_loader)

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()

            # Accuracy calculation
            predicted = outputs.argmax(dim=1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

    val_loss /= len(val_loader)
    val_accuracy = correct / total

    # Log metrics
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Training Loss: {train_loss:.4f}")
    print(f"  Validation Loss: {val_loss:.4f}")
    print(f"  Validation Accuracy: {val_accuracy:.4f}")

    # Optional: Log GPU memory usage
    print(f"  GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"  GPU Memory Reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")


-------------------- Epoch 1 --------------------
Epoch 1/10000:
  Training Loss: 17.4586
  Validation Loss: 17.3820
  Validation Accuracy: 0.3017
  GPU Memory Allocated: 0.02 GB
  GPU Memory Reserved: 0.05 GB
-------------------- Epoch 2 --------------------
Epoch 2/10000:
  Training Loss: 17.2700
  Validation Loss: 17.1944
  Validation Accuracy: 0.3017
  GPU Memory Allocated: 0.02 GB
  GPU Memory Reserved: 0.05 GB
-------------------- Epoch 3 --------------------
Epoch 3/10000:
  Training Loss: 17.0825
  Validation Loss: 17.0078
  Validation Accuracy: 0.3017
  GPU Memory Allocated: 0.02 GB
  GPU Memory Reserved: 0.05 GB
-------------------- Epoch 4 --------------------
Epoch 4/10000:
  Training Loss: 16.8960
  Validation Loss: 16.8223
  Validation Accuracy: 0.3017
  GPU Memory Allocated: 0.02 GB
  GPU Memory Reserved: 0.05 GB
-------------------- Epoch 5 --------------------
Epoch 5/10000:
  Training Loss: 16.7106
  Validation Loss: 16.6379
  Validation Accuracy: 0.3017
  GPU Memory 

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7140e8b71a50>>
Traceback (most recent call last):
  File "/home/maichi/anaconda3/envs/env_aidream_registration/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Epoch 917/10000:
  Training Loss: 0.8917
  Validation Loss: 0.8929
  Validation Accuracy: 0.5445
  GPU Memory Allocated: 0.02 GB
  GPU Memory Reserved: 0.05 GB
-------------------- Epoch 918 --------------------


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7140e8b71a50>>
Traceback (most recent call last):
  File "/home/maichi/anaconda3/envs/env_aidream_registration/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


KeyboardInterrupt: 

In [22]:
X_test_tensor = torch.tensor(X_pred, dtype=torch.float32).to(device)

In [40]:
# Assuming `X_test_tensor` is the test data tensor
model.eval()  # Set the model to evaluation mode

# Perform predictions
with torch.no_grad():
    outputs = model(X_test_tensor)  # Raw model outputs (logits)
    predictions = outputs.argmax(dim=1)  # Predicted class labels

# Convert predictions to NumPy if needed
predictions_np = predictions.cpu().numpy() + 1


In [44]:
ants_prediction = ants.from_numpy(np.zeros(mask_ventricles_neighbors.shape), origin=mask_ventricles_neighbors.origin, spacing=mask_ventricles_neighbors.spacing, direction=mask_ventricles_neighbors.direction)

ants_prediction[pred_idx[0], pred_idx[1], pred_idx[2]] = predictions_np
ants_prediction[mask_ventricles == 1] = 1

In [45]:
ants_prediction.to_file(str(constants.DIR_DATA / "hammersmith" / "custom" / "ventricles_prediction.nii.gz"))