# MRI CNN

## Imports
First we need to import the necessary libraries for the project.

In [1]:
import os

import tensorflow as tf
import nibabel as nib
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

2024-03-11 16:12:35.825793: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-11 16:12:38.505458: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-11 16:12:38.505624: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-11 16:12:38.901169: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-11 16:12:39.899175: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-11 16:12:39.904540: I tensorflow/core/platform/cpu_feature_guard.cc:1

## First Step (Loading Data)

First we create a function to load the .nii image data, then we collect the image file paths and look up their labels.

In [None]:
# Function to load .nii images
def load_nii(file_path):
    """Load one or more NIfTI volumes into a single stacked NumPy array.

    Args:
        file_path: A sequence of paths to ``.nii`` files, or a single path
            (``str`` / ``os.PathLike``), which is treated as a one-element
            batch.

    Returns:
        ``np.ndarray`` whose first axis indexes the images, in input order.
        Each entry is the array returned by ``get_fdata()`` for that file.
    """
    # Iterate over the argument itself. The previous version looped over the
    # notebook-level ``file_paths`` list, so every call loaded the whole
    # dataset regardless of which paths were requested.
    if isinstance(file_path, (str, os.PathLike)):
        # Iterating a bare string would walk its characters, so wrap it.
        paths = [file_path]
    else:
        paths = list(file_path)
    return np.array([nib.load(path).get_fdata() for path in paths])

# Collect the path and image ID of every .nii file under the dataset folder
file_paths = []
images_ids = []
base_folder = ".data/Image_Collections/ADNI1_Annual_2_Yr_3T"

for root, dirs, files in os.walk(base_folder):
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
    # the directory list in place (which steers the traversal) and the file
    # list keeps file_paths, and any seeded split made from it, reproducible.
    dirs.sort()
    for file in sorted(files):
        if file.endswith(".nii"):
            file_paths.append(os.path.join(root, file))
            # The image ID is taken to be the file name up to its first dot.
            # NOTE(review): confirm this matches the CSV's 'Image Data ID' values.
            images_ids.append(file.split('.')[0])

# Load labels
labels_df = pd.read_csv('.data/Image_Collections/ADNI1_Annual_2_Yr_3T_2_20_2024.csv')  # Replace with the actual path
labels_dict = dict(zip(labels_df['Image Data ID'], labels_df['Group']))

# Fail early with a readable message instead of a bare KeyError on the first
# image whose ID is absent from the CSV.
missing_ids = [image_id for image_id in images_ids if image_id not in labels_dict]
if missing_ids:
    raise KeyError(
        f"{len(missing_ids)} image ID(s) have no entry in the labels CSV, e.g. {missing_ids[:5]}"
    )

# One label per image, aligned index-by-index with file_paths / images_ids
labels = [labels_dict[image_data_id] for image_data_id in images_ids]

## Second Step (Split Data)

Now we encode the obtained data and split it into the train and test subsets (we may add a validation subset later if it proves necessary).

In [None]:
# Keep only the images whose shape matches the one the model is built for.
# Since not all the images are the same shape, we use the ones with a shape of
# 240x256x160, since they are the most abundant.

number_of_images = len(file_paths)

# Shape (first three dimensions) an image must have to be kept
TARGET_SHAPE = (240, 256, 160)

# Maximum number of images to inspect; set to None to inspect all of them
MAX_IMAGES_TO_SCAN = 10

if MAX_IMAGES_TO_SCAN is None:
    images_to_scan = number_of_images
else:
    # Never index past the end when fewer files than the cap were found
    images_to_scan = min(MAX_IMAGES_TO_SCAN, number_of_images)

usable_file_paths = []
usable_images_ids = []
usable_labels = []

for i in range(images_to_scan):

    # Open the NIfTI image to inspect its shape
    img = nib.load(file_paths[i])

    # Only the first three dimensions are compared; images with fewer than
    # three dimensions simply do not match and are skipped.
    if tuple(img.shape[:3]) == TARGET_SHAPE:
        usable_file_paths.append(file_paths[i])
        usable_images_ids.append(images_ids[i])
        usable_labels.append(labels[i])

In [None]:
# Encode labels
label_dict = {'CN': 0, 'MCI': 1, 'AD': 2}

# Use the shape-filtered lists (usable_*) rather than the raw ones: the raw
# lists still contain images whose shape differs from the model input, which
# cannot be stacked into one batch array or fed to the network.
encoded_labels = tf.keras.utils.to_categorical([label_dict[label] for label in usable_labels], num_classes=3)

# Split the data (paths only; the images themselves are loaded later, per batch)
X_train_paths, X_test_paths, y_train, y_test = train_test_split(usable_file_paths, encoded_labels, test_size=0.2, random_state=42)

## Third Step (Build CNN Model and Compile it)

Now we are going to build the CNN Model and Compile it

In [None]:
# Build CNN Model

# Dimensions of one input volume and its number of channels
width, height, depth = 240, 256, 160
channels = 1

# Assemble the network layer by layer:
# 3D convolution -> 3D max pooling -> flatten -> small dense layer -> 3-way softmax
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Conv3D(
        filters=8,
        kernel_size=(3, 3, 3),
        activation='relu',
        input_shape=(width, height, depth, channels),
    )
)
model.add(tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=8, activation='relu'))
model.add(tf.keras.layers.Dense(units=3, activation='softmax'))

In [None]:
# Compile Model: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Fourth Step (Train Model)

Now we need to train the previously created model, feeding it batches from a data generator.

In [14]:
# Define a custom data generator
def data_generator(file_paths, labels, batch_size):
    """Yield ``(images, labels)`` batches forever, cycling over the data.

    Images are loaded from disk one batch at a time via ``load_nii``.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence/array of labels aligned index-by-index with
            ``file_paths``.
        batch_size: Maximum number of samples per batch; the last batch of
            each pass may be smaller.

    Yields:
        Tuple ``(batch_images, batch_labels)``. A 4-D image stack gets a
        trailing channel axis of size 1 appended; ``batch_labels`` is a NumPy
        array slice.

    Raises:
        ValueError: On first iteration, if ``batch_size`` is not positive,
            ``file_paths`` is empty, or the two sequences differ in length.
    """
    # Without these checks a negative batch size or an empty path list would
    # make the loop below spin forever without ever yielding.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if len(file_paths) == 0:
        raise ValueError("file_paths is empty; nothing to generate")
    if len(file_paths) != len(labels):
        raise ValueError(
            f"file_paths and labels differ in length: {len(file_paths)} vs {len(labels)}"
        )

    labels = np.asarray(labels)

    while True:
        for start in range(0, len(file_paths), batch_size):
            stop = start + batch_size
            batch_images = load_nii(file_paths[start:stop])
            # Conv3D layers expect (batch, dim1, dim2, dim3, channels); add the
            # channel axis when the stacked volumes do not carry one.
            if batch_images.ndim == 4:
                batch_images = batch_images[..., np.newaxis]
            yield batch_images, labels[start:stop]

# Use the data generator during model training
batch_size = 2  # Experiment with different batch sizes
train_data_generator = data_generator(X_train_paths, y_train, batch_size)

# Round up so that the final, possibly smaller, batch of each pass is counted.
# Floor division would give 0 steps for a training set smaller than one batch
# and would let epochs drift out of step with the generator's passes.
steps_per_epoch = int(np.ceil(len(X_train_paths) / batch_size))

model.fit(train_data_generator, steps_per_epoch=steps_per_epoch, epochs=5)

## Fifth Step (Test Model)

In this step we use the test subset to see how well our model performed.

In [None]:
# Load the whole test subset into memory
X_test = load_nii(X_test_paths)

# The model input is (width, height, depth, channels); add the channel axis
# when the stacked volumes do not carry one.
if X_test.ndim == 4:
    X_test = X_test[..., np.newaxis]

# Evaluate the model, reusing the training batch size instead of the Keras
# default of 32 so that only a few full volumes are processed at once.
loss, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')