<a href="https://colab.research.google.com/github/mrshamshir/Automated-Neurological-Disease-Classification/blob/main/SVM_VoI_V2(final).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports and data loading

In [None]:
# import necessary libraries for entire notebook

from pathlib import Path
import nibabel as nib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score

In [None]:
# Locations of the training volumes, the VoI template and the diagnosis labels.
# Every input lives in the same directory, so DATA_DIR is the only value that has
# to change when the notebook is run outside this Colab / Google Drive setup.

DATA_DIR = Path("/content/drive/MyDrive/Assignment")

train_rCBF = DATA_DIR / "training_images_rcbf.nii"
train_DAT = DATA_DIR / "training_images_sbr.nii"

voi_template = DATA_DIR / "voi_template.nii"

# One row per training subject; the 'diagnose' column is used as the class label below.
labels = pd.read_csv(DATA_DIR / "Diagnoses_of_training_data.csv")

In [None]:
# Load NIfTI and extract image data
# Each file is opened with nibabel and its voxel array is read with get_fdata().
# NOTE(review): later cells stack the two training arrays and index them with the
# VoI template, so all three are presumably defined on the same voxel grid — confirm.
train_PET_rCBF = nib.load(train_rCBF)
train_data_rCBF = train_PET_rCBF.get_fdata()

train_PET_DAT = nib.load(train_DAT)
train_data_DAT = train_PET_DAT.get_fdata()

voi = nib.load(voi_template)
data_voi = voi.get_fdata()

### Feature engineering and dataset creation

In [None]:
def create_xdata(rCBF, DAT):
    """
        Pair the two image arrays of the same subjects into a single array.

        Parameters:
        - rCBF (numpy.ndarray): 4-D array; its axis 3 becomes the first axis of the result.
        - DAT (numpy.ndarray): Array with the same shape as `rCBF`.

        Returns:
        - numpy.ndarray: 5-D array whose first axis is the former axis 3 of the inputs, followed by
        the three remaining input axes, and whose last axis (length 2) holds `rCBF` at index 0 and
        `DAT` at index 1.
    """
    # Stacking at position 3 inserts the new length-2 axis there and shifts the old axis 3 to position 4.
    paired = np.stack((rCBF, DAT), axis=3)
    # Move that shifted axis to the front; the remaining axes keep their relative order.
    return paired.transpose(4, 0, 1, 2, 3)

# Combine the two training arrays into one array; the printed shape shows the resulting layout.
xdata_train = create_xdata(train_data_rCBF, train_data_DAT)
print(xdata_train.shape)

(40, 64, 64, 64, 2)


In [None]:
def mean_regional_extraction(data, template):
    """
        Compute the mean value of every template region, for each observation and channel.

        Parameters:
        - data (numpy.ndarray): 5-D array laid out as (observations, x, y, z, channels).
        - template (numpy.ndarray): 3-D label volume whose shape equals the spatial shape of `data`
        (`data.shape[1:4]`). Every distinct value greater than zero marks one region of interest;
        all other values are treated as background and ignored.

        Returns:
        - numpy.ndarray: 2-D array of shape (observations, regions * channels). Regions are ordered by
        ascending template value and, within each region, the channels keep their original order.
        If the template contains no value greater than zero, the result has shape (observations, 0).

        Raises:
        - ValueError: If `data` is not 5-D, or if `template` does not match the spatial shape of `data`.
    """
    data = np.asarray(data)
    template = np.asarray(template)

    if data.ndim != 5:
        raise ValueError(
            f"data must be 5-D (observations, x, y, z, channels); got {data.ndim} dimension(s)"
        )
    # A template that is smaller than the data grid would otherwise be accepted silently
    # and select the wrong voxels, so require an exact match.
    if template.shape != data.shape[1:4]:
        raise ValueError(
            f"template shape {template.shape} does not match spatial shape {data.shape[1:4]} of data"
        )

    n_observations, n_channels = data.shape[0], data.shape[4]

    # np.unique returns sorted values, which fixes the column order of the output.
    region_ids = [value for value in np.unique(template) if value > 0]

    # No region of interest: return an empty feature matrix rather than failing on the reshape below.
    if not region_ids:
        return np.empty((n_observations, 0), dtype=float)

    region_means = []
    for region_id in region_ids:
        # Voxel coordinates that belong to the current region
        xs, ys, zs = np.nonzero(template == region_id)
        # Shape (observations, voxels_in_region, channels)
        region_voxels = data[:, xs, ys, zs, :]
        # Average over the voxel axis -> (observations, channels)
        region_means.append(region_voxels.mean(axis=1))

    # (observations, regions, channels) flattened to one feature row per observation
    return np.stack(region_means, axis=1).reshape(n_observations, len(region_ids) * n_channels)


# Reduce every training subject's volumes to one mean value per VoI region and channel.
train_mr_features = mean_regional_extraction(xdata_train, data_voi)
print(train_mr_features.shape)

(40, 106)


In [None]:
# Split the dataset into training and validation sets with 80% for training and 20% for validation, preserving class distribution.
# random_state is fixed so the same split is produced on every run; stratify uses the same 'diagnose' column that supplies the labels.

X_train, X_val, y_train, y_val = train_test_split(train_mr_features,
                                                    np.array(labels['diagnose']),
                                                    test_size=0.2, random_state=13,
                                                    stratify=labels['diagnose'])

In [None]:
# Shapes of the training split (features, labels) followed by the validation split, one per line.
print(*(split.shape for split in (X_train, y_train, X_val, y_val)), sep="\n")

(32, 106)
(32,)
(8, 106)
(8,)


In [None]:
# Print the distinct label values and how often each occurs, to check the class balance of both splits.
print('train dataset', np.unique(y_train, return_counts=True))
print('validation dataset', np.unique(y_val, return_counts=True))

train dataset (array([1, 2, 3, 4]), array([8, 8, 8, 8]))
validation dataset (array([1, 2, 3, 4]), array([2, 2, 2, 2]))


#### normalization for each feature

In [None]:
def calculate_mean_std(data):
    """
        Per-feature mean and standard deviation of a dataset.

        The last axis of `data` is taken as the feature axis; all leading axes are merged
        into a single sample axis before the statistics are computed.

        Args:
        - data (numpy.ndarray): Input dataset, typically of shape (num_samples, num_features).

        Returns:
        - mean (numpy.ndarray): Mean of each feature, shape (num_features,).
        - std (numpy.ndarray): Standard deviation of each feature, shape (num_features,).
    """
    n_features = data.shape[-1]

    # One row per sample, one column per feature
    samples = data.reshape(-1, n_features)

    # Reduce over the sample axis so one statistic per feature remains
    return samples.mean(axis=0), samples.std(axis=0)



def normalizer(data, mean, std):
    """
        Normalize the input data using the provided mean and standard deviation.

        Each feature is centred on `mean` and divided by `std`. A feature whose standard
        deviation is exactly zero (constant in the data the statistics came from) is only
        centred, so the result never contains the inf/NaN values a division by zero would give.

        Args:
        - data (numpy.ndarray): Input data to be normalized; its last axis must broadcast against `mean` and `std`.
        - mean (numpy.ndarray): Mean values for each feature, with shape (num_features,).
        - std (numpy.ndarray): Standard deviation for each feature, with shape (num_features,).

        Returns:
        - ret (numpy.ndarray): Normalized data.
    """
    std = np.asarray(std)
    # Dividing by 1 leaves zero-variance features as `data - mean` instead of producing inf/NaN.
    safe_std = np.where(std == 0, 1, std)
    ret = (data - mean) / safe_std
    return ret



In [None]:
# We should calculate mean and std only based on the data that we are going to train our model on.
# (The validation split is scaled later with these same training statistics.)

mean, std = calculate_mean_std(X_train)
# Only the shapes are printed here: one statistic per feature column of X_train.
print("Mean for each channel:", mean.shape)
print("Standard deviation for each channel:", std.shape)

Mean for each channel: (106,)
Standard deviation for each channel: (106,)


In [None]:
# Scale both splits with the mean/std computed from the training split.
nX_train = normalizer(X_train, mean, std)
nX_val = normalizer(X_val, mean, std)
print(nX_train.shape)
print(nX_val.shape)

(32, 106)
(8, 106)


### Train and evaluate SVM model

In [None]:
def evaluate_svm(gt, preds):
    """
        Print a short classification report for a set of predictions.

        Parameters:
        - gt (array-like): Ground truth labels.
        - preds (array-like): Predicted labels, aligned with `gt`.

        Prints, in this order:
        - "Validation Accuracy:" followed by the accuracy score.
        - "Confusion Matrix:" followed by the matrix on the next line.
        - "Precision:" and "Recall:", each with one score per class.

        Returns:
        - None; the metrics are only printed.
    """
    print("Validation Accuracy:", accuracy_score(gt, preds))

    matrix = confusion_matrix(gt, preds)
    print("Confusion Matrix:", matrix, sep="\n")

    # average=None keeps one score per class instead of a single aggregated value
    per_class_scores = (
        ("Precision:", precision_score(gt, preds, average=None)),
        ("Recall:", recall_score(gt, preds, average=None)),
    )
    for caption, scores in per_class_scores:
        print(caption, scores)


#### Train the first SVM on the training split and evaluate it on the validation split, without feature normalization.

In [None]:
# Train an SVM classifier
# Baseline: linear-kernel SVC fitted on the un-normalized training features.
svm_classifier1 = svm.SVC(kernel='linear')
svm_classifier1.fit(X_train, y_train)


In [None]:
# Predict using the SVM classifier and evaluate
# The validation features are passed un-normalized, matching how svm_classifier1 was trained.
predicted_labels = svm_classifier1.predict(X_val)
evaluate_svm(y_val, predicted_labels)

Validation Accuracy: 0.75
Confusion Matrix:
[[2 0 0 0]
 [0 1 1 0]
 [0 0 2 0]
 [0 1 0 1]]
Precision: [1.         0.5        0.66666667 1.        ]
Recall: [1.  0.5 1.  0.5]


#### Train the second SVM on the training split and evaluate it on the validation split, with feature normalization.

In [None]:
# Train an SVM classifier
# Same linear-kernel SVC as before, but fitted on the normalized training features.
svm_classifier2 = svm.SVC(kernel='linear')
svm_classifier2.fit(nX_train, y_train)

In [None]:
# Predict using the SVM classifier
# nX_val was scaled with the training-split statistics, matching how svm_classifier2 was trained.
# Note: this rebinds predicted_labels, replacing the predictions of the first classifier.
predicted_labels = svm_classifier2.predict(nX_val)

evaluate_svm(y_val, predicted_labels)

Validation Accuracy: 1.0
Confusion Matrix:
[[2 0 0 0]
 [0 2 0 0]
 [0 0 2 0]
 [0 0 0 2]]
Precision: [1. 1. 1. 1.]
Recall: [1. 1. 1. 1.]


## Final model
Train the final model on all of the training data.
Predict labels for the unlabeled test data and save them to a CSV file.

### Load the test data

In [None]:
# Paths to the test image files (rCBF and SBR volumes); no labels are loaded for the test set.

test_rCBF = Path("/content/drive/MyDrive/Assignment/test_images_rcbf.nii")
test_DAT = Path("/content/drive/MyDrive/Assignment/test_images_sbr.nii")

In [None]:
# Load NIfTI and extract image data
# Same procedure as for the training files: open with nibabel, then read the voxel array with get_fdata().

test_PET_rCBF = nib.load(test_rCBF)
test_data_rCBF = test_PET_rCBF.get_fdata()

test_PET_DAT = nib.load(test_DAT)
test_data_DAT = test_PET_DAT.get_fdata()

### Dataset creation, feature engineering and feature normalization

In [None]:
# Combine the two test arrays exactly as was done for the training arrays.
xdata_test = create_xdata(test_data_rCBF, test_data_DAT)
print(xdata_test.shape)

(41, 64, 64, 64, 2)


In [None]:
# Regional mean features for all training subjects.
# This repeats the extraction from the feature-engineering section with the same inputs,
# so train_mr_features is rebound to an identical result.
train_mr_features = mean_regional_extraction(xdata_train, data_voi)
print(train_mr_features.shape)

(40, 106)


In [None]:
# Regional mean features for the test subjects, using the same VoI template as for training.
test_mr_features = mean_regional_extraction(xdata_test, data_voi)
print(test_mr_features.shape)

(41, 106)


In [None]:
# This time we are going to train our model on all of training data,
# so we calculate mean and std for all of them.
# Note: this rebinds `mean` and `std`, replacing the statistics of the 80% training split.

mean, std = calculate_mean_std(train_mr_features)
# Only the shapes are printed here: one statistic per feature column.
print("Mean for each channel:", mean.shape)
print("Standard deviation for each channel:", std.shape)

Mean for each channel: (106,)
Standard deviation for each channel: (106,)


In [None]:
# Normalize all training features with the statistics computed from the full training set.
train_normal = normalizer(train_mr_features, mean, std)
print(train_normal.shape)

(40, 106)


In [None]:
# Normalize the test features with the training mean/std (no statistics are taken from the test set).
test_normal = normalizer(test_mr_features, mean, std)
print(test_normal.shape)

(41, 106)


### Train SVM model, predict on test and save to file

In [None]:
# Final model: linear-kernel SVC fitted on every normalized training sample and its 'diagnose' label.
svm_classifier3 = svm.SVC(kernel='linear')
svm_classifier3.fit(train_normal, np.array(labels['diagnose']))

In [None]:
# Predict a label for every test subject with the final model.
test_preds3 = svm_classifier3.predict(test_normal)
print(test_preds3.shape)

(41,)


In [None]:
# Save the DataFrame to a CSV file, including patient number
# Patient numbers are generated as 1..N in prediction order.
# NOTE(review): this assumes the test volumes are stored in patient order — confirm.

index_array = np.arange(1, test_preds3.shape[0] + 1)
# column_stack puts both columns into one array with a common dtype.
combined_array = np.column_stack((index_array, test_preds3))

# NOTE(review): 'SVM_predication' looks like a typo for "prediction"; it is left unchanged
# because the column name is part of the written file.
df = pd.DataFrame(combined_array, columns=['patient_number', 'SVM_predication'])
# Written relative to the current working directory, without the DataFrame index.
df.to_csv("patient_predictions.csv", index=False)
