In [1]:
# All Baselines for Node-Level classification - for left and right temporal lobe nodes
import os
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
from scipy.io import loadmat
from sklearn.neural_network import MLPClassifier as MLP
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.svm import SVC as SVM

from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from matplotlib import pyplot as plt

import xgboost
from xgboost import XGBClassifier as xgb
from xgboost import XGBRFClassifier as xgbrf

import torch
import torch.nn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
from torch.utils.data import DataLoader, random_split

In [2]:
def calculate_metrics(y_pred, y_true):
    """Return the balanced accuracy of `y_pred` measured against `y_true`.

    Note the argument order: predictions come first and ground truth second,
    which is the reverse of the `(y_true, y_pred)` order used by
    `balanced_accuracy_score` itself.
    """
    score = balanced_accuracy_score(y_true=y_true, y_pred=y_pred)
    return score

In [3]:
# Root folder that holds the per-node `Node_<num>` data directories and under which
# the `metrics` CSV is written. Set the NODE_BASELINES_ROOT environment variable to
# point the notebook at another location without editing it; when the variable is
# unset the original path below is used.
root = os.environ.get(
    'NODE_BASELINES_ROOT',
    '/home/neil/Lab_work/Jeong_Lab_Multi_Modal_MRI/Right_Temporal_Lobe/',
)

In [31]:
def get_list_of_node_nums():
    """Return the node numbers, as strings, that the baselines are run on.

    A new list is built on every call, so callers may modify the result freely.
    """
    # NOTE(review): the original local name suggested these are the nodes that
    # have SMOTE-augmented training data - confirm against the data folders.
    return ["948"]

In [5]:
def load_train_data(root: str, node_num: str):
    """Load the augmented training features and labels for one node.

    Reads variable ``X_aug_train`` from ``X_train_aug.mat`` and variable
    ``Y_aug_train`` from ``Y_train_aug.mat``, both located in
    ``<root>/Node_<node_num>/Aug_Train_Data/ALL_Patients``.

    Args:
        root: Base folder containing the per-node ``Node_<num>`` directories.
        node_num: Node identifier as a string, e.g. ``"948"``.

    Returns:
        A tuple ``(X_mat, Y_mat)``: the feature array exactly as stored in the
        .mat file, and the labels flattened to a 1-D array.

    Raises:
        KeyError: If the expected variable is missing from a .mat file.
        ValueError: If the label array has more than one non-singleton
            dimension and therefore is not a plain vector.
    """
    train_path = os.path.join(root, 'Node_' + node_num, 'Aug_Train_Data', 'ALL_Patients')
    raw_path_x = os.path.join(train_path, 'X_train_aug.mat')
    raw_path_y = os.path.join(train_path, 'Y_train_aug.mat')

    # Load the data from .mat files
    X_mat = loadmat(raw_path_x)['X_aug_train']
    Y_mat = loadmat(raw_path_y)['Y_aug_train']

    # The labels may have been saved as a (1, N) row or an (N, 1) column.
    # Accept both, but refuse a genuine 2-D matrix instead of silently flattening it.
    if sum(dim > 1 for dim in Y_mat.shape) > 1:
        raise ValueError(
            f"Expected a label vector in {raw_path_y}, got an array of shape {Y_mat.shape}."
        )
    Y_mat = Y_mat.ravel()

    return X_mat, Y_mat

In [6]:
def load_test_data(root: str, node_num: str):
    """Load the original (non-augmented) validation features and labels for one node.

    Reads variable ``X_orig_valid`` from ``X_valid_orig.mat`` and variable
    ``Y_orig_valid`` from ``Y_valid_orig.mat``, both located in
    ``<root>/Node_<node_num>/Orig_Val_Data/ALL_Patients``.

    Args:
        root: Base folder containing the per-node ``Node_<num>`` directories.
        node_num: Node identifier as a string, e.g. ``"948"``.

    Returns:
        A tuple ``(X_mat, Y_mat)``: the feature array exactly as stored in the
        .mat file, and the labels flattened to a 1-D array.

    Raises:
        KeyError: If the expected variable is missing from a .mat file.
        ValueError: If the label array has more than one non-singleton
            dimension and therefore is not a plain vector.
    """
    val_path = os.path.join(root, 'Node_' + node_num, 'Orig_Val_Data', 'ALL_Patients')
    raw_path_x = os.path.join(val_path, 'X_valid_orig.mat')
    raw_path_y = os.path.join(val_path, 'Y_valid_orig.mat')

    # Load the data from .mat files
    X_mat = loadmat(raw_path_x)['X_orig_valid']
    Y_mat = loadmat(raw_path_y)['Y_orig_valid']

    # The labels may have been saved as a (1, N) row or an (N, 1) column.
    # Accept both, but refuse a genuine 2-D matrix instead of silently flattening it.
    if sum(dim > 1 for dim in Y_mat.shape) > 1:
        raise ValueError(
            f"Expected a label vector in {raw_path_y}, got an array of shape {Y_mat.shape}."
        )
    Y_mat = Y_mat.ravel()

    return X_mat, Y_mat

In [None]:
# Choose which baseline classifier the main loop trains and evaluates.
# Supported values are 'MLP', 'RF', 'XGB' and 'SVM'; any other value makes the
# main loop raise NotImplementedError. The same string is also used as the
# prefix of the metrics CSV file name.
# model = 'MLP'
# model = 'RF'
# model = 'XGB'
model = 'SVM'

In [34]:
# Main loop to run the baseline models over all the nodes

# Fixed seed so that the stochastic baselines (MLP, RF, XGB) produce the same
# numbers every time the notebook is re-run from a fresh kernel.
SEED = 42


def build_classifier(model_name: str, seed: int = SEED):
    """Return a fresh, unfitted baseline classifier for `model_name`.

    Args:
        model_name: One of 'MLP', 'RF', 'XGB' or 'SVM'.
        seed: Value passed as `random_state` to the chosen estimator.

    Returns:
        An unfitted estimator exposing `fit` and `predict`.

    Raises:
        NotImplementedError: If `model_name` is not one of the supported values.
    """
    if model_name == 'MLP':
        return MLP(hidden_layer_sizes=(250,), random_state=seed, max_iter=1000, early_stopping=False)
    if model_name == 'RF':
        return RF(n_estimators=100, random_state=seed)
    if model_name == 'XGB':
        return xgb(objective='binary:logistic', max_depth=5, n_estimators=10, random_state=seed)
    if model_name == 'SVM':
        return SVM(C=1.0, kernel='rbf', max_iter=-1, random_state=seed)
    raise NotImplementedError("Unknown Model.")


# Fail fast on an unsupported `model` value before any data is read from disk.
build_classifier(model)

node_num_list = []  # stores the node numbers for all nodes
bal_acc_list = []  # stores the balanced accuracies for all nodes

node_numbers_with_smote = get_list_of_node_nums()

for node_num in node_numbers_with_smote:

    print(f'Node num: {node_num}')

    # load the data for the given node
    X_train, Y_train = load_train_data(root, node_num)
    X_test, Y_test = load_test_data(root, node_num)

    # A new estimator per node, so nothing learned on one node carries over to the next
    clf = build_classifier(model)

    # Train the model
    print(f'Training on Node number: {node_num}')
    clf.fit(X_train, Y_train)

    # Test the model
    print(f'Evaluating on Node number: {node_num}')
    y_true = Y_test
    y_pred = clf.predict(X_test)

    # Evaluate Trained Model with evaluation metrics
    bal_acc = calculate_metrics(y_pred, y_true)
    print(bal_acc)

    # for saving balanced accuracy of nodes
    node_num_list.append(node_num)
    bal_acc_list.append(bal_acc)



Node num: 948
Training on Node number: 948
Evaluating on Node number: 948
0.6190476190476191


In [None]:
# Column name -> per-node values collected by the main loop.
# (Deliberately not named `dict`, which would shadow the builtin for every later cell.)
metrics_by_node = {'node number': node_num_list, 'balanced_acc': bal_acc_list}

df = pd.DataFrame(metrics_by_node)

# saving the dataframe
metric_filename = model + '_metrics_ALL.csv'
metrics_dir = os.path.join(root, "metrics")
# to_csv does not create missing folders, so make sure the target directory exists
os.makedirs(metrics_dir, exist_ok=True)
save_metrics_path = os.path.join(metrics_dir, metric_filename)
df.to_csv(save_metrics_path, header=True, index=False)