In [1]:
# All Baselines for Node-Level classification - for left and right temporal lobe nodes
import os
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
from scipy.io import loadmat
from sklearn.neural_network import MLPClassifier as MLP
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.svm import SVC as SVM

from sklearn.metrics import balanced_accuracy_score
from matplotlib import pyplot as plt

import xgboost
from xgboost import XGBClassifier as xgb

import warnings
warnings.filterwarnings("ignore")


In [4]:
def calculate_metrics(y_pred, y_true):
    """Return the balanced accuracy of ``y_pred`` scored against ``y_true``.

    Note the argument order: predictions come first and ground-truth labels
    second, the reverse of the order ``balanced_accuracy_score`` itself takes.
    """
    score = balanced_accuracy_score(y_true=y_true, y_pred=y_pred)
    return score

In [5]:
# Root Folder
# Folder that holds one `Node_<num>` sub-folder per node; the loaders join this
# with 'Node_' + node_num to find the .mat files. Keep exactly one assignment
# active: the commented line targets the right temporal lobe, the active one
# the left temporal lobe.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
# root='/home/neil/Lab_work/Jeong_Lab_Multi_Modal_MRI/Right_Temporal_Lobe/'
root='/home/neil/Lab_work/Jeong_Lab_Multi_Modal_MRI/Left_Temporal_Lobe/'

In [6]:
# Right Temporal Lobe Nodes
# def get_list_of_node_nums():
#     node_numbers_with_smote = [
#     "888","889","890","891","892","893","894","895","896","897","898","899","900","901","902","903","904","905","906","907","908","909","910","911","912","913","914","915","916","917","918","919","920","921","922","923","924","925","926","927","928","929","930","931","932","933","934","935","936","937","938","939","940","941","942","943","944","945","946","947","948","949","950","951","952","953","954","955","956","957","958","959","960","961","962","963","964","965","966","968","969","970","971","973","974","975","976","977","978","979","980","981","982","983"
#     ]

#     return node_numbers_with_smote

# Left Temporal Lobe Nodes
def get_list_of_node_nums():
    """Return the left temporal lobe node IDs as strings, in ascending order.

    The IDs run from 385 through 479 inclusive, with 457 left out.
    """
    first_node, last_node, skipped_node = 385, 479, 457
    return [
        str(node_id)
        for node_id in range(first_node, last_node + 1)
        if node_id != skipped_node
    ]

In [7]:
def load_train_data(root: str, node_num: str) -> tuple:
    """Load the augmented training features and labels for one node.

    Reads ``X_train_aug.mat`` (variable ``X_aug_train``) and
    ``Y_train_aug.mat`` (variable ``Y_aug_train``) from
    ``<root>/Node_<node_num>/Aug_Train_Data/ALL_Patients``.

    Args:
        root: Folder that contains the ``Node_<node_num>`` sub-folders.
        node_num: Node identifier as a string (it is concatenated into the
            folder name).

    Returns:
        ``(X, Y)``: ``X`` is the array stored under ``X_aug_train``, returned
        as loaded; ``Y`` is the array stored under ``Y_aug_train`` flattened
        to one dimension.

    Raises:
        KeyError: if a .mat file does not contain the expected variable.
        Any error raised by ``scipy.io.loadmat`` (e.g. for a missing file)
        propagates unchanged.
    """
    train_path = os.path.join(root, 'Node_' + node_num, 'Aug_Train_Data', 'ALL_Patients')
    raw_path_x = os.path.join(train_path, "X_train_aug.mat")
    raw_path_y = os.path.join(train_path, "Y_train_aug.mat")

    # Load the data from .mat files
    X_mat = loadmat(raw_path_x)["X_aug_train"]
    Y_mat = loadmat(raw_path_y)["Y_aug_train"]

    # loadmat returns vectors as 2-D arrays. Flatten with reshape(-1) so that
    # both a (1, N) row and an (N, 1) column become an (N,) label vector; the
    # previous reshape(shape[1]) only worked for the row layout.
    Y_mat = Y_mat.reshape(-1)

    return X_mat, Y_mat

In [8]:
def load_test_data(root: str, node_num: str) -> tuple:
    """Load the original (non-augmented) validation features and labels for one node.

    Reads ``X_valid_orig.mat`` (variable ``X_orig_valid``) and
    ``Y_valid_orig.mat`` (variable ``Y_orig_valid``) from
    ``<root>/Node_<node_num>/Orig_Val_Data/ALL_Patients``.

    Args:
        root: Folder that contains the ``Node_<node_num>`` sub-folders.
        node_num: Node identifier as a string (it is concatenated into the
            folder name).

    Returns:
        ``(X, Y)``: ``X`` is the array stored under ``X_orig_valid``, returned
        as loaded; ``Y`` is the array stored under ``Y_orig_valid`` flattened
        to one dimension.

    Raises:
        KeyError: if a .mat file does not contain the expected variable.
        Any error raised by ``scipy.io.loadmat`` (e.g. for a missing file)
        propagates unchanged.
    """
    val_path = os.path.join(root, 'Node_' + node_num, 'Orig_Val_Data', 'ALL_Patients')
    raw_path_x = os.path.join(val_path, "X_valid_orig.mat")
    raw_path_y = os.path.join(val_path, "Y_valid_orig.mat")

    # Load the data from .mat files
    X_mat = loadmat(raw_path_x)["X_orig_valid"]
    Y_mat = loadmat(raw_path_y)["Y_orig_valid"]

    # loadmat returns vectors as 2-D arrays. Flatten with reshape(-1) so that
    # both a (1, N) row and an (N, 1) column become an (N,) label vector; the
    # previous reshape(shape[1]) only worked for the row layout.
    Y_mat = Y_mat.reshape(-1)

    return X_mat, Y_mat

In [9]:
# choose model
# Name of the baseline classifier to run; keep exactly one assignment active.
# The main loop recognises 'MLP', 'RF', 'XGB' and 'SVM' and raises
# NotImplementedError for anything else. The same string is also used as the
# prefix of the result file names.
# model = 'MLP'
# model = 'RF'
model = 'XGB'
# model = 'SVM'

In [10]:
# Main loop to run the baseline models over all the nodes (for all 5 trials)
#
# For every node: load the train/validation data once, fit `num_trials`
# freshly built classifiers, score each on the validation set with balanced
# accuracy, and write one single-row Excel file
# `<root>/Node_<num>/_Results/<model>_results_val.xlsx`.


def _build_classifier(model_name: str, seed: int):
    """Return a fresh, unfitted baseline classifier for `model_name`, seeded with `seed`.

    Raises NotImplementedError for an unrecognised model name.
    """
    if model_name == 'MLP':
        return MLP(hidden_layer_sizes=(256,), learning_rate_init=0.01, random_state=seed, max_iter=1000, early_stopping=False)
    if model_name == 'RF':
        return RF(n_estimators=100, random_state=seed)
    if model_name == 'XGB':
        # Alternative that was tried but NOT used (results carried a _1.xlsx suffix):
        #   xgb(objective='binary:logistic', n_estimators=100, random_state=seed)
        return xgb(objective='binary:logistic', max_depth=5, n_estimators=10, random_state=seed)  # This is used
    if model_name == 'SVM':
        return SVM(C=1.0, kernel='rbf', max_iter=-1, random_state=seed)
    raise NotImplementedError("Unknown Model.")


num_trials = 5

# Fail fast on an unsupported model name, before any data is loaded.
_build_classifier(model, seed=0)

node_numbers_with_smote = get_list_of_node_nums()

for node_num in node_numbers_with_smote:

    print(f'Node num: {node_num}')

    # load the data for the given node (shared by all trials)
    X_train, Y_train = load_train_data(root, node_num)
    X_test, Y_test = load_test_data(root, node_num)

    # One balanced-accuracy value per trial, in trial order
    val_bal_acc_list = []

    # Run the trials for this node
    for i in range(num_trials):
        print(f'Training Trial {i+1} of Node number {node_num}')

        # Build a new estimator per trial, seeded with the trial index, so
        # that re-running the notebook reproduces the same numbers while the
        # trials of the stochastic models still differ from one another.
        clf = _build_classifier(model, seed=i)

        # Train the model
        clf.fit(X_train, Y_train)

        # Test the model
        print(f'Evaluating Trial {i+1} of Node number: {node_num}')
        y_true = Y_test
        y_pred = clf.predict(X_test)

        # Evaluate Trained Model with evaluation metrics
        bal_acc = calculate_metrics(y_pred, y_true)
        print(f"Balanced Accuracy: {bal_acc}")

        val_bal_acc_list.append(bal_acc)

    # Save the results in a single-row dataframe: node id followed by one
    # column per trial. Headers are derived from num_trials so the two cannot
    # get out of step.
    headers_val = ['Node #'] + [f'Val_Bal_Acc_{j + 1}' for j in range(num_trials)]
    row_data_val = [node_num] + val_bal_acc_list

    df_val = pd.DataFrame([row_data_val], columns=headers_val)

    # Saving to Excel, next to the node's data under the same root the data
    # was read from (avoids a second hard-coded lobe path drifting from `root`).
    save_path = os.path.join(root, "Node_"+str(node_num), "_Results")
    os.makedirs(save_path, exist_ok=True)

    filename_val = model + "_results_val.xlsx"
    save_filepath_val = os.path.join(save_path, filename_val)

    df_val.to_excel(save_filepath_val, index=False, sheet_name='Sheet1')

    print("\nDone!")



Node num: 385
Training Trial 1 of Node number 385
Evaluating Trial 1 of Node number: 385
Balanced Accuracy: 1.0
Training Trial 2 of Node number 385
Evaluating Trial 2 of Node number: 385
Balanced Accuracy: 1.0
Training Trial 3 of Node number 385
Evaluating Trial 3 of Node number: 385
Balanced Accuracy: 1.0
Training Trial 4 of Node number 385
Evaluating Trial 4 of Node number: 385
Balanced Accuracy: 1.0
Training Trial 5 of Node number 385
Evaluating Trial 5 of Node number: 385
Balanced Accuracy: 1.0

Done!
Node num: 386
Training Trial 1 of Node number 386
Evaluating Trial 1 of Node number: 386
Balanced Accuracy: 1.0
Training Trial 2 of Node number 386
Evaluating Trial 2 of Node number: 386
Balanced Accuracy: 1.0
Training Trial 3 of Node number 386
Evaluating Trial 3 of Node number: 386
Balanced Accuracy: 1.0
Training Trial 4 of Node number 386
Evaluating Trial 4 of Node number: 386
Balanced Accuracy: 1.0
Training Trial 5 of Node number 386
Evaluating Trial 5 of Node number: 386
Balanced

In [11]:
# Combine all node results into one dataframe
#
# Reads every `<root>/Node_<num>/_Results/<model>_results_val.xlsx` written by
# the main loop and stacks the rows into one Excel file in the current
# working directory.

# Reuse the notebook-wide root folder and node list instead of repeating them
# here, so switching lobes only has to be done in one place.
base_path = root
node_nums = get_list_of_node_nums()

# Define the paths of the per-node Excel files
file_paths_val = [
    os.path.join(base_path, "Node_"+node_num, "_Results", model + "_results_val.xlsx")  # For FULL modality Only
    for node_num in node_nums
]

# Load every file first and concatenate once; calling pd.concat inside the
# loop re-copies the accumulated frame on each iteration.
per_node_frames = [pd.read_excel(file_path) for file_path in file_paths_val]

# ignore_index=True yields a fresh 0..n-1 index (no duplicate row indices).
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
if per_node_frames:
    combined_df_val = pd.concat(per_node_frames, axis=0, ignore_index=True)  # For FULL modality Only
else:
    combined_df_val = pd.DataFrame()

# Derive the lobe tag from the root folder name:
# "Left_Temporal_Lobe" -> "LeftTemp", "Right_Temporal_Lobe" -> "RightTemp".
lobe_tag = os.path.basename(os.path.normpath(base_path)).replace("_Temporal_Lobe", "Temp")

# Save the combined DataFrame to a new Excel file,
# e.g. XGB_LeftTemp_val_FULL_modality_Only_1.xlsx
combined_df_val.to_excel(f"{model}_{lobe_tag}_val_FULL_modality_Only_1.xlsx", index=False)