# Using Multiple Random Forest Models

In [None]:
## This notebook was primarily developed by Janani Prasad

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import plotly.graph_objects as go
from sklearn.metrics import (roc_curve, auc, roc_auc_score,
                             confusion_matrix)
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the AD-vs-control table from the results directory and preview the first rows.
data_path = "../results/AD_Control_ML.csv"
df = pd.read_csv(data_path)
df.head()

In [None]:
# Labels come from the 'AD' column; every remaining column is used as a feature.
y = df['AD'].to_numpy()
X = df.drop(columns='AD').to_numpy()

In [None]:

def get_auc_scores(clf, X_train, X_test, y_train, y_test):
    """Return the ROC AUC of a fitted classifier on the test split.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict_proba``; column 1 of its output
        is used as the score.
    X_train, y_train : array-like
        Accepted so existing callers keep working; not used, because only the
        test AUC is returned.
    X_test, y_test : array-like
        Test features and the matching true labels.

    Returns
    -------
    float
        ``roc_auc_score(y_test, score)`` for the test split.
    """
    # Only the test AUC is part of the return value, so the training set is
    # no longer scored (that result used to be computed and then discarded).
    y_test_score = clf.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_test_score)

## Random Forest with cv=5

In [None]:
def run_model_rf(X_training, y_training, seed=1234):
    """Grid-search the depth of a random forest with 5-fold CV, scored by ROC AUC.

    Parameters
    ----------
    X_training : array-like
        Training features passed to ``GridSearchCV.fit``.
    y_training : array-like
        Training labels passed to ``GridSearchCV.fit``.
    seed : int, default 1234
        Seed for the forest's ``random_state`` and for NumPy's global RNG.

    Returns
    -------
    GridSearchCV
        The search object after ``fit`` has been called on the training data.
    """
    # Kept so the global NumPy RNG is left in the same state as before.
    np.random.seed(seed)

    params = {'max_depth':  [None, 2, 3, 4]}

    # np.random.seed() returns None, so passing its result as random_state left
    # the forest unseeded. Pass the integer seed itself.
    forest = RandomForestClassifier(random_state=seed)

    modelROC5 = GridSearchCV(
        estimator=forest,
        param_grid=params,
        cv=5,
        scoring='roc_auc',
        n_jobs=5,
        verbose=1,
    )

    modelROC5.fit(X_training, y_training)
    return modelROC5

In [None]:
# Repeated stratified k-fold evaluation of the grid-searched random forest.
# Each outer fold stores its test AUC, ROC curve, feature importances and predictions.
N_REPEATS = 100
N_SPLITS = 5

auc_test_list = []
# One accumulator slot per feature column instead of a hard-coded width.
importance_tot = np.zeros(X.shape[1])

metrics = ['auc', 'fpr', 'tpr', 'thresholds', 'importance', 'y_test', 'y_pred']
results = {
    'test' : {m:[] for m in metrics}
}

count = 0

for i in range(N_REPEATS):
    # A StratifiedKFold with a fixed integer random_state yields the same
    # partition on every split() call, so reusing one splitter made all
    # repetitions evaluate identical folds. Seed each repetition differently.
    kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=i)
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        modelROC5Run = run_model_rf(X_train, y_train)

        auc_test = get_auc_scores(modelROC5Run, X_train, X_test, y_train, y_test)
        auc_test_list.append(auc_test)

        # Importances of the estimator refit with the best grid-search parameters.
        importance = np.array(modelROC5Run.best_estimator_.feature_importances_)
        importance_tot = importance_tot + importance

        fpr, tpr, thresholds = roc_curve(y_test, modelROC5Run.predict_proba(X_test)[:,1])
        y_pred = modelROC5Run.predict(X_test)

        results['test']['fpr'].append(fpr)
        results['test']['tpr'].append(tpr)
        results['test']['thresholds'].append(thresholds)
        results['test']['auc'].append(auc_test)
        results['test']['importance'].append(importance)
        results['test']['y_test'].append(y_test)
        results['test']['y_pred'].append(y_pred)
        count += 1

# Mean importance per feature over every evaluated fold.
importance_tot = importance_tot/count

## ROC

In [None]:

## Used code from here: https://towardsdatascience.com/pooled-roc-with-xgboost-and-plotly-553a8169680c
# Pooled ROC plot: every fold's ROC curve is interpolated onto a shared FPR grid,
# then the mean curve is drawn over a min/max envelope.
kind = 'test'
c_fill      = 'rgba(52, 152, 219, 0.2)'
c_line      = 'rgba(52, 152, 219, 0.5)'
c_line_main = 'rgba(41, 128, 185, 1.0)'
c_grid      = 'rgba(189, 195, 199, 0.5)'
c_annot     = 'rgba(149, 165, 166, 0.5)'
c_highlight = 'rgba(192, 57, 43, 1.0)'

# Shared FPR grid; its resolution equals the number of evaluated folds.
fpr_mean    = np.linspace(0, 1, count)
interp_tprs = []
for i in range(count):
    fpr           = results[kind]['fpr'][i]
    tpr           = results[kind]['tpr'][i]
    interp_tpr    = np.interp(fpr_mean, fpr, tpr)
    interp_tpr[0] = 0.0  # force every curve to start at the origin
    interp_tprs.append(interp_tpr)
tpr_mean     = np.mean(interp_tprs, axis=0)
tpr_mean[-1] = 1.0  # force the mean curve to end at (1, 1)
tpr_std      = 2*np.std(interp_tprs, axis=0)  # computed but not drawn below
tpr_upper = np.max(interp_tprs, axis=0)
tpr_lower = np.min(interp_tprs, axis=0)

# Named auc_mean so the `auc` function imported from sklearn.metrics is not
# overwritten by a float once this cell has run.
auc_mean     = np.mean(results[kind]['auc'])
fig = go.Figure([
    go.Scatter(
        x          = fpr_mean,
        y          = tpr_upper,
        line       = dict(color=c_line, width=1),
        hoverinfo  = "skip",
        showlegend = False,
        name       = 'upper'),
    go.Scatter(
        x          = fpr_mean,
        y          = tpr_lower,
        fill       = 'tonexty',
        fillcolor  = c_fill,
        line       = dict(color=c_line, width=1),
        hoverinfo  = "skip",
        showlegend = False,
        name       = 'lower'),
    go.Scatter(
        x          = fpr_mean,
        y          = tpr_mean,
        line       = dict(color=c_line_main, width=2),
        hoverinfo  = "skip",
        showlegend = True,
        name       = f'AUC: {auc_mean:.2f}')
])
# Dashed diagonal from (0, 0) to (1, 1) as the chance-level reference.
fig.add_shape(
    type ='line', 
    line =dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)
fig.update_layout(
    template    = 'plotly_white', 
    title_x     = 0.5,
    xaxis_title = '(1 - Specificity)',
    yaxis_title = 'Sensitivity',
    width       = 500,
    height      = 500,
    legend      = dict(
        yanchor="bottom", 
        xanchor="right", 
        x=0.95,
        y=0.01,
    )
)
fig.update_yaxes(
    range       = [0, 1],
    gridcolor   = c_grid,
    scaleanchor = "x", 
    scaleratio  = 1,
    linecolor   = 'black')
# Kept as the last expression of the cell, as in the original.
fig.update_xaxes(
    range       = [0, 1],
    gridcolor   = c_grid,
    constrain   = 'domain',
    linecolor   = 'black')


## Feature importance plot

In [None]:
# Bar chart of mean feature importance with one-standard-deviation error bars,
# sorted from most to least important.
plt.rcParams.update({
    "mathtext.fontset": "stix",
    "font.family": "STIXGeneral",
    "xtick.labelsize": 18,
    "ytick.labelsize": 15
})

# Drop the last feature column (index 7) from both the averaged vector and the
# per-fold matrix. Presumably this is the age covariate, going by the `_noage`
# naming; confirm against the CSV column order.
importance_tot_noage = importance_tot[:-1]
importance_arr = np.array(results['test']['importance'])
importance_arr_noage = np.delete(importance_arr, 7, axis=1)
importance_arr_mean_noage = importance_arr_noage.mean(axis=0)
importance_arr_std_noage = importance_arr_noage.std(axis=0)

# Indices of the remaining features in descending order of importance.
ind = np.argsort(importance_tot_noage)[::-1]
print(ind)

parlist = [r'$\tau_e$',r'$\tau_i$',r'$\alpha$',r'$v$',r'$g_{ei}$',r'$g_{ii}$',r'$\tau_G$']
parlist_sorted = [parlist[pos] for pos in ind]

bar_positions = list(range(len(importance_arr_mean_noage)))
plt.bar(
    bar_positions,
    importance_arr_mean_noage[ind],
    yerr=importance_arr_std_noage[ind],
    color="#08519c",
    capsize=8,
)
plt.xticks(list(range(7)), parlist_sorted)

plt.tight_layout()

In [None]:
# Pool the true labels and predictions from every fold, then print the per-class report.
pooled_true = list(np.concatenate(results['test']['y_test']).flat)
pooled_pred = list(np.concatenate(results['test']['y_pred']).flat)
print(classification_report(pooled_true, pooled_pred, target_names=['Control','AD']))

In [None]:
# Scalar metrics computed on the labels and predictions pooled across all folds.
ytest = list(np.concatenate(results['test']['y_test']).flat)
ypred = list(np.concatenate(results['test']['y_pred']).flat)

for label, scorer in (
    ('accuracy:', accuracy_score),
    ('precision:', precision_score),
    ('recall score:', recall_score),
    ('f1 score:', f1_score),
):
    print(label, scorer(ytest, ypred))
