In [1]:
import pandas as pd 
import numpy as np
import os
from os import walk
import statistics
from matplotlib import pyplot as pt
from scipy.stats import kurtosis
from scipy.stats import skew
from scipy.stats import mannwhitneyu
import math 
from scipy.stats import spearmanr
import warnings
warnings.filterwarnings('ignore')
import plotly.graph_objects as go
from tqdm.notebook import tqdm
%matplotlib inline
from matplotlib import pyplot as plt
from google.colab import drive

# Mount Google Drive inside the Colab runtime; later cells change into
# /content/gdrive/MyDrive/TFM and read the pickled model outputs from there.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import plotly.graph_objects as go
def plot_roc(real_y, pred_y):
    """Plot a bootstrapped mean ROC curve with a +/- 2 standard-deviation band.

    The (label, score) pairs are resampled with replacement 100 times, using
    seeds 0..99 so the figure is reproducible. Each resample's ROC curve is
    interpolated onto a common false-positive-rate grid; the mean curve and a
    band of two standard deviations around it are drawn, and the mean
    bootstrap AUC is shown in the legend.

    Parameters
    ----------
    real_y : array-like
        Ground-truth binary labels, one per sample.
    pred_y : array-like
        Scores handed to ``roc_curve`` / ``roc_auc_score``; same length as
        ``real_y``.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with the band (upper/lower edge), the mean ROC curve and the
        chance diagonal.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in length, or no bootstrap resample
        contains both classes.
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having been executed first.
    from sklearn.metrics import roc_curve, roc_auc_score

    # Positional indexing with an index array needs ndarray inputs.
    real_y = np.asarray(real_y)
    pred_y = np.asarray(pred_y)
    n_samples = len(pred_y)
    if n_samples == 0:
        raise ValueError("real_y and pred_y must not be empty")
    if len(real_y) != n_samples:
        raise ValueError("real_y and pred_y must have the same length")

    n_bootstraps = 100

    c_fill      = 'rgba(52, 152, 219, 0.2)'
    c_line      = 'rgba(52, 152, 219, 0.5)'
    c_line_main = 'rgba(41, 128, 185, 1.0)'
    c_grid      = 'rgba(189, 195, 199, 0.5)'

    # Common FPR grid on which every bootstrap curve is interpolated.
    fpr_mean    = np.linspace(0, 1, n_bootstraps)
    interp_tprs = []
    aucs        = []

    for i in range(n_bootstraps):
        # A private RandomState yields the same draws as seeding the global
        # generator with i, without disturbing NumPy's global RNG state.
        rng = np.random.RandomState(i)
        indices = rng.randint(0, n_samples, n_samples)
        y_boot = real_y[indices]
        if len(np.unique(y_boot)) < 2:
            # We need at least one positive and one negative sample for ROC AUC
            # to be defined: reject the sample
            continue
        score_boot = pred_y[indices]
        fpr, tpr, _ = roc_curve(y_boot, score_boot)
        # Interpolate right away so rejected resamples can never leave a gap
        # that a second, fixed-length loop would trip over.
        interp_tpr    = np.interp(fpr_mean, fpr, tpr)
        interp_tpr[0] = 0.0
        interp_tprs.append(interp_tpr)
        aucs.append(roc_auc_score(y_boot, score_boot))

    if not interp_tprs:
        raise ValueError(
            "no bootstrap resample contained both classes; ROC is undefined")

    tpr_mean     = np.mean(interp_tprs, axis=0)
    tpr_mean[-1] = 1.0
    tpr_std      = 2*np.std(interp_tprs, axis=0)
    tpr_upper    = np.clip(tpr_mean+tpr_std, 0, 1)
    # Clip the lower edge as well so the band stays inside the [0, 1] axes.
    tpr_lower    = np.clip(tpr_mean-tpr_std, 0, 1)
    auc          = np.mean(aucs)

    fig = go.Figure([
        go.Scatter(
            x          = fpr_mean,
            y          = tpr_upper,
            line       = dict(color=c_line, width=1),
            hoverinfo  = "skip",
            showlegend = False,
            name       = 'upper'),
        # 'tonexty' fills between this trace and the previous one (the upper edge).
        go.Scatter(
            x          = fpr_mean,
            y          = tpr_lower,
            fill       = 'tonexty',
            fillcolor  = c_fill,
            line       = dict(color=c_line, width=1),
            hoverinfo  = "skip",
            showlegend = False,
            name       = 'lower'),
        go.Scatter(
            x          = fpr_mean,
            y          = tpr_mean,
            line       = dict(color=c_line_main, width=2),
            hoverinfo  = "skip",
            showlegend = True,
            name       = f'AUC: {auc:.3f}')
    ])
    # Chance-level diagonal.
    fig.add_shape(
        type ='line', 
        line =dict(dash='dash'),
        x0=0, x1=1, y0=0, y1=1
    )
    fig.update_layout(
        template    = 'plotly_white', 
        title_x     = 0.5,
        xaxis_title = "1 - Specificity",
        yaxis_title = "Sensitivity",
        width       = 800,
        height      = 800,
        legend      = dict(
            yanchor="bottom", 
            xanchor="right", 
            x=0.95,
            y=0.01,
        )
    )
    # Square plot: y is anchored to x with a 1:1 ratio.
    fig.update_yaxes(
        range       = [0, 1],
        gridcolor   = c_grid,
        scaleanchor = "x", 
        scaleratio  = 1,
        linecolor   = 'black')
    fig.update_xaxes(
        range       = [0, 1],
        gridcolor   = c_grid,
        constrain   = 'domain',
        linecolor   = 'black')
    return fig

In [3]:
import pickle
from sklearn.metrics import roc_curve, auc,roc_auc_score
import os 
# All pickled model outputs live in this Google Drive folder; relative paths
# used below (and by later cells) are resolved against it.
os.chdir("/content/gdrive/MyDrive/TFM")


def _load_predictions(pickle_name):
    """Load the four arrays stored in one model's result pickle.

    The pickle must unpack into exactly four objects. Judging by the variable
    names used when this notebook was written, they are (labels,
    probabilities, subject-level labels, subject-level probabilities) --
    TODO confirm against the code that wrote the pickles.

    NOTE(security): ``pickle.load`` can execute arbitrary code contained in
    the file. Only load pickles that you produced yourself.
    """
    with open(pickle_name, 'rb') as f:
        labels, probs, subject_labels, subject_probs = pickle.load(f)
    return labels, probs, subject_labels, subject_probs


# One unpack per model, so every array keeps a name that matches its model
# (no shared scratch variables that get overwritten by the next load).
all_y_svc, all_p_svc, all_y_subject_svc, all_p_subject_svc = _load_predictions('best_svc.pkl')
all_y_rf, all_p_rf, all_y_subject_rf, all_p_subject_rf = _load_predictions('best_rf.pkl')
all_y_knn, all_p_knn, all_y_subject_knn, all_p_subject_knn = _load_predictions('best_knn.pkl')
all_y_gp, all_p_gp, all_y_subject_gp, all_p_subject_gp = _load_predictions('best_gp.pkl')
all_y_lr, all_p_lr, all_y_subject_lr, all_p_subject_lr = _load_predictions('best_lr.pkl')

In [14]:
# ROC comparison of all models: bootstrap band statistics + combined figure
import plotly

N_BOOTSTRAPS = 100


def _bootstrap_roc_band(real_y, pred_y, n_bootstraps=N_BOOTSTRAPS):
    """Summarise bootstrapped ROC curves for one model.

    The (label, score) pairs are resampled with replacement ``n_bootstraps``
    times (seeds 0..n_bootstraps-1). Every resample's ROC curve is
    interpolated onto a shared false-positive-rate grid of ``n_bootstraps``
    points.

    Returns a dict with:
        'fpr'       -- the shared FPR grid,
        'tpr_mean'  -- mean interpolated TPR (forced to end at 1.0),
        'tpr_upper' -- mean + 2 std, clipped to [0, 1],
        'tpr_lower' -- mean - 2 std, clipped to [0, 1],
        'auc'       -- mean ROC AUC over the accepted resamples.

    Raises ValueError if no resample contains both classes.
    """
    real_y = np.asarray(real_y)
    pred_y = np.asarray(pred_y)
    n_samples = len(pred_y)
    fpr_grid = np.linspace(0, 1, n_bootstraps)
    interp_tprs = []
    aucs = []
    for seed in range(n_bootstraps):
        # Same draws as seeding the global generator with `seed`, but without
        # touching NumPy's global RNG state.
        indices = np.random.RandomState(seed).randint(0, n_samples, n_samples)
        y_boot = real_y[indices]
        if len(np.unique(y_boot)) < 2:
            # We need at least one positive and one negative sample for ROC AUC
            # to be defined: reject the sample
            continue
        score_boot = pred_y[indices]
        fpr, tpr, _ = roc_curve(y_boot, score_boot)
        # Interpolating inside this loop means a rejected resample cannot
        # cause an out-of-range lookup afterwards.
        interp_tpr = np.interp(fpr_grid, fpr, tpr)
        interp_tpr[0] = 0.0
        interp_tprs.append(interp_tpr)
        aucs.append(roc_auc_score(y_boot, score_boot))
    if not interp_tprs:
        raise ValueError(
            "no bootstrap resample contained both classes; ROC is undefined")
    tpr_mean = np.mean(interp_tprs, axis=0)
    tpr_mean[-1] = 1.0
    tpr_spread = 2*np.std(interp_tprs, axis=0)
    return {
        'fpr': fpr_grid,
        'tpr_mean': tpr_mean,
        'tpr_upper': np.clip(tpr_mean + tpr_spread, 0, 1),
        'tpr_lower': np.clip(tpr_mean - tpr_spread, 0, 1),
        # Stored per model under its own key, so nothing rebinds the `auc`
        # function imported from sklearn.metrics.
        'auc': np.mean(aucs),
    }


def _roc_band_traces(label, stats, colors):
    """Return the three traces for one model: upper edge, filled lower edge, mean."""
    return [
        go.Scatter(
            x          = stats['fpr'],
            y          = stats['tpr_upper'],
            line       = dict(color=colors['line'], width=1),
            hoverinfo  = "skip",
            showlegend = False,
            name       = 'upper'),
        # 'tonexty' fills towards the previous trace, i.e. this model's upper
        # edge -- so the order of these three traces matters.
        go.Scatter(
            x          = stats['fpr'],
            y          = stats['tpr_lower'],
            fill       = 'tonexty',
            fillcolor  = colors['fill'],
            line       = dict(color=colors['line'], width=1),
            hoverinfo  = "skip",
            showlegend = False,
            name       = 'lower'),
        # The legend shows the AUC computed above instead of a hand-typed number.
        go.Scatter(
            x          = stats['fpr'],
            y          = stats['tpr_mean'],
            line       = dict(color=colors['main'], width=4),
            hoverinfo  = "skip",
            showlegend = True,
            name       = f"{label} AUC: {stats['auc']:.2f}"),
    ]


# Per-model inputs and palette (fill / band edge / mean line).
MODEL_SPECS = {
    'SVC': dict(y=all_y_svc, p=all_p_svc,
                fill='rgba(52, 152, 219, 0.2)',
                line='rgba(52, 152, 219, 0.5)',
                main='rgba(41, 128, 185, 1.0)'),
    'RF':  dict(y=all_y_rf, p=all_p_rf,
                fill='rgba(171, 210, 172, 0.2)',
                line='rgba(171, 210, 172, 0.5)',
                main='rgba(160, 200, 160, 1.0)'),
    'KNN': dict(y=all_y_knn, p=all_p_knn,
                fill='rgba(208, 178, 241, 0.2)',
                line='rgba(208, 178, 241, 0.5)',
                main='rgba(190, 158, 221, 1.0)'),
    'GP':  dict(y=all_y_gp, p=all_p_gp,
                fill='rgba(255, 223, 182, 0.2)',
                line='rgba(255, 223, 182, 0.5)',
                main='rgba(235, 203, 165, 1.0)'),
    # NOTE(review): LR's palette is identical to SVC's; pick distinct colours
    # before adding 'LR' to PLOTTED_MODELS.
    'LR':  dict(y=all_y_lr, p=all_p_lr,
                fill='rgba(52, 152, 219, 0.2)',
                line='rgba(52, 152, 219, 0.5)',
                main='rgba(41, 128, 185, 1.0)'),
}

# NOTE(review): the previous version drew SVC, RF and KNN in full but only a
# dangling "upper" edge for GP, and nothing for LR. GP is now drawn in full;
# LR statistics are computed but not plotted -- confirm this selection.
PLOTTED_MODELS = ('SVC', 'RF', 'KNN', 'GP')

c_grid = 'rgba(189, 195, 199, 0.5)'

roc_stats = {
    label: _bootstrap_roc_band(spec['y'], spec['p'])
    for label, spec in MODEL_SPECS.items()
}

roc_traces = []
for label in PLOTTED_MODELS:
    roc_traces.extend(_roc_band_traces(label, roc_stats[label], MODEL_SPECS[label]))

fig = go.Figure(roc_traces)

# Chance-level diagonal.
fig.add_shape(
    type ='line', 
    line =dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)
fig.update_layout(
    template    = 'plotly_white', 
    title_x     = 0.5,
    xaxis_title = "1 - Specificity",
    yaxis_title = "Sensitivity",
    width       = 800,
    height      = 800,
    legend      = dict(
        yanchor="bottom", 
        xanchor="right", 
        x=0.95,
        y=0.01,
    )
)
# Square plot: y is anchored to x with a 1:1 ratio.
fig.update_yaxes(
    range       = [0, 1],
    gridcolor   = c_grid,
    scaleanchor = "x", 
    scaleratio  = 1,
    linecolor   = 'black')
fig.update_xaxes(
    range       = [0, 1],
    gridcolor   = c_grid,
    constrain   = 'domain',
    linecolor   = 'black')




In [None]:
# Save the figure as EPS under "Resultados/", relative to the working
# directory (the TFM project folder selected with os.chdir earlier), instead
# of a machine-specific absolute path that does not exist on Colab.
# Static export needs plotly's image-export engine (kaleido) to be installed.
output_dir = "Resultados"
os.makedirs(output_dir, exist_ok=True)
fig.write_image(os.path.join(output_dir, "roc.eps"))