# IACOV - Model Development for Prognosis - MV
## Strategy 1) Training using one hospital data and testing with 30% of the same hospital

First we import the required libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Show floats with three fixed decimals so that values are not displayed in scientific notation
pd.set_option('display.float_format', lambda x: '%.3f' % x)

#comment next line to not use MLFlow for cleaning data
from MLFlow_Classification import *
from MLFlow_Utils import *

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### This code uses just 14 hospitals, since ICU or MV has no variation for 4 hospitals
#### - The code that prepares the data to run all strategies for ICU and MV is: PREPARING_DATA_TO_UCI_MV_ANALYSES
#### - df_iacov_en.csv (used for death) was replaced here by df_iacov_model_lean_14hosp.csv
#### - put df_iacov_model_lean_14hosp.csv in same folder as this notebook

In [2]:
# The CSV must be in the same folder as this notebook. For ICU/MV the input file is
# df_iacov_model_lean_14hosp.csv instead of df_iacov_en.csv.
dataset = pd.read_csv("df_iacov_model_lean_14hosp.csv", sep=";")
df_iacov_model = pd.DataFrame(dataset)
df_iacov_model.shape

(6046, 30)

In [3]:
df_iacov_model.columns

Index(['city_hospital', 'age', 'male', 'race', 'heart_rate', 'resp_rate',
       'sys_press', 'dias_press', 'mean_press', 'temp', 'hemoglobin',
       'platelets', 'hematocrit', 'red_cells_count', 'hcm', 'rdw', 'mcv',
       'leukocytes', 'neutrophil', 'lymphocytes', 'basophils', 'eosinophils',
       'monocytes', 'crp', 'death', 'icu', 'mv', 'state', 'region', 'excluir'],
      dtype='object')

## 2.1) Assign binary outcome MV to be predicted

### 2.1.1) Maintain essential variables (LEAN Model)

In [4]:
# Lean model: hospital identifier + 22 predictors + the MV outcome
lean_columns = [
    'city_hospital',
    'age',
    'male',
    'heart_rate',
    'resp_rate',
    'sys_press',
    'dias_press',
    'mean_press',
    'temp',
    'hemoglobin',
    'platelets',
    'hematocrit',
    'red_cells_count',
    'hcm',
    'rdw',
    'mcv',
    'leukocytes',
    'neutrophil',
    'lymphocytes',
    'basophils',
    'eosinophils',
    'monocytes',
    'crp',
    'mv',
]
df_iacov_model_lean = df_iacov_model[lean_columns]
df_iacov_model_lean.shape

(6046, 24)

In [5]:
df_iacov_model_lean.to_csv('df_iacov_model_lean.csv', sep=';', index=False)

### 2.1.2) Filter a specific hospital

In [6]:
#Put csv in same folder as this notebook
df_all_hospitals = pd.read_csv("df_iacov_model_lean.csv", delimiter=";", decimal=".")
df_all_hospitals.shape

(6046, 24)

In [7]:
column_summaries(df_all_hospitals)

Unnamed: 0,Missing Values,% missing of Total Values,# Unique Values,# Values as [0],datatype,skew,count,mean,std,min,25%,50%,75%,max
city_hospital,0,0.0,14,0,object,,,,,,,,,
age,0,0.0,98,0,float64,-0.06,6046.0,57.521,17.869,18.0,44.0,58.0,71.0,105.0
male,0,0.0,2,2790,float64,-0.155,6046.0,0.539,0.499,0.0,0.0,1.0,1.0,1.0
heart_rate,809,13.381,183,0,float64,0.611,5237.0,85.689,17.209,13.0,75.0,85.0,96.0,233.0
resp_rate,1457,24.099,69,1,float64,2.897,4589.0,21.881,6.38,0.0,18.0,20.0,24.0,108.0
sys_press,1336,22.097,215,0,float64,-0.279,4710.0,124.054,22.791,10.0,110.0,122.0,138.0,242.0
dias_press,1325,21.915,150,0,float64,-0.361,4721.0,74.608,14.626,6.0,67.0,76.0,81.0,141.0
mean_press,1366,22.593,1452,326,float64,-1.751,4680.0,83.731,28.475,0.0,78.981,90.0,98.648,166.0
temp,1173,19.401,128,1,float64,62.972,4873.0,36.43,4.976,0.0,36.0,36.3,36.8,372.0
hemoglobin,1904,31.492,226,0,float64,-0.628,4142.0,12.752,2.189,1.4,11.692,13.0,14.2,30.6


In [8]:
import ipywidgets as widgets
from IPython.display import clear_output

In [9]:
df_all_hospitals.city_hospital.value_counts()

city_hospital
HC_USP                        1500
HOSPPORTUGUES_SALVADOR        1359
CEARA_UNIMED                   845
HRL_2021_02                    539
HMV_POA_02                     456
RIO_DE_JANEIRO_UNIMED          449
HOSPSANTAJULIA_MANAUS          247
HOSPSANTACATARINABLUMENAU      148
HOSPSAOFRANCISCO_MOGIGUACU     124
PERNAMBUCO_FULL                112
PELOTAS                         91
CEARA_HUWC                      73
HEVV                            56
AMAZONAS_HUGV                   47
Name: count, dtype: int64

In [10]:
# Hospitals excluded from the ICU and MV analyses:
# 'HOSPSANTACASASP_FULL', 'HOSPGRUPOSANTA', 'HUTRIN_2021_02', 'RIO_DE_JANEIRO_HUCFF'
hospital_options = [
    'Choose a hospital ...',        # placeholder entry
    'HC_USP',                       # SP
    'HOSPPORTUGUES_SALVADOR',       # BA
    'CEARA_UNIMED',                 # CE
    'HRL_2021_02',                  # GO - Luziania
    'HMV_POA_02',                   # RS - Moinhos de Vento
    'RIO_DE_JANEIRO_UNIMED',        # RJ
    'HOSPSANTAJULIA_MANAUS',        # AM
    'HOSPSANTACATARINABLUMENAU',    # SC
    'HOSPSAOFRANCISCO_MOGIGUACU',   # SP
    'PERNAMBUCO_FULL',              # PE
    'PELOTAS',                      # RS
    'CEARA_HUWC',                   # CE
    'HEVV',                         # ES - Vila Velha
    'AMAZONAS_HUGV',                # AM
    # 'GHC_02',                     # RS
]
dropdown_hospital = widgets.Dropdown(options=hospital_options)

In [11]:
specific_hospital = 'Choose a hospital ...'
df_iacov_model_lean = df_all_hospitals

def dropdown_hospital_eventhandler(change):
    """Store the hospital picked in the dropdown in the global ``specific_hospital``.

    Parameters
    ----------
    change
        Change notification passed by the widget; only its ``new`` attribute
        (the newly selected value) is read.
    """
    global specific_hospital
    specific_hospital = change.new
    # The handler is attached to the widget once, outside this function, so it
    # must not register itself again every time it fires.

# df_iacov_model_lean[df_iacov_model_lean.city_hospital == specific_hospital].shape

In [12]:
dropdown_hospital.observe(dropdown_hospital_eventhandler, names='value')

**Select Hospital to train**

In [14]:
display(dropdown_hospital)

Dropdown(index=6, options=('Choose a hospital ...', 'HC_USP', 'HOSPPORTUGUES_SALVADOR', 'CEARA_UNIMED', 'HRL_2…

### df_single_hospital is the data of hospital selected

In [15]:
# Keep only the rows that belong to the hospital currently stored in specific_hospital
df_single_hospital = df_all_hospitals[df_all_hospitals.city_hospital == specific_hospital]
if df_single_hospital.empty:
    # Typically the dropdown is still on its placeholder entry; fail here with a clear
    # message instead of letting a later cell break on an empty frame.
    raise ValueError(
        f"No rows found for hospital {specific_hospital!r}; "
        "select a hospital in the dropdown before running this cell."
    )
df_single_hospital.shape

(449, 24)

In [16]:
column_summaries(df_single_hospital)

Unnamed: 0,Missing Values,% missing of Total Values,# Unique Values,# Values as [0],datatype,skew,count,mean,std,min,25%,50%,75%,max
city_hospital,0,0.0,1,0,object,,,,,,,,,
age,0,0.0,74,0,float64,-0.476,449.0,66.165,17.043,22.0,55.0,68.0,80.0,97.0
male,0,0.0,2,189,float64,-0.321,449.0,0.579,0.494,0.0,0.0,1.0,1.0,1.0
heart_rate,16,3.563,71,0,float64,1.082,433.0,83.748,15.184,49.0,73.0,82.0,92.0,168.0
resp_rate,42,9.354,27,0,float64,3.253,407.0,20.263,4.616,5.0,18.0,20.0,21.0,63.0
sys_press,20,4.454,94,0,float64,0.157,429.0,130.753,20.556,54.0,118.0,130.0,142.0,195.0
dias_press,20,4.454,65,0,float64,0.109,429.0,77.159,13.039,31.0,69.0,77.0,86.0,127.0
mean_press,20,4.454,71,0,float64,0.026,429.0,95.531,13.925,37.0,87.0,95.0,104.0,144.0
temp,28,6.236,44,0,float64,-7.33,421.0,36.337,1.173,21.4,36.0,36.4,36.7,38.8
hemoglobin,29,6.459,96,0,float64,-0.593,420.0,12.742,2.031,5.0,11.7,12.9,14.1,18.1


### 2.1.3) Dropping unused variables - city_hospital

In [17]:
df_single_hospital = df_single_hospital.drop(['city_hospital'],axis=1)
df_single_hospital.shape

(449, 23)

In [18]:
# Fill missing values (NAs) of the outcome column with 0
df_single_hospital['mv'] = df_single_hospital['mv'].fillna(0)
df_single_hospital.mv.value_counts()

mv
0    285
1    164
Name: count, dtype: int64

In [19]:
(df_single_hospital.mv.value_counts()/len(df_single_hospital))*100

mv
0   63.474
1   36.526
Name: count, dtype: float64

### From now on the outcome will be called CLASS

In [20]:
df_single_hospital['class'] = df_single_hospital['mv'].astype('int')
df_single_hospital = df_single_hospital.drop(['mv'],axis=1)

### 2.1.4) Recalculating null mean_press

In [21]:
def isNullMeanPressure(row):
    """Return the mean pressure of one row, estimating it when it is missing.

    Parameters
    ----------
    row
        Mapping-like record (e.g. a DataFrame row) with the keys
        ``mean_press``, ``sys_press`` and ``dias_press``.

    Returns
    -------
    The existing ``mean_press`` when it is not null. When it is null and both
    ``sys_press`` and ``dias_press`` are available, the estimate
    ``(sys_press + 2 * dias_press) / 3``. Otherwise the (null) ``mean_press``
    is returned unchanged.
    """
    if pd.notnull(row['mean_press']):
        return row['mean_press']

    if pd.notnull(row['sys_press']) and pd.notnull(row['dias_press']):
        # Mean arterial pressure weights the diastolic value twice. A plain
        # average of the two pressures overestimates it and would not match
        # the mean_press values already recorded in the data.
        return (row['sys_press'] + 2 * row['dias_press']) / 3

    return row['mean_press']

In [22]:
#Check mean_press missing before recalculating
#column_summaries(df_single_hospital)

In [23]:
df_single_hospital['mean_press'] = df_single_hospital.apply(isNullMeanPressure, axis=1)

In [24]:
#Check mean_press missing after recalculating
#column_summaries(df_single_hospital)

## 2.2) Prepare Experiment

1. First, create your experiment 

In [25]:
df_single_hospital.shape

(449, 23)

In [26]:
#import inspect
#inspect.getsource(setup)

__Attention!__: If any Dtype is bool, cast it to int32, otherwise the experiment will fail

In [27]:
df_single_hospital.info()

<class 'pandas.core.frame.DataFrame'>
Index: 449 entries, 4602 to 5050
Data columns (total 23 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   age              449 non-null    float64
 1   male             449 non-null    float64
 2   heart_rate       433 non-null    float64
 3   resp_rate        407 non-null    float64
 4   sys_press        429 non-null    float64
 5   dias_press       429 non-null    float64
 6   mean_press       429 non-null    float64
 7   temp             421 non-null    float64
 8   hemoglobin       420 non-null    float64
 9   platelets        420 non-null    float64
 10  hematocrit       97 non-null     float64
 11  red_cells_count  420 non-null    float64
 12  hcm              420 non-null    float64
 13  rdw              420 non-null    float64
 14  mcv              420 non-null    float64
 15  leukocytes       420 non-null    float64
 16  neutrophil       420 non-null    float64
 17  lymphocytes      

In [28]:
# from sklearn.model_selection import train_test_split
# X_train_specific, X_test_specific, y_train_specific, y_test_specific = train_test_split(df_iacov_model_lean.loc[:, df_iacov_model_lean.columns != 'class'], df_iacov_model_lean['class'], test_size=0.30, random_state=42, stratify=df_iacov_model_lean['class'])
# X_test_specific.shape

In [29]:
#!pip install --upgrade scikit-learn

In [30]:
#import sklearn
#print(sklearn.__version__)

In [31]:
#import sys
#print(sys.path)

**Install the libraries below if the experiment setup raises an error**
* Update the imbalanced-learn package and restart the kernel
* Install the remaining libraries

In [32]:
#!pip install -U imbalanced-learn

In [33]:
#!pip install datefinder

In [34]:
#!pip install cufflinks

In [35]:
#!pip install lightgbm

In [36]:
#!pip install xgboost

In [37]:
#!pip install pyod

In [38]:
#!pip install catboost

In [39]:
#!pip install hyperopt

In [40]:
#!pip install shap

In [41]:
#import pyod

In [42]:
from imblearn.under_sampling import EditedNearestNeighbours

# Create the experiment on the single-hospital data with 'class' as the target.
# NOTE(review): setup is not defined in this notebook; presumably it comes from the
# star-imported MLFlow_* modules - confirm there what each option does.
exp = setup(df_single_hospital, target='class'
            , categorical_features = ['male']
            # each feature is listed once ('resp_rate' used to appear twice)
            , numeric_features = ['crp','basophils','eosinophils','red_cells_count','monocytes','hemoglobin','resp_rate','neutrophil','hematocrit','heart_rate','sys_press','dias_press','mean_press','temp']
            , normalize=True
            #, remove_multicollinearity=True
            ,numeric_imputation='median'
#             ,numeric_imputation='ignore'
            #,multicollinearity_threshold=0.9
            , resample=True
            , resample_method='random_over'
#             , train_size = 0.70 #Default: 0.70
           )

 
Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,42
1,Target Type,Binary
2,Label Encoded,
3,Original Data,"(449, 23)"
4,Missing Values,True
5,Numeric Features,21
6,Categorical Features,1
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


In [45]:
X, y, X_train, X_test, y_train, y_test, seed, prep_pipe, _ = exp

In [46]:
X_train.shape

(398, 22)

In [47]:
X_test.shape

(135, 22)

In [48]:
specific_hospital

'RIO_DE_JANEIRO_UNIMED'

### Put X_test and y_test in separate files

In [49]:
x_test_specific_name = 'X_test_' + 'baseline_' + specific_hospital +  ".csv"
x_test_specific_name

'X_test_baseline_RIO_DE_JANEIRO_UNIMED.csv'

In [50]:
y_test_specific_name = 'y_mv_' + 'baseline_' + specific_hospital + ".csv"
y_test_specific_name

'y_mv_baseline_RIO_DE_JANEIRO_UNIMED.csv'

In [51]:
X_test.to_csv(x_test_specific_name, sep=';')

In [52]:
y_test.to_csv(y_test_specific_name, sep=';')

### Filtering only 3 pre-selected models (Check training metrics)

In [53]:
seed

42

In [54]:
# Models that support missing values (1) - pre-selected models (2)
# Every estimator id listed here is left out of the comparison.
excluded_models = [
    "lr", "knn", "nb", "dt", "svm", "rbfsvm", "gpc", "ridge",
    "qda", "ada", "gbc", "lda", "et", "mlp", "rf",
]
compare_models(blacklist=excluded_models, turbo=False)

Unnamed: 0,Model,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,CatBoost Classifier,0.8719,0.9368,0.9197,0.8239,0.8417,0.8775,0.7438,0.4553,0.4616,0.5847
1,Light Gradient Boosting Machine,0.8468,0.9092,0.8897,0.8037,0.8226,0.8514,0.6935,0.1653,0.1518,0.4901
2,Extreme Gradient Boosting,0.8367,0.9088,0.8547,0.8184,0.8305,0.8373,0.6733,0.125,0.0631,0.4722


### Tune selected models by AUC (and hyperopt bayesian optimization) to use on test data

In [55]:
rstate = np.random.RandomState(42)

In [56]:
# Rebinds rstate to a NumPy Generator seeded with 42, replacing the RandomState assigned
# in the previous cell. No other visible cell reads rstate.
rstate = np.random.default_rng(42)

In [57]:
from hyperopt import fmin

In [58]:
print(pd. __version__)

2.2.0


In [59]:
cattuned = tune_model('catboost', optimize='AUC', n_iter=20)

Unnamed: 0,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,0.75,0.93,0.7,0.8,0.778,0.737,0.5,0.757,0.178,0.551
1,0.775,0.88,0.75,0.8,0.789,0.769,0.55,0.0,0.0,0.347
2,0.825,0.902,0.9,0.75,0.783,0.837,0.65,0.0,0.0,0.415
3,0.825,0.917,0.9,0.75,0.783,0.837,0.65,0.0,0.0,0.39
4,0.9,0.955,0.9,0.9,0.9,0.9,0.8,0.0,0.081,0.677
5,0.8,0.912,0.8,0.8,0.8,0.8,0.6,0.008,0.0,0.381
6,0.825,0.96,0.9,0.75,0.783,0.837,0.65,0.024,0.013,0.561
7,0.975,0.998,1.0,0.95,0.952,0.976,0.95,0.998,0.478,0.928
8,0.846,0.963,0.947,0.75,0.783,0.857,0.694,0.0,0.001,0.515
9,0.897,0.982,1.0,0.789,0.833,0.909,0.794,0.058,0.055,0.656


In [60]:
lgbmtuned = tune_model('lightgbm', optimize='AUC', n_iter=20)

Unnamed: 0,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,0.825,0.925,0.8,0.85,0.842,0.821,0.65,0.0,0.002,0.462
1,0.75,0.838,0.75,0.75,0.75,0.75,0.5,0.0,0.0,0.143
2,0.75,0.863,0.85,0.65,0.708,0.773,0.5,0.0,0.0,0.2
3,0.8,0.912,0.9,0.7,0.75,0.818,0.6,0.0,0.0,0.282
4,0.875,0.938,0.9,0.85,0.857,0.878,0.75,0.0,0.0,0.549
5,0.8,0.912,0.8,0.8,0.8,0.8,0.6,0.0,0.002,0.444
6,0.825,0.973,1.0,0.65,0.741,0.851,0.65,0.0,0.0,0.511
7,0.95,0.995,1.0,0.9,0.909,0.952,0.9,0.436,0.411,0.821
8,0.795,0.932,0.947,0.65,0.72,0.818,0.593,0.0,0.0,0.372
9,0.872,0.924,1.0,0.737,0.8,0.889,0.742,0.0,0.0,0.485


In [61]:
xgbtuned = tune_model('xgboost', optimize='AUC', n_iter=20)

Unnamed: 0,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,0.85,0.905,0.95,0.75,0.792,0.864,0.7,0.152,0.753,0.488
1,0.775,0.835,0.8,0.75,0.762,0.78,0.55,0.003,0.191,0.35
2,0.725,0.8,0.85,0.6,0.68,0.756,0.45,0.0,0.01,0.263
3,0.775,0.83,0.95,0.6,0.704,0.808,0.55,0.0,0.014,0.296
4,0.875,0.91,0.9,0.85,0.857,0.878,0.75,0.25,0.792,0.583
5,0.825,0.897,0.9,0.75,0.783,0.837,0.65,0.935,0.634,0.427
6,0.875,0.95,1.0,0.75,0.8,0.889,0.75,0.214,0.694,0.549
7,0.95,0.973,1.0,0.9,0.909,0.952,0.9,0.487,0.27,0.758
8,0.821,0.874,1.0,0.65,0.731,0.844,0.644,0.0,0.115,0.411
9,0.821,0.863,1.0,0.632,0.741,0.851,0.637,0.032,0.169,0.418


### Select best algorithm based on AUC in test 
##### In previous code (death) this title was indicating that this selection was being done in test data, however it seems that it is based on training

In [62]:
preds_cat = predict_model(cattuned)

Unnamed: 0,Model,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,CatBoost Classifier,0.763,0.84,0.775,0.756,0.644,0.704,0.509,0.0,0.0,0.257


In [63]:
preds_lgb = predict_model(lgbmtuned)

Unnamed: 0,Model,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,Light Gradient Boosting Machine,0.778,0.865,0.816,0.756,0.656,0.727,0.543,0.0,0.0,0.28


In [64]:
preds_xgb = predict_model(xgbtuned)

Unnamed: 0,Model,Accuracy,AUC,Recall,Specificity,Prec.,F1,Kappa,Hosmer_lemeshow,Spiegelhalter,Scaled_brier
0,Extreme Gradient Boosting,0.719,0.863,0.857,0.639,0.575,0.689,0.449,0.0,0.001,0.212


### Understand best algorithm features and results
#### Put in PREDS the best algorithm chosen in the previous lines

In [None]:
# Manual choice: bind `preds` to the predictions of the best algorithm from the three
# predict_model cells above (preds_cat OR preds_lgb OR preds_xgb). Leave exactly one
# assignment uncommented.
# NOTE(review): the interpret_model and finalize_model cells further down hard-code the
# matching tuned model; change all three together.

#preds = preds_cat
#preds = preds_lgb
preds = preds_xgb

#binary_classification_metrics(y_test, preds['Label'], preds['Score'])

In [None]:
# ROC AUC (interval): every value returned by AUC_CI formatted with two decimals.
# Uses the public Series.to_numpy() instead of the private `_values` attribute.
[f"{bound:0.2f}" for bound in AUC_CI(y_test, preds['Score'].to_numpy())]

In [None]:
npv(y_test, preds['Label'])

In [None]:
ppv(y_test, preds['Label'])

In [None]:
# evaluate_model(xgbtuned)

In [None]:
#!pip install shap

#### Run INTERPRET_MODEL only for the best algorithm chosen in the previous lines

In [None]:
#interpret_model(cattuned)
#interpret_model(lgbmtuned)
interpret_model(xgbtuned)

In [None]:
# plot_model(xgbtuned, plot='ktops')

### 2.3.1) Build a model with all data
#### Run FINALIZE MODEL only for the best algorithm chosen in the previous lines

In [None]:
#final_model = finalize_model(cattuned)
#final_model = finalize_model(lgbmtuned)
final_model = finalize_model(xgbtuned)

### 2.3.2) Using model to predict unseen outcome

In [None]:
str(final_model.__class__.__name__)

In [None]:
tuned_model_name = str(final_model.__class__.__name__) + '_' + 'tuned' + '_baseline_' + specific_hospital
tuned_model_name

In [None]:
final_model_name = str(final_model.__class__.__name__) + '_baseline_' \
                    + specific_hospital
final_model_name

In [None]:
save_model(final_model, final_model_name ,verbose=True)

In [None]:
#final_model
final_model.get_all_params()

# Use the tuned saved model to predict on specific hospital
PS: Only in this strategy I don't finalize the model, because otherwise it would be trained on the same data it is tested on, resulting in falsely better performance

In [None]:
### The joblib load below did not work; for strategy 1 the test predictions WERE ALREADY OBTAINED by predict_model above

import pickle
import joblib
from sklearn.metrics import (roc_curve, auc)

#baseline_pkl = final_model_name + ".pkl"
#prep_pipe_baseline, baseline = joblib.load(baseline_pkl)

# Reload the hold-out features and outcome written to disk earlier in this notebook
X_test_baseline = pd.read_csv(x_test_specific_name, delimiter=";",index_col=0)
y_test_baseline = pd.read_csv(y_test_specific_name, delimiter=";",index_col=0)

# Score the hold-out rows with the tuned model, NOT with final_model: final_model comes
# from finalize_model ("build a model with all data"), so it has already seen these rows
# and its AUC on them would be optimistically biased.
# Keep this choice in sync with the algorithm selected for `preds`.
#tuned_model = cattuned
#tuned_model = lgbmtuned
tuned_model = xgbtuned

y_pred_prob_baseline = tuned_model.predict_proba(X_test_baseline)[:,1]
# Pass the outcome as a 1-D column, the same way the plotting cell reads it
fpr_baseline , tpr_baseline, thresholds_baseline = roc_curve(y_test_baseline["class"], y_pred_prob_baseline)
auc_baseline = auc(fpr_baseline, tpr_baseline)
auc_baseline

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(8, 6))
# fig.suptitle('Receiver Operating Characteristic')

#############################################
## General VS Specific
#############################################

# White panel with a black frame on all four sides
ax1.set_facecolor('xkcd:white')
for side in ('left', 'bottom', 'top', 'right'):
    ax1.spines[side].set_color('#000000')
# ax1.grid(color='grey', linestyle='solid',alpha=0.3)

# Dashed diagonal as the reference line
ax1.plot([0, 1], [0, 1], 'k--')

# One colour per curve, sampled from the rainbow colormap
n = 1
colors = plt.cm.rainbow(np.linspace(0, 1, n))

ci_baseline = AUC_CI(y_test_baseline["class"], y_pred_prob_baseline)

# for i in range(n):

curve_label = "Baseline Model - AUC " + str(np.around(ci_baseline,2))
ax1.plot(
    fpr_baseline,
    tpr_baseline,
    label=curve_label,
    color=colors[0],
    linewidth=2,
    linestyle=":",
)

ax1.legend(frameon=True, shadow=True, borderpad=1, facecolor="white")
ax1.set_xlabel("FPR")
ax1.set_ylabel("TPR")

# plt.savefig("sup_fig1.svg")