# RF result

In [1]:
import argparse
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV,RepeatedKFold, RepeatedStratifiedKFold, KFold, cross_val_score, StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import os
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, auc, roc_auc_score, recall_score, confusion_matrix, accuracy_score, f1_score, average_precision_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
raw_data = 'C:/Users/Salvador/Modelo_COVID19/Libretas manuscrito/BCM Infectius diseases/data_final_mor.csv'
data = pd.read_csv(raw_data)
print ("Total records", data.shape[0])

Total records 11564


In [3]:
X = data.drop(['Unnamed: 0','Mortality'],axis=1).astype(int)
y = data['Mortality'].astype(int)
y = y.values.reshape(y.shape[0],1)
feature_list = X.columns
print("X:", X.shape)
print("Y:", y.shape)

X: (11564, 37)
Y: (11564, 1)


In [4]:
import joblib
RF_loaded = joblib.load('C:/Users/Salvador/Modelo_COVID19/Libretas manuscrito/BCM Infectius diseases/PKL/modelRF1.pkl')

In [5]:
RF_loaded.best_params_

{'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': 8,
 'max_features': 'log2',
 'min_impurity_decrease': 0.0001,
 'n_estimators': 200,
 'random_state': 422}

In [6]:
print(X.shape)
print(y.shape)

(11564, 37)
(11564, 1)


In [7]:
X = X.to_numpy()

In [8]:
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=242)

test_auc = []
test_acc = []
test_ap = [] 
test_f1 = []
test_recall = []
test_precision = []

for train_index, test_index in rskf.split(X,y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]

  clf = RandomForestClassifier(**RF_loaded.best_params_)
  clf.fit(X_train, y_train)
  
  ps = clf.predict_proba(X_test)
  yp = clf.predict(X_test) 
  
  test_auc.append(roc_auc_score(y_test, ps[:,1]))
  test_acc.append(accuracy_score(y_test, yp))
  test_ap.append(average_precision_score(y_test, yp))
  test_f1.append(f1_score(y_test, yp))
  test_recall.append(recall_score(y_test, yp))
  test_precision.append(precision_score(y_test, yp))

In [9]:
RF_results = pd.DataFrame({'auc':test_auc, 'acc':test_acc, 'ap':test_ap, 'f1':test_f1, 'recall':test_recall,'precision':test_precision})
RF_results.head()

Unnamed: 0,auc,acc,ap,f1,recall,precision
0,0.844907,0.742437,0.30043,0.475352,0.833333,0.332512
1,0.854079,0.739844,0.290963,0.465364,0.808642,0.326683
2,0.843885,0.740709,0.291523,0.466192,0.803681,0.328321
3,0.846786,0.749352,0.302065,0.478417,0.815951,0.338422
4,0.857751,0.773356,0.321434,0.501901,0.814815,0.362637


In [10]:
file = 'C:/Users/Salvador/Modelo_COVID19/Libretas manuscrito/BCM Infectius diseases/result/RF_result.csv'
RF_results.to_csv(file,index=False)