In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

In [2]:
csv_path = os.path.join("..", "..", "data","LINKED_DATA", "TSR_EHR", "TSR_6_CLEANED.csv")
tsr_6 = pd.read_csv(csv_path)
tsr_6.head()

Unnamed: 0,height_nm,weight_nm,edu_id,pro_id,opc_id,ih_fl,ivtpamg_nm,hospitalised_time,nivtpa_id,nivtpa1_fl,...,nihs_8_out,nihs_9_out,nihs_10_out,nihs_11_out,total_out,SexName,Age,mrs_tx_1,mrs_tx_3,mrs_tx_6
0,153.0,62.0,3,1,3,0,0.0,8.0,0,999,...,1,0,1,0,4,0,67.0,1,1,1
1,152.0,62.0,3,1,2,0,0.0,4.0,0,999,...,1,0,0,0,1,0,69.0,1,0,0
2,148.0,56.0,2,1,2,0,0.0,5.0,0,999,...,1,0,0,0,2,0,71.0,0,0,0
3,152.0,56.0,4,1,2,0,0.0,3.0,1,0,...,0,0,0,0,0,0,71.0,0,0,0
4,160.0,60.0,2,1,3,0,0.0,4.0,0,999,...,0,0,0,0,4,0,62.0,3,3,3


In [3]:
tsr_6_input = tsr_6.drop(["mrs_tx_6"], axis=1)
tsr_6_input[tsr_6_input == "N"] = 0
tsr_6_input[tsr_6_input == "Y"] = 1
tsr_6_input = tsr_6_input.astype("float64")
tsr_6_input = np.array(tsr_6_input.values)

tsr_6_input_nomrs = tsr_6.drop(["mrs_tx_6", "mrs_tx_3", "mrs_tx_1"], axis=1)
tsr_6_input_nomrs[tsr_6_input_nomrs == "N"] = 0
tsr_6_input_nomrs[tsr_6_input_nomrs == "Y"] = 1
tsr_6_input_nomrs = tsr_6_input_nomrs.astype("float64")
tsr_6_input_nomrs = np.array(tsr_6_input_nomrs.values)

# 6 classes

In [4]:
tsr_6_output = tsr_6.mrs_tx_6
tsr_6_output = tsr_6_output.astype("float64")
tsr_6_output = np.array(tsr_6_output.values)

## SVM

In [5]:
svc = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores = cross_val_score(svc,tsr_6_input,tsr_6_output,cv = 10,scoring='accuracy')
print(svc_scores)
print(svc_scores.mean(), svc_scores.std())

[0.51682692 0.56144578 0.58072289 0.56144578 0.54698795 0.57349398
 0.59759036 0.62891566 0.62650602 0.47710843]
0.5671043790546803 0.04427265406672428


In [6]:
svc.fit(tsr_6_input,tsr_6_output)
svc_predict =svc.predict_proba(tsr_6_input)
print(svc_predict)

[[2.62630828e-01 4.36144827e-01 1.79607732e-01 5.90131023e-02
  6.10074427e-02 1.59606671e-03]
 [4.82963580e-01 3.30278037e-01 1.16349493e-01 2.75117104e-02
  4.28076717e-02 8.95077371e-05]
 [4.35502619e-01 3.90729605e-01 9.92472343e-02 2.42406221e-02
  5.00859597e-02 1.93960671e-04]
 ...
 [1.96591181e-04 5.14861668e-02 8.44364505e-02 7.84446130e-02
  3.46359020e-01 4.39077158e-01]
 [4.47502548e-01 3.57509939e-01 1.15367133e-01 5.27176679e-02
  2.68932534e-02 9.45921566e-06]
 [1.95877948e-01 5.84952461e-01 1.27338808e-01 5.20396461e-02
  3.85399454e-02 1.25119145e-03]]


In [7]:
svc_pred = cross_val_predict(svc,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, svc_pred)

array([[480, 197,   1,   6,   1,   2],
       [191, 933,  22,  38,  12,   3],
       [  8, 406,  28,  81,  42,   6],
       [  2, 131,  41, 213, 172,  20],
       [  3,  22,   8, 121, 297, 156],
       [  1,  12,   1,  18,  73, 403]], dtype=int64)

In [8]:
svc1 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores1 = cross_val_score(svc1,tsr_6_input_nomrs,tsr_6_output,cv = 10,scoring='accuracy')
print(svc_scores1)
print(svc_scores1.mean(), svc_scores1.std())

[0.41826923 0.46987952 0.48192771 0.45060241 0.44819277 0.47951807
 0.48192771 0.48915663 0.48674699 0.38072289]
0.4586943929564411 0.03353015888203472


In [9]:
svc1.fit(tsr_6_input_nomrs,tsr_6_output)
svc_predict1 =svc1.predict_proba(tsr_6_input_nomrs)
print(svc_predict1)

[[0.24917628 0.40797357 0.17433398 0.06990812 0.07102823 0.02757982]
 [0.32759771 0.37114698 0.15689171 0.05004688 0.08238716 0.01192955]
 [0.24366189 0.43176777 0.14583386 0.05374681 0.10052211 0.02446756]
 ...
 [0.01396494 0.06286017 0.11007298 0.10818105 0.36008193 0.34483893]
 [0.13830491 0.40890712 0.2218385  0.14608549 0.07497908 0.00988491]
 [0.33674387 0.48128917 0.09533672 0.03949874 0.03237625 0.01475526]]


In [10]:
svc_pred1 = cross_val_predict(svc,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, svc_pred1)

array([[480, 197,   1,   6,   1,   2],
       [191, 933,  22,  38,  12,   3],
       [  8, 406,  28,  81,  42,   6],
       [  2, 131,  41, 213, 172,  20],
       [  3,  22,   8, 121, 297, 156],
       [  1,  12,   1,  18,  73, 403]], dtype=int64)

## RF

In [11]:
rf = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores = cross_val_score(rf,tsr_6_input,tsr_6_output,cv = 10,scoring='accuracy')
print(rf_scores)
print(rf_scores.mean(), rf_scores.std())

[0.83894231 0.77831325 0.78313253 0.80240964 0.75421687 0.81927711
 0.84578313 0.86506024 0.87710843 0.77831325]
0.8142556765523633 0.039316092051628566


In [12]:
rf.fit(tsr_6_input,tsr_6_output)
rf_predict =rf.predict_proba(tsr_6_input)
print(rf_predict)

[[0.04000209 0.86645474 0.03114201 0.02557868 0.021572   0.01525048]
 [0.8675884  0.04180575 0.03009286 0.02481917 0.02092116 0.01477265]
 [0.85046666 0.05834707 0.03030459 0.0249512  0.02107068 0.0148598 ]
 ...
 [0.03266449 0.0457411  0.03321087 0.02754244 0.56290207 0.29793904]
 [0.83803154 0.07077873 0.03029367 0.02496977 0.021071   0.01485528]
 [0.08984111 0.80060191 0.03994975 0.03064247 0.02282925 0.01613552]]


In [13]:
rf_pred = cross_val_predict(rf,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, rf_pred)

array([[523, 133,  23,   5,   1,   2],
       [ 59, 983, 129,  24,   2,   2],
       [  4,  46, 428,  80,  11,   2],
       [  1,   6,  15, 465,  87,   5],
       [  3,   3,   9,  23, 537,  32],
       [  1,   5,   5,   5,  48, 444]], dtype=int64)

In [14]:
rf1 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores1 = cross_val_score(rf1,tsr_6_input_nomrs,tsr_6_output,cv = 10,scoring='accuracy')
print(rf_scores1)
print(rf_scores1.mean(), rf_scores1.std())

[0.54567308 0.52771084 0.5253012  0.47951807 0.50120482 0.54457831
 0.54698795 0.57349398 0.54939759 0.49638554]
0.529025139017609 0.027459224541674414


In [15]:
rf1.fit(tsr_6_input_nomrs,tsr_6_output)
rf_predict1 =rf1.predict_proba(tsr_6_input_nomrs)
print(rf_predict1)

[[0.07728163 0.74656812 0.06191197 0.04787064 0.04120978 0.02515784]
 [0.57465898 0.24627601 0.06232256 0.05131628 0.0406614  0.02476477]
 [0.68114596 0.1454417  0.05660159 0.04628846 0.03987006 0.03065223]
 ...
 [0.04903728 0.08993016 0.06395325 0.05959659 0.51558801 0.22189472]
 [0.54273872 0.22343855 0.11604724 0.04970156 0.04234806 0.02572587]
 [0.10491886 0.65762199 0.09732522 0.06482629 0.0483315  0.02697615]]


In [16]:
rf_pred1 = cross_val_predict(rf1,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, rf_pred1)

array([[523, 133,  23,   5,   1,   2],
       [ 59, 983, 129,  24,   2,   2],
       [  4,  46, 428,  80,  11,   2],
       [  1,   6,  15, 465,  87,   5],
       [  3,   3,   9,  23, 537,  32],
       [  1,   5,   5,   5,  48, 444]], dtype=int64)

## XGBoost

In [17]:
xgb = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores = cross_val_score(xgb,tsr_6_input,tsr_6_output,cv = 10)
print(xgb_scores)
print(xgb_scores.mean(), xgb_scores.std())

[0.84375    0.77590361 0.79036145 0.78554217 0.75662651 0.8313253
 0.85783133 0.86746988 0.8746988  0.77831325]
0.8161822289156626 0.04121576140668579


In [18]:
xgb.fit(tsr_6_input,tsr_6_output)
xgb_predict =xgb.predict_proba(tsr_6_input)
print(xgb_predict)

[[0.04512145 0.84098348 0.04130014 0.03170251 0.02317658 0.01771583]
 [0.83629073 0.0563939  0.03875986 0.02994878 0.02192818 0.01667855]
 [0.83547843 0.0570138  0.03889254 0.02997052 0.0219563  0.01668841]
 ...
 [0.04092791 0.05704307 0.04076101 0.03114246 0.64629912 0.18382643]
 [0.83733321 0.05547512 0.03871359 0.02991198 0.02190423 0.01666188]
 [0.04371344 0.84093849 0.04258106 0.03183324 0.02319587 0.0177379 ]]


In [19]:
xgb_pred = cross_val_predict(xgb,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, xgb_pred)

array([[528, 130,  22,   4,   1,   2],
       [ 57, 988, 126,  24,   2,   2],
       [  4,  46, 428,  80,  11,   2],
       [  1,   7,  14, 467,  85,   5],
       [  3,   3,   9,  26, 533,  33],
       [  1,   4,   6,   6,  47, 444]], dtype=int64)

In [20]:
xgb1 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores1 = cross_val_score(xgb1,tsr_6_input_nomrs,tsr_6_output,cv = 10)
print(xgb_scores1)
print(xgb_scores1.mean(), xgb_scores1.std())

[0.55769231 0.51325301 0.49879518 0.45783133 0.46746988 0.54216867
 0.53975904 0.5373494  0.55180723 0.45783133]
0.5123957367933272 0.03744529465468969


In [21]:
xgb1.fit(tsr_6_input_nomrs,tsr_6_output)
xgb_predict1 =xgb1.predict_proba(tsr_6_input_nomrs)
print(xgb_predict1)

[[0.12025963 0.6174519  0.09060211 0.07298136 0.06138691 0.03731809]
 [0.58102131 0.17824582 0.08284752 0.06721667 0.05655691 0.03411177]
 [0.61201422 0.15046757 0.0817671  0.06619166 0.0558391  0.03372034]
 ...
 [0.06770862 0.09848145 0.08031112 0.06551596 0.52392851 0.16405434]
 [0.55172081 0.14411421 0.1418541  0.07025728 0.05743581 0.03461779]
 [0.11449874 0.59234981 0.11380467 0.07778259 0.06327538 0.03828881]]


In [22]:
xgb_pred1 = cross_val_predict(xgb1,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, xgb_pred1)

array([[528, 130,  22,   4,   1,   2],
       [ 57, 988, 126,  24,   2,   2],
       [  4,  46, 428,  80,  11,   2],
       [  1,   7,  14, 467,  85,   5],
       [  3,   3,   9,  26, 533,  33],
       [  1,   4,   6,   6,  47, 444]], dtype=int64)

# 2 classes

In [23]:
tsr_6_output[(tsr_6_output == 0)|(tsr_6_output == 1)|(tsr_6_output == 2)] = 0
tsr_6_output[(tsr_6_output == 3)|(tsr_6_output == 4)|(tsr_6_output == 5)] = 1

## SVM

In [24]:
svc2 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores2 = cross_val_score(svc2,tsr_6_input,tsr_6_output,cv = 10,scoring='accuracy')
print(svc_scores2)
print(svc_scores2.mean(), svc_scores2.std())

[0.90384615 0.9373494  0.95180723 0.95421687 0.9373494  0.95421687
 0.95421687 0.95421687 0.94457831 0.92289157]
0.941468952734013 0.016003503213462875


In [25]:
svc2.fit(tsr_6_input,tsr_6_output)
svc_predict2 =svc2.predict_proba(tsr_6_input)
print(svc_predict2)

[[9.91738275e-01 8.26172474e-03]
 [9.99194398e-01 8.05601774e-04]
 [9.98234342e-01 1.76565816e-03]
 ...
 [1.42816728e-03 9.98571833e-01]
 [9.99878052e-01 1.21948397e-04]
 [9.94883991e-01 5.11600858e-03]]


In [26]:
svc_pred2 = cross_val_predict(svc2,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, svc_pred2)

array([[2327,  130],
       [ 113, 1581]], dtype=int64)

In [27]:
svc3 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores3 = cross_val_score(svc3,tsr_6_input_nomrs,tsr_6_output,cv = 10,scoring='accuracy')
print(svc_scores3)
print(svc_scores3.mean(), svc_scores3.std())

[0.82932692 0.87951807 0.8939759  0.88674699 0.87951807 0.88915663
 0.8313253  0.83855422 0.85301205 0.82650602]
0.8607640176088971 0.026209961518877074


In [28]:
svc3.fit(tsr_6_input_nomrs,tsr_6_output)
svc_predict3 =svc3.predict_proba(tsr_6_input_nomrs)
print(svc_predict3)

[[0.94668835 0.05331165]
 [0.96648344 0.03351656]
 [0.94636012 0.05363988]
 ...
 [0.01299893 0.98700107]
 [0.95043305 0.04956695]
 [0.98923322 0.01076678]]


In [29]:
svc_pred3 = cross_val_predict(svc3,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, svc_pred3)

array([[2327,  130],
       [ 113, 1581]], dtype=int64)

## RF

In [30]:
rf2 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores2 = cross_val_score(rf2,tsr_6_input,tsr_6_output,cv = 10,scoring='accuracy')
print(rf_scores2)
print(rf_scores2.mean(), rf_scores2.std())

[0.92307692 0.94939759 0.9686747  0.97108434 0.93975904 0.95903614
 0.96144578 0.97108434 0.95421687 0.94698795]
0.9544763670064874 0.014515746268267491


In [31]:
rf2.fit(tsr_6_input,tsr_6_output)
rf_predict2 =rf2.predict_proba(tsr_6_input)
print(rf_predict2)

[[0.97970288 0.02029712]
 [0.97970288 0.02029712]
 [0.97970288 0.02029712]
 ...
 [0.03411972 0.96588028]
 [0.97692192 0.02307808]
 [0.97557557 0.02442443]]


In [32]:
rf_pred2 = cross_val_predict(rf2,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, rf_pred2)

array([[2326,  131],
       [  58, 1636]], dtype=int64)

In [33]:
rf3 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8))  
rf_scores3 = cross_val_score(rf3,tsr_6_input_nomrs,tsr_6_output,cv = 10,scoring='accuracy')
print(rf_scores3)
print(rf_scores3.mean(), rf_scores3.std())

[0.81730769 0.87228916 0.89156627 0.88192771 0.8746988  0.88674699
 0.83614458 0.82650602 0.85301205 0.83855422]
0.8578753475440222 0.02556737220238637


In [34]:
rf3.fit(tsr_6_input_nomrs,tsr_6_output)
rf_predict3 =rf3.predict_proba(tsr_6_input_nomrs)
print(rf_predict3)

[[0.9151256  0.0848744 ]
 [0.94686082 0.05313918]
 [0.95137321 0.04862679]
 ...
 [0.12080764 0.87919236]
 [0.94791324 0.05208676]
 [0.93967523 0.06032477]]


In [35]:
rf_pred3 = cross_val_predict(rf3,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, rf_pred3)

array([[2326,  131],
       [  58, 1636]], dtype=int64)

## XGBoost

In [36]:
xgb2 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores2 = cross_val_score(xgb2,tsr_6_input,tsr_6_output,cv = 10)
print(xgb_scores2)
print(xgb_scores2.mean(), xgb_scores2.std())

[0.92548077 0.94939759 0.9686747  0.9686747  0.93975904 0.96385542
 0.96144578 0.96626506 0.95180723 0.94457831]
0.953993860055607 0.01365505069244317


In [37]:
xgb2.fit(tsr_6_input,tsr_6_output)
xgb_predict2 =xgb2.predict_proba(tsr_6_input)
print(xgb_predict2)

[[0.97595077 0.02404923]
 [0.97589151 0.02410849]
 [0.97584418 0.02415582]
 ...
 [0.05836251 0.94163749]
 [0.97595456 0.02404544]
 [0.97588442 0.02411558]]


In [38]:
xgb_pred2 = cross_val_predict(xgb2,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, xgb_pred2)

array([[2331,  126],
       [  65, 1629]], dtype=int64)

In [39]:
xgb3 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores3 = cross_val_score(xgb3,tsr_6_input_nomrs,tsr_6_output,cv = 10)
print(xgb_scores3)
print(xgb_scores3.mean(), xgb_scores3.std())

[0.84615385 0.88674699 0.88674699 0.87951807 0.8626506  0.90361446
 0.84096386 0.84096386 0.86024096 0.86024096]
0.8667840593141797 0.020438570391413908


In [40]:
xgb3.fit(tsr_6_input_nomrs,tsr_6_output)
xgb_predict3 =xgb3.predict_proba(tsr_6_input_nomrs)
print(xgb_predict3)

[[0.91659353 0.08340647]
 [0.91630272 0.08369728]
 [0.9141637  0.0858363 ]
 ...
 [0.10434855 0.89565145]
 [0.9155376  0.0844624 ]
 [0.91669914 0.08330086]]


In [41]:
xgb_pred3 = cross_val_predict(xgb3,tsr_6_input,tsr_6_output,cv = 10)
confusion_matrix(tsr_6_output, xgb_pred3)

array([[2331,  126],
       [  65, 1629]], dtype=int64)

# Summary

## Mean & Std

In [42]:
svc_mean = np.array([svc_scores.mean(), svc_scores.std(), svc_scores[0], svc_scores[1], svc_scores[2], svc_scores[3],
                     svc_scores[4], svc_scores[5], svc_scores[6], svc_scores[7], svc_scores[8], svc_scores[9]])
rf_mean = np.array([rf_scores.mean(), rf_scores.std(), rf_scores[0], rf_scores[1], rf_scores[2], rf_scores[3],rf_scores[4], 
                    rf_scores[5], rf_scores[6], rf_scores[7], rf_scores[8], rf_scores[9]])
xgb_mean = np.array([xgb_scores.mean(), xgb_scores.std(), xgb_scores[0], xgb_scores[1], xgb_scores[2], xgb_scores[3],
                     xgb_scores[4], xgb_scores[5], xgb_scores[6], xgb_scores[7], xgb_scores[8], xgb_scores[9]])

svc_mean1 = np.array([svc_scores1.mean(), svc_scores1.std(), svc_scores1[0], svc_scores1[1], svc_scores1[2], svc_scores1[3],
                     svc_scores1[4], svc_scores1[5], svc_scores1[6], svc_scores1[7], svc_scores1[8], svc_scores1[9]])
rf_mean1 = np.array([rf_scores1.mean(), rf_scores1.std(), rf_scores1[0], rf_scores1[1], rf_scores1[2], rf_scores1[3],rf_scores1[4], 
                    rf_scores1[5], rf_scores1[6], rf_scores1[7], rf_scores1[8], rf_scores1[9]])
xgb_mean1 = np.array([xgb_scores1.mean(), xgb_scores1.std(), xgb_scores1[0], xgb_scores1[1], xgb_scores1[2], xgb_scores1[3],
                     xgb_scores1[4], xgb_scores1[5], xgb_scores1[6], xgb_scores1[7], xgb_scores1[8], xgb_scores1[9]])

svc_mean2 = np.array([svc_scores2.mean(), svc_scores2.std(), svc_scores2[0], svc_scores2[1], svc_scores2[2], svc_scores2[3],
                     svc_scores2[4], svc_scores2[5], svc_scores2[6], svc_scores2[7], svc_scores2[8], svc_scores2[9]])
rf_mean2 = np.array([rf_scores2.mean(), rf_scores2.std(), rf_scores2[0], rf_scores2[1], rf_scores2[2], rf_scores2[3],rf_scores2[4], 
                    rf_scores2[5], rf_scores2[6], rf_scores2[7], rf_scores2[8], rf_scores2[9]])
xgb_mean2 = np.array([xgb_scores2.mean(), xgb_scores2.std(), xgb_scores2[0], xgb_scores2[1], xgb_scores2[2], xgb_scores2[3],
                     xgb_scores2[4], xgb_scores2[5], xgb_scores2[6], xgb_scores2[7], xgb_scores2[8], xgb_scores2[9]])

svc_mean3 = np.array([svc_scores3.mean(), svc_scores3.std(), svc_scores3[0], svc_scores3[1], svc_scores3[2], svc_scores3[3],
                     svc_scores3[4], svc_scores3[5], svc_scores3[6], svc_scores3[7], svc_scores3[8], svc_scores3[9]])
rf_mean3 = np.array([rf_scores3.mean(), rf_scores3.std(), rf_scores3[0], rf_scores3[1], rf_scores3[2], rf_scores3[3],rf_scores3[4], 
                    rf_scores3[5], rf_scores3[6], rf_scores3[7], rf_scores3[8], rf_scores3[9]])
xgb_mean3 = np.array([xgb_scores3.mean(), xgb_scores3.std(), xgb_scores3[0], xgb_scores3[1], xgb_scores3[2], xgb_scores3[3],
                     xgb_scores3[4], xgb_scores3[5], xgb_scores3[6], xgb_scores3[7], xgb_scores3[8], xgb_scores3[9]])

In [43]:
tsr_6_mean = pd.DataFrame([svc_mean, rf_mean, xgb_mean,svc_mean1, rf_mean1, xgb_mean1, svc_mean2, rf_mean2, xgb_mean2,
                          svc_mean3, rf_mean3, xgb_mean3]).T
tsr_6_mean.index = ["Mean", "Std", "mean_1", "mean_2", "mean_3", "mean_4", "mean_5", "mean_6", "mean_7", "mean_8", "mean_9", "mean_10"]
tsr_6_mean.columns = ["svc", 'rf', 'xgb', "svc1", 'rf1', 'xgb1', 'svc2', 'rf2', 'xgb2', "svc3", 'rf3', 'xgb3']

In [44]:
csv_save = os.path.join(".", "tsr_6_mean.csv")
tsr_6_mean.to_csv(csv_save, index = True)

## Predicted Probability

In [45]:
svc_predict_0 = svc_predict[:, 0]
svc_predict_1 = svc_predict[:, 1]
svc_predict_2 = svc_predict[:, 2]
svc_predict_3 = svc_predict[:, 3]
svc_predict_4 = svc_predict[:, 4]
svc_predict_5 = svc_predict[:, 5]

rf_predict_0 = rf_predict[:, 0]
rf_predict_1 = rf_predict[:, 1]
rf_predict_2 = rf_predict[:, 2]
rf_predict_3 = rf_predict[:, 3]
rf_predict_4 = rf_predict[:, 4]
rf_predict_5 = rf_predict[:, 5]

xgb_predict_0 = xgb_predict[:, 0]
xgb_predict_1 = xgb_predict[:, 1]
xgb_predict_2 = xgb_predict[:, 2]
xgb_predict_3 = xgb_predict[:, 3]
xgb_predict_4 = xgb_predict[:, 4]
xgb_predict_5 = xgb_predict[:, 5]

svc_predict1_0 = svc_predict1[:, 0]
svc_predict1_1 = svc_predict1[:, 1]
svc_predict1_2 = svc_predict1[:, 2]
svc_predict1_3 = svc_predict1[:, 3]
svc_predict1_4 = svc_predict1[:, 4]
svc_predict1_5 = svc_predict1[:, 5]

rf_predict1_0 = rf_predict1[:, 0]
rf_predict1_1 = rf_predict1[:, 1]
rf_predict1_2 = rf_predict1[:, 2]
rf_predict1_3 = rf_predict1[:, 3]
rf_predict1_4 = rf_predict1[:, 4]
rf_predict1_5 = rf_predict1[:, 5]

xgb_predict1_0 = xgb_predict1[:, 0]
xgb_predict1_1 = xgb_predict1[:, 1]
xgb_predict1_2 = xgb_predict1[:, 2]
xgb_predict1_3 = xgb_predict1[:, 3]
xgb_predict1_4 = xgb_predict1[:, 4]
xgb_predict1_5 = xgb_predict1[:, 5]

svc_predict2_0 = svc_predict2[:, 0]
svc_predict2_1 = svc_predict2[:, 1]

rf_predict2_0 = rf_predict2[:, 0]
rf_predict2_1 = rf_predict2[:, 1]

xgb_predict2_0 = xgb_predict2[:, 0]
xgb_predict2_1 = xgb_predict2[:, 1]

svc_predict3_0 = svc_predict3[:, 0]
svc_predict3_1 = svc_predict3[:, 1]

rf_predict3_0 = rf_predict3[:, 0]
rf_predict3_1 = rf_predict3[:, 1]

xgb_predict3_0 = xgb_predict3[:, 0]
xgb_predict3_1 = xgb_predict3[:, 1]

In [46]:
tsr_6_pred_prob = pd.DataFrame([svc_predict_0, svc_predict_1, svc_predict_2, svc_predict_3, svc_predict_4, svc_predict_5, 
                               rf_predict_0, rf_predict_1, rf_predict_2, rf_predict_3, rf_predict_4, rf_predict_5,
                               xgb_predict_0, xgb_predict_1, xgb_predict_2, xgb_predict_3, xgb_predict_4, xgb_predict_5,
                                svc_predict1_0, svc_predict1_1, svc_predict1_2, svc_predict1_3, svc_predict1_4, svc_predict1_5, 
                               rf_predict1_0, rf_predict1_1, rf_predict1_2, rf_predict1_3, rf_predict1_4, rf_predict1_5,
                               xgb_predict1_0, xgb_predict1_1, xgb_predict1_2, xgb_predict1_3, xgb_predict1_4, xgb_predict1_5,
                               svc_predict2_0, svc_predict2_1, rf_predict2_0, rf_predict2_1, xgb_predict2_0, xgb_predict2_1,
                               svc_predict3_0, svc_predict3_1, rf_predict3_0, rf_predict3_1, xgb_predict3_0, xgb_predict3_1]).T
tsr_6_pred_prob.columns = ["svc_predict_0", "svc_predict_1", "svc_predict_2", "svc_predict_3", "svc_predict_4", "svc_predict_5", 
                           "rf_predict_0", "rf_predict_1", "rf_predict_2", "rf_predict_3", "rf_predict_4", "rf_predict_5",
                           "xgb_predict_0", "xgb_predict_1", "xgb_predict_2", 'xgb_predict_3', "xgb_predict_4", "xgb_predict_5",
                           "svc_predict1_0", "svc_predict1_1", "svc_predict1_2", "svc_predict1_3", "svc_predict1_4", "svc_predict1_5", 
                           "rf_predict1_0", "rf_predict1_1", "rf_predict1_2", "rf_predict1_3", "rf_predict1_4", "rf_predict1_5",
                           "xgb_predict1_0", "xgb_predict1_1", "xgb_predict1_2", 'xgb_predict1_3', "xgb_predict1_4", "xgb_predict1_5",
                           "svc_predict2_0", "svc_predict2_1", "rf_predict2_0", "rf_predict2_1", "xgb_predict2_0", "xgb_predict2_1",
                           "svc_predict3_0", "svc_predict3_1", "rf_predict3_0", "rf_predict3_1", "xgb_predict3_0", "xgb_predict3_1"]

In [47]:
csv_save2 = os.path.join(".", "tsr_6_pred_prob.csv")
tsr_6_pred_prob.to_csv(csv_save2, index = False)