In [None]:
import os

# Select which GPU the process may see. This is done before TensorFlow is
# imported so the setting is guaranteed to be in place before any CUDA
# initialisation can happen.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Load the dataset from 'data33.csv' (path is relative to the notebook's
# working directory) into a DataFrame.
data = pd.read_csv('data33.csv')

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the target "PATIENT_UPDATE".
X = data.drop(columns=["PATIENT_UPDATE"])
Y = data["PATIENT_UPDATE"]

# Hold out 30% of the rows for testing. random_state pins the shuffle so the
# split, and therefore every metric and exported CSV derived from it, is the
# same on each Restart & Run All.
# NOTE(review): if the target classes are imbalanced, consider stratify=Y.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0
)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.metrics import confusion_matrix
import seaborn as sns
def _show_matrix_heatmap(matrix, title, fmt, labels):
    """Draw one annotated heatmap with the axis labelling shared by all three plots."""
    plt.figure(figsize=(16, 7))
    sns.heatmap(matrix, cmap="YlGnBu_r", annot=True, fmt=fmt,
                xticklabels=labels, yticklabels=labels)
    plt.title(title, fontsize=30)
    plt.xlabel('Predicted Class', fontsize=20)
    plt.ylabel('Original Class', fontsize=20)
    plt.tick_params(labelsize=15)
    plt.xticks(rotation=90)
    plt.show()


def print_confusionMatrix(Y_TestLabels, PredictedLabels):
    """Plot confusion, precision and recall matrices and print sensitivity/specificity.

    Parameters
    ----------
    Y_TestLabels : array-like
        True class labels.
    PredictedLabels : array-like
        Labels predicted by the classifier, in the same order as Y_TestLabels.

    Returns
    -------
    None
        Three heatmaps are shown and two lines are printed.

    Notes
    -----
    The matrix comes from sklearn's ``confusion_matrix``: rows are the original
    classes, columns are the predicted classes. For the binary case handled
    here that is ``[[TN, FP], [FN, TP]]``, treating the second class
    (labelled "1" on the plots) as positive.
    Indexing ``[1, 1]`` requires a 2x2 matrix, i.e. two distinct classes.
    A class that is never predicted (or never occurs) gives a zero column
    (or row) sum, so the corresponding normalised entries become NaN.
    """
    confusionMatx = confusion_matrix(Y_TestLabels, PredictedLabels)

    # Column-normalised: every column (predicted class) sums to 1, so the
    # diagonal holds the per-class precision.
    precision = confusionMatx / confusionMatx.sum(axis=0)

    # Row-normalised: every row (original class) sums to 1, so the diagonal
    # holds the per-class recall. Worked example:
    #   confusionMatx             = [[1, 2],
    #                                [3, 4]]
    #   confusionMatx.sum(axis=1) = [3, 7]            (row sums)
    #   confusionMatx.T / [3, 7]  = [[1/3, 3/7],
    #                                [2/3, 4/7]]      (divides column-wise)
    #   transposed back           = [[1/3, 2/3],
    #                                [3/7, 4/7]]      (each row sums to 1)
    recall = (confusionMatx.T / confusionMatx.sum(axis=1)).T

    sns.set(font_scale=1.5)

    labels = ["0=No SEVER ", "1=SEVER "]

    # Raw counts are integers, so annotate them without a decimal part.
    _show_matrix_heatmap(confusionMatx, "Confusion Matrix", "d", labels)

    print("-"*125)

    _show_matrix_heatmap(precision, "Precision Matrix", ".2f", labels)

    print("-"*125)

    _show_matrix_heatmap(recall, "Recall Matrix", ".2f", labels)

    true_neg, false_pos = confusionMatx[0, 0], confusionMatx[0, 1]
    false_neg, true_pos = confusionMatx[1, 0], confusionMatx[1, 1]

    # Sensitivity (true-positive rate) is computed along the positive ROW:
    # TP / (TP + FN). The previous column-wise formula produced the negative
    # predictive value instead.
    sensitivity = true_pos / (true_pos + false_neg)
    print('Sensitivity : ', sensitivity )

    # Specificity (true-negative rate) is computed along the negative ROW:
    # TN / (TN + FP). The previous column-wise formula produced the positive
    # predictive value (precision) instead.
    specificity = true_neg / (true_neg + false_pos)
    print('Specificity : ', specificity)
    
def plot_roc_curve(fpr, tpr):
    """Plot a ROC curve, with its area under the curve shown in the legend.

    Parameters
    ----------
    fpr : array-like
        False-positive rates (x coordinates of the curve).
    tpr : array-like
        True-positive rates (y coordinates of the curve).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    plt.figure()
    line_width = 2
    area_under_curve = auc(fpr, tpr)
    legend_text = 'ROC curve (area = %0.2f)' % area_under_curve

    # The classifier's curve.
    plt.plot(fpr, tpr, color='darkorange', lw=line_width, label=legend_text)
    # Dashed diagonal from (0, 0) to (1, 1) for visual reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')

    plt.xlim([0, 1.0])
    plt.ylim([0, 1.05])  # small headroom above 1.0 on the y axis
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.show()
    
    
    

# LogisticRegression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# StandardScaler was imported here but never applied, so the regularised
# logistic regression was being fitted on raw, unscaled features. Chaining the
# scaler and the model in one pipeline standardises the features using
# statistics learned from the training split only, and `clf` keeps the same
# predict / predict_proba / score interface used by the cells below.
clf = make_pipeline(StandardScaler(), LogisticRegression()).fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Predict the held-out rows with the current `clf` and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the logistic-regression predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities for the test rows; column 1 is used as the score of
# the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the positive-class scores for later plotting.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_5.csv', index=False)

# KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a k-nearest-neighbours classifier with default settings on the training
# split, then display its score on the test split.
clf = KNeighborsClassifier()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the k-NN model and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the k-NN predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the k-NN model; column 1 is used as the score
# of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the k-NN positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_6.csv', index=False)

# GaussianNB

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive Bayes classifier with default settings on the training
# split, then display its score on the test split.
clf = GaussianNB()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the naive Bayes model and report accuracy
# plus macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the naive Bayes predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the naive Bayes model; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the naive Bayes positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_7.csv', index=False)

# DecisionTreeClassifier

In [None]:
from sklearn import tree

# Shallow decision tree (depth capped at 3). random_state is fixed so that
# tie-breaking between equally good splits is repeatable, in line with the
# seeded ensemble models used elsewhere in this notebook.
clf = tree.DecisionTreeClassifier(max_depth=3, random_state=0)
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the decision tree and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the decision-tree predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the decision tree; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the decision-tree positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_8.csv', index=False)

# BaggingClassifier

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# Bagged k-NN: every ensemble member is trained on a random half of the
# samples and a random half of the features. random_state pins that
# sub-sampling so reruns build the same ensemble, in line with the seeded
# ensemble models used elsewhere in this notebook.
clf = BaggingClassifier(
    KNeighborsClassifier(), max_samples=0.5, max_features=0.5, random_state=0
)

clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the bagging ensemble and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the bagging-ensemble predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the bagging ensemble; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the bagging-ensemble positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_9.csv', index=False)

# RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Small seeded random forest: 10 trees, depth capped at 3, and at least 12
# samples required to split a node. (The redundant `clf = clf = ...` double
# assignment has been removed; the fitted model is unchanged.)
clf = RandomForestClassifier(n_estimators=10, max_depth=3, min_samples_split=12, random_state=0)
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the random forest and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the random-forest predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the random forest; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the random-forest positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_10.csv', index=False)

# ExtraTreesClassifier

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Seeded extra-trees ensemble: 10 trees, no depth cap, and a split allowed as
# soon as a node holds 2 samples. Built and fitted in one expression.
clf = ExtraTreesClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
).fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the extra-trees ensemble and report accuracy
# plus macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the extra-trees predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the extra-trees ensemble; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the extra-trees positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_11.csv', index=False)

# AdaBoostClassifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with default settings. random_state is fixed so repeated runs build
# the same ensemble, in line with the seeded ensemble models used elsewhere in
# this notebook.
clf = AdaBoostClassifier(random_state=0)
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the AdaBoost model and report accuracy plus
# macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the AdaBoost predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the AdaBoost model; column 1 is used as the
# score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the AdaBoost positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_12.csv', index=False)

# GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Seeded gradient boosting over depth-1 trees with learning rate 0.3.
# Built and fitted in one expression.
clf = GradientBoostingClassifier(
    learning_rate=0.3,
    max_depth=1,
    random_state=0,
).fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
# Predict the held-out rows with the gradient-boosting model and report
# accuracy plus macro-averaged precision / recall / F1.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Only the first three returned values are needed; the trailing one is dropped.
precision, recall, f1 = precision_recall_fscore_support(
    y_test, y_pred, average='macro'
)[:3]
print("accuracy: ", accuracy)
print("precision: ", precision)
print("recall: ", recall)
print("f1: ", f1)

In [None]:
# Confusion / precision / recall heatmaps for the gradient-boosting predictions.
print_confusionMatrix(Y_TestLabels=y_test, PredictedLabels=y_pred)

In [None]:
from sklearn.metrics import roc_curve

# Per-class probabilities from the gradient-boosting model; column 1 is used as
# the score of the positive class (presumably label 1 -- confirm against clf.classes_).
y_score = clf.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score[:, 1])
plot_roc_curve(fpr=fpr, tpr=tpr)

In [None]:
# Export the true labels next to the gradient-boosting positive-class scores.
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs('Pictures', exist_ok=True)
df1 = pd.DataFrame({'y_test': y_test, 'y_score[:,1]': y_score[:, 1]})
df1.to_csv('Pictures/pre_lab_13.csv', index=False)