In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, InputLayer, Input, add, dot, multiply, concatenate, Reshape
#from tensorflow.python.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
import numpy as np
import pandas as pd
#from sklearn.model_selection import train_test_split

In [2]:
# reading data
data = pd.read_csv('PrOCTOR_sample_data_all.csv', header=0)
data1 = data.fillna(data.mean()['MolecularWeight':'Salivary Gland'])
data1["target"] = np.where(data1.iloc[:, 1] == "passed", 1, 0)

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# x, y variable
X = data1.iloc[:, 2:-1]
y = data1['target']
#print(X)

In [4]:
X = np.array(X)
X = scaler.fit_transform(X)
y = np.array(y)
y_1 = y.reshape(y.shape[0],-1)
#y = scaler.fit_transform(X)
des=X[:,:13]
body=X[:,13:]

In [5]:
print(des.shape)
print(body.shape)
print(y.shape)
print(y_1.shape)

(828, 13)
(828, 34)
(828,)
(828, 1)


In [6]:
_, des_col = des.shape
_, body_col = body.shape

In [7]:
def predict_change(predict):
    y_predict = []
    for i in range(len(predict)):
        y_predict = np.append(y_predict, predict[i])
    
    y_predict = np.array(y_predict)
    y_predict = y_predict.reshape(y_predict.shape[0],-1)
    return y_predict

In [8]:
from sklearn.metrics import roc_curve, precision_recall_curve, auc, f1_score, confusion_matrix, accuracy_score, matthews_corrcoef, accuracy_score
from imblearn.metrics import geometric_mean_score
import math

class Evaluation:
    def __init__(self, pred, y):
        self.y_pred = pred
        self.y = y
        
    def matrix(self):
        y_1 = self.y.reshape(self.y.shape[0],-1)
        y_pred = np.array(self.y_pred)
        y_pred = y_pred.reshape(y_pred.shape[0], -1)
        
        y_classify = []
        for i in range(len(self.y_pred)):
            if self.y_pred[i] >= 0.5:
                a = 1.
                y_classify.append(a)
            else:
                a = 0.
                y_classify.append(a)
        
        fpr,tpr,threshold = roc_curve(y_1 , y_pred, pos_label = 1)
        precision, recall, threshold = precision_recall_curve(y_1, y_pred, pos_label = 1)
        
        roc_auc = auc(fpr,tpr)
        auprc = auc(recall, precision)
        mean_precision = np.mean(precision)
        mean_recall = np.mean(recall)
        F1 = 2 * (mean_precision * mean_recall) / (mean_precision + mean_recall)
        # binary 
        accuracy = accuracy_score(y_1, y_classify)
        mcc = matthews_corrcoef(y_1, y_classify) 
        g_mean = geometric_mean_score(y_1, y_classify)
        confusion = confusion_matrix(y_1, y_classify)
        print(confusion.ravel())
        tn, fp, fn, tp = confusion.ravel()
        tpr = tp / (tp + fn)
        tnr = tn / (tn + fp)
        ppv = tp / (tp + fp)
        fnr = fn / (fn + tp)
        fpr = fp / (fp + tn)

        confu_precision = ppv 
        confu_recall = tpr # sensitivity
        confu_f1 = 2 * ((ppv * tpr) / (ppv + tpr))
        confu_accuracy = (tp + tn) / (tp + tn + fp + fn)
        confu_mcc = ((tp * tn)-(fp-fn))/ math.sqrt((tp + fp)*(tp + fn)*(tn + fp)*(tn + fn))
        confu_g_mean = math.sqrt(tpr * tnr)
        Optimized_precision = (confu_accuracy - abs(tnr-tpr)) / (tnr + tpr)

        print('공통 \nAUC :',roc_auc) # pb
        print("AUPRC :", auprc) # pb
        print("Optimized precision :", Optimized_precision)

        print("\nfunction 사용\nAccuracy :", accuracy) #pb
        print("Precision(pb) :",mean_precision )
        print("Recall(pb) :", mean_recall) # pb
        print("F1 score(pb) :", F1) #pb

        print("MCC :", mcc)
        print("G-mean :", g_mean)


        print("\nConfusion_matrix 사용 \n", confusion)
        print("Accuracy :", confu_accuracy)
        print("Precision :", confu_precision) 
        print("Recall :", confu_recall) 
        print("F1 score :", confu_f1) 

        print("MCC :", confu_mcc)
        print("G-mean :", confu_g_mean)
        
        return roc_auc, auprc, Optimized_precision, accuracy, mean_precision, mean_recall, F1, mcc, g_mean, confu_accuracy, confu_precision, confu_recall, confu_f1, confu_mcc, confu_g_mean;

In [21]:
from sklearn.model_selection import LeaveOneOut, KFold, cross_val_score
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight
from sklearn.ensemble import RandomForestClassifier

# 모델설정
sm = SMOTE(random_state=202004)
loo = LeaveOneOut()
kfold = KFold(n_splits = 5, shuffle = True, random_state = 111)
kfold.get_n_splits(X)

5

In [14]:
dict_x = dict()
dict_y = dict()

class train_model:
    def __init__(self, x, y, pd_des, pd_body):
        self.X = x
        self.y = y
        self.pd_des = pd_des
        self.pd_body = pd_body

        
    def train_based(self):
        for train_index, test_index in kfold.split(self.X):
            print("TEST:", test_index)
            #des_train, des_test = des[train_index], des[test_index]
            #body_train, body_test = body[train_index], body[test_index]
            X_train, X_test = self.X[train_index], self.X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            rf_des = RandomForestClassifier(n_estimators=100, max_features = des_col)
            rf_des.fit(X_train, y_train)

            #rf_body = RandomForestClassifier(n_estimators=100, max_features = body_col)
            #rf_body.fit(body_train, y_train)

            pred_y = rf_des.predict_proba(X_test)
            #pred_body = rf_body.predict_proba(body_test)

            #print(pred_des[:,1].shape) # pass의 확률
            pred_y = pred_y[:,1]
            #pred_body = pred_body[:,1]
            print(pred_y)
            #print(pred_body)
            for i in range(len(test_index)):
                index = test_index[i]
                self.pd_des[index] = pred_y[i]
                #self.pd_body[index] = pred_body[i]
            
            #print(self.pd_des)
            #print(self.pd_body)
        return self.pd_des#, self.pd_body
    '''
    def train_smote(self):
        for train_index, test_index in kfold.split(self.X):
            print("TEST:", test_index)
            des_train, des_test = des[train_index], des[test_index]
            body_train, body_test = body[train_index], body[test_index]
            y_train, y_test = y[train_index], y[test_index]
            
            sm_des_train, sm_y_train = sm.fit_sample(des_train, y_train)
            sm_body_train, _ = sm.fit_sample(body_train, y_train)

            self.model.compile(optimizer= keras.optimizers.Adam(), loss='binary_crossentropy', metrics = ['accuracy'])
            kf_history = self.model.fit(x = [sm_des_train, sm_body_train], y = sm_y_train, epochs=50)

            y_pred = self.model.predict([des_test[:], body_test[:]])
            
            for i in range(len(test_index)):
                index = test_index[i]
                self.pd_sm_y[index] = y_pred[i]
            
        return self.pd_sm_y
    '''
    def train_weight(self):
        for train_index, test_index in kfold.split(self.X):
            print("TEST:", test_index)
            #des_train, des_test = des[train_index], des[test_index]
            #body_train, body_test = body[train_index], body[test_index]
            X_train, X_test = self.X[train_index], self.X[test_index]
            y_train, y_test = y[train_index], y[test_index]
                        
            neg, pos = np.bincount(y_train)
            total = neg + pos

            weight_for_0 = (1 / neg)*(total)/2.0 
            weight_for_1 = (1 / pos)*(total)/2.0

            class_weight = {0: weight_for_0, 1: weight_for_1}
            
            rf_des = RandomForestClassifier(n_estimators=100, max_features = des_col, class_weight = class_weight)
            rf_des.fit(X_train, y_train)

            #rf_body = RandomForestClassifier(n_estimators=100, max_features = body_col, class_weight = class_weight)
            #rf_body.fit(body_train, y_train)

            pred_des = rf_des.predict_proba(X_test)
            #pred_body = rf_body.predict_proba(body_test)

            pred_des = pred_des[:,1]
            #pred_body = pred_body[:,1]
            print(pred_des)
            #print(pred_body)
            for i in range(len(test_index)):
                index = test_index[i]
                self.pd_des[index] = pred_des[i]
                #self.pd_body[index] = pred_body[i]
            
            #print(self.pd_des)
            #print(self.pd_body)
        return self.pd_des#, self.pd_body

    def train_sm_weight(self):
        for train_index, test_index in kfold.split(self.X):
            print("TEST:", test_index)
            #des_train, des_test = des[train_index], des[test_index]
            #body_train, body_test = body[train_index], body[test_index]
            X_train, X_test = self.X[train_index], self.X[test_index]
            y_train, y_test = y[train_index], y[test_index]
                        
            neg, pos = np.bincount(y_train)
            total = neg + pos

            weight_for_0 = (1 / neg)*(total)/2.0 
            weight_for_1 = (1 / pos)*(total)/2.0

            class_weight = {0: weight_for_0, 1: weight_for_1}
            
            sm_X_train, sm_y_train = sm.fit_resample(X_train, y_train)
            #sm_body_train, _ = sm.fit_sample(body_train, y_train)
            
            rf_des = RandomForestClassifier(n_estimators=100, max_features = des_col, class_weight = class_weight)
            rf_des.fit(sm_X_train, sm_y_train)

            #rf_body = RandomForestClassifier(n_estimators=100, max_features = body_col, class_weight = class_weight)
            #rf_body.fit(sm_body_train, sm_y_train)

            pred_des = rf_des.predict_proba(X_test)
            #pred_body = rf_body.predict_proba(body_test)

            pred_des = pred_des[:,1]
            #pred_body = pred_body[:,1]
            print(pred_des)
            #print(pred_body)
            for i in range(len(test_index)):
                index = test_index[i]
                self.pd_des[index] = pred_des[i]
                #self.pd_body[index] = pred_body[i]

        return self.pd_des#, self.pd_body

In [47]:
rf_y, f_sm_y, rf_cw_y, rf_smcw_y = [], [], [], []
output_rf = train_model(X, y, dict_x, dict_y)
rf_y = output_rf.train_based()
final_result = Evaluation(predict_change(rf_y), y)

TEST: [  1  11  12  34  36  49  56  71  93 112 119 125 126 140 145 152 157 183
 190 194 198 205 209 210 211 214 233 237 257 264 272 298 303 323 328 340
 354 368 373 380 399 403 409 412 422 432 434 447 456 461 464 468 474 476
 491 496 506 518 520 523 528 532 538 547 553 556 589 611 624 629 645 658
 665 700 702 708 735 757 766 774 799 800 825]
[0.99 0.78 0.92 0.97 0.94 0.94 0.99 0.92 0.91 0.91 0.81 0.98 1.   0.82
 0.96 0.98 0.97 1.   0.82 1.   0.95 0.93 0.88 1.   1.   0.99 0.84 0.9
 1.   0.68 0.84 0.95 0.94 1.   0.84 0.75 0.94 0.98 0.92 0.98 0.96 0.91
 0.92 0.94 1.   0.75 0.7  0.96 1.   0.89 0.96 0.99 0.93 0.95 0.97 0.93
 0.95 0.99 0.86 0.91 0.83 0.92 0.92 0.95 0.86 0.99 0.61 0.98 0.91 0.76
 1.   0.99 0.98 0.77 1.   0.93 0.93 0.84 0.97 1.   1.   1.   0.93]
TEST: [  5  39  46  52  68 102 104 122 162 164 166 167 223 228 230 234 249 258
 277 280 281 291 326 331 332 338 342 366 374 375 381 383 389 401 413 421
 423 431 443 444 460 465 471 481 493 498 507 516 526 537 551 572 575 581
 586 604 6

In [26]:
rf_y_set = []

for i in range(20):
    rf_y, f_sm_y, rf_cw_y, rf_smcw_y = [], [], [], []
    output_rf = train_model(X, y, dict_x, dict_y)

    # sum model training
    rf_y = output_rf.train_based()
    #rf_cw_y = output_rf.train_weight()
    #rf_smcw_y = output_rf.train_sm_weight()
    
    final_result = Evaluation(predict_change(rf_y), y)

    rf_y_set = np.append(rf_y_set, final_result.matrix())
    #print(len(rf_y_set))
    
rf_y_np = np.array(rf_y_set)

rf_y_np = rf_y_np.reshape(-1,15)
rf_y_np.shape

TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 547 551 553 556 572 575 581 586 589 604 606 611 613
 624 629 632 645 651 652 653 658 664 665 666 673 694 700 701 702 706 707
 708 709 718 723 735 744 745 754 757 766 767 769 774 780 783 789 799 800
 806 816 822 825]
[1.   0.96 0.62 0.86 0.89 0.95 0.68 0.85 0.86 1.   1.   0.84 0.91 0.81
 0.94 1.   0.85 0.81 1.   0.97 1.   0.89 0.93 0.95 0.99 1.   0.93 1.
 0.98 0.99 0.82 0.99 0.94 0.87 0.86 1.   1.   0.97 0.9  0.92 0.92 0.73
 0.99 0.87 0.85 1.   0.45 0.72 0.77 0.92 0.93 0.63 0.99 0.97 0.97 0.97
 0.97 0.76 0.94 0.92 0.88 0.72 0.84 0

  return f(*args, **kwargs)


[1.   0.98 0.68 0.9  0.92 0.86 0.62 0.82 0.89 1.   0.98 0.89 0.97 0.89
 0.94 1.   0.87 0.8  1.   0.95 1.   0.77 0.94 0.98 0.99 0.98 0.94 0.99
 0.97 0.99 0.77 0.96 0.91 0.87 0.85 1.   1.   0.97 0.92 0.93 0.93 0.81
 0.99 0.9  0.92 1.   0.43 0.78 0.86 0.92 0.95 0.62 0.98 0.93 0.89 0.97
 0.96 0.87 0.93 0.89 0.92 0.74 0.87 0.95 0.78 0.96 0.88 0.99 0.89 0.98
 0.98 0.76 0.84 0.96 0.97 0.91 0.98 0.9  0.96 0.88 1.   0.53 0.8  0.83
 0.56 0.93 0.99 0.92 0.99 0.81 0.83 0.96 0.96 0.97 1.   0.85 0.95 1.
 0.96 0.92 0.91 0.95 0.91 0.98 0.84 0.98 0.86 0.84 0.92 0.84 0.88 0.53
 0.9  0.9  0.72 0.81 0.97 0.93 0.73 0.89 0.86 0.66 0.99 0.98 0.98 1.
 0.9  0.82 0.95 1.   1.   0.68 1.   0.97 0.99 0.95 0.99 0.86 0.93 0.77
 0.93 0.97 1.   0.98 0.94 1.   0.99 0.94 0.96 0.96 0.26 0.96 0.79 0.98
 0.98 0.78 1.   0.99 1.   0.87 1.   1.   0.62 0.36 0.95 0.93]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 

  return f(*args, **kwargs)


[  6  65   5 752]
공통 
AUC : 0.6796658418144269
AUPRC : 0.9528777598062302
Optimized precision : 0.006096100693626181

function 사용
Accuracy : 0.9154589371980676
Precision(pb) : 0.9336768221038984
Recall(pb) : 0.8208388375165125
F1 score(pb) : 0.8736293609800961
MCC : 0.19051030658901236
G-mean : 0.2897393165673664

Confusion_matrix 사용 
 [[  6  65]
 [  5 752]]
Accuracy : 0.9154589371980676
Precision : 0.9204406364749081
Recall : 0.9933949801849405
F1 score : 0.9555273189326556
MCC : 0.20256792093008907
G-mean : 0.2897393165673664
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 5

  return f(*args, **kwargs)


[  8  63   5 752]
공통 
AUC : 0.6636649487413252
AUPRC : 0.9491376505848188
Optimized precision : 0.03359230199640227

function 사용
Accuracy : 0.9178743961352657
Precision(pb) : 0.9326073959671307
Recall(pb) : 0.8275753979804311
F1 score(pb) : 0.8769577109046378
MCC : 0.2389040138144472
G-mean : 0.33456214482997443

Confusion_matrix 사용 
 [[  8  63]
 [  5 752]]
Accuracy : 0.9178743961352657
Precision : 0.9226993865030675
Recall : 0.9933949801849405
F1 score : 0.9567430025445293
MCC : 0.24967376149911882
G-mean : 0.3345621448299744
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 54

  return f(*args, **kwargs)


[1.   0.98 0.66 0.87 0.95 0.92 0.77 0.83 0.95 1.   0.99 0.86 0.87 0.86
 0.95 1.   0.81 0.9  1.   0.94 1.   0.8  0.95 0.98 0.98 1.   0.97 1.
 0.94 1.   0.77 0.99 0.89 0.83 0.82 1.   1.   0.95 0.93 0.86 0.96 0.7
 0.98 0.9  0.88 1.   0.4  0.69 0.81 0.97 0.93 0.62 0.98 0.95 0.94 0.97
 0.99 0.87 0.91 0.89 0.9  0.68 0.84 0.9  0.75 1.   0.91 0.97 0.87 0.98
 0.98 0.76 0.79 0.95 0.99 0.86 0.96 0.94 0.94 0.92 1.   0.56 0.74 0.78
 0.58 0.98 1.   0.99 0.99 0.79 0.85 0.96 0.99 1.   1.   0.9  0.93 1.
 0.98 0.93 0.93 0.95 0.92 0.99 0.89 0.98 0.85 0.93 0.93 0.86 0.84 0.53
 0.89 0.95 0.76 0.74 1.   0.94 0.82 0.8  0.87 0.66 1.   0.98 0.96 1.
 0.92 0.84 0.93 0.99 1.   0.71 1.   1.   0.99 0.92 0.99 0.84 0.92 0.77
 0.92 0.99 0.99 1.   0.94 0.99 0.99 0.94 0.93 0.95 0.18 0.8  0.78 0.96
 0.94 0.82 0.99 0.99 0.97 0.87 1.   0.99 0.58 0.36 0.97 0.97]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 196

  return f(*args, **kwargs)


[  6  65   8 749]
공통 
AUC : 0.659106554784453
AUPRC : 0.9476817056407604
Optimized precision : 0.006435023480850452

function 사용
Accuracy : 0.9118357487922706
Precision(pb) : 0.9319562327137783
Recall(pb) : 0.8251639667199704
F1 score(pb) : 0.8753148498814426
MCC : 0.16057374204460284
G-mean : 0.28916080154780077

Confusion_matrix 사용 
 [[  6  65]
 [  8 749]]
Accuracy : 0.9118357487922706
Precision : 0.9201474201474201
Recall : 0.9894319682959049
F1 score : 0.9535327816677276
MCC : 0.179281754768974
G-mean : 0.28916080154780077
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 54

  return f(*args, **kwargs)


[1.   0.97 0.65 0.91 0.96 0.9  0.7  0.85 0.9  1.   0.98 0.87 0.9  0.8
 0.99 1.   0.91 0.87 1.   0.92 1.   0.88 0.95 0.98 0.95 1.   0.96 1.
 0.96 1.   0.79 0.99 0.94 0.9  0.85 0.99 1.   0.95 0.93 0.86 0.91 0.76
 0.97 0.97 0.94 1.   0.47 0.79 0.83 0.98 0.99 0.6  0.96 0.93 0.94 0.98
 0.98 0.83 0.91 0.89 0.91 0.64 0.88 0.95 0.77 0.96 0.91 0.99 0.83 0.95
 0.97 0.82 0.83 0.98 0.95 0.92 0.93 0.89 0.94 0.86 1.   0.57 0.75 0.75
 0.66 0.92 1.   0.94 1.   0.75 0.78 0.97 0.97 0.97 1.   0.84 0.97 1.
 0.97 0.91 0.95 0.96 0.94 0.99 0.91 0.98 0.91 0.94 0.83 0.83 0.86 0.44
 0.93 0.96 0.77 0.76 0.99 0.93 0.77 0.87 0.87 0.55 0.99 1.   0.97 1.
 0.88 0.84 0.93 1.   1.   0.62 1.   0.95 1.   0.93 1.   0.86 0.98 0.75
 0.96 0.98 1.   0.99 0.93 0.98 0.96 0.88 0.93 0.95 0.26 0.94 0.85 0.96
 0.96 0.88 1.   0.99 1.   0.88 1.   1.   0.57 0.31 0.9  0.94]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 196

  return f(*args, **kwargs)


[0.99 0.96 0.68 0.9  0.98 0.86 0.7  0.9  0.93 1.   0.97 0.8  0.91 0.88
 0.94 1.   0.88 0.83 1.   0.95 1.   0.84 0.95 0.98 0.99 1.   0.94 1.
 0.99 1.   0.8  0.98 0.93 0.78 0.81 1.   1.   0.99 0.9  0.9  0.89 0.73
 0.99 0.93 0.89 1.   0.42 0.8  0.85 0.95 0.99 0.63 0.97 0.96 0.97 0.99
 0.96 0.8  0.96 0.91 0.91 0.73 0.9  0.92 0.72 0.96 0.87 0.96 0.83 0.99
 0.96 0.81 0.81 0.98 0.96 0.92 0.93 0.88 0.94 0.8  0.99 0.48 0.74 0.76
 0.65 0.95 0.99 0.94 1.   0.8  0.88 0.96 0.96 0.99 0.99 0.86 0.95 1.
 0.96 0.91 0.9  0.89 0.87 0.98 0.91 0.99 0.9  0.9  0.94 0.76 0.89 0.47
 0.92 0.89 0.86 0.81 1.   0.97 0.82 0.8  0.83 0.57 0.97 0.97 0.98 0.98
 0.94 0.76 0.89 1.   1.   0.67 1.   0.99 0.98 0.97 0.98 0.81 0.92 0.75
 0.91 0.97 1.   0.97 0.95 0.99 0.97 0.88 0.93 0.97 0.22 0.9  0.77 0.95
 0.93 0.8  0.99 0.99 1.   0.89 1.   1.   0.56 0.3  0.89 0.92]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 

  return f(*args, **kwargs)


[  6  65   7 750]
공통 
AUC : 0.6509200513517033
AUPRC : 0.9462396908440116
Optimized precision : 0.006321771630795837

function 사용
Accuracy : 0.9130434782608695
Precision(pb) : 0.931655097212988
Recall(pb) : 0.8277992969572129
F1 score(pb) : 0.8766620345885601
MCC : 0.1695082855427888
G-mean : 0.2893537684038104

Confusion_matrix 사용 
 [[  6  65]
 [  7 750]]
Accuracy : 0.9130434782608695
Precision : 0.9202453987730062
Recall : 0.9907529722589168
F1 score : 0.9541984732824428
MCC : 0.18614482184946052
G-mean : 0.2893537684038104
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 547

  return f(*args, **kwargs)


[  7  64   6 751]
공통 
AUC : 0.6619625281411057
AUPRC : 0.9493125772634221
Optimized precision : 0.020149633189773808

function 사용
Accuracy : 0.9154589371980676
Precision(pb) : 0.9326305944210043
Recall(pb) : 0.8321877169021337
F1 score(pb) : 0.8795508524641404
MCC : 0.204206149678618
G-mean : 0.3127460796424784

Confusion_matrix 사용 
 [[  7  64]
 [  6 751]]
Accuracy : 0.9154589371980676
Precision : 0.9214723926380368
Recall : 0.9920739762219286
F1 score : 0.955470737913486
MCC : 0.2178673860412754
G-mean : 0.31274607964247836
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 547 

  return f(*args, **kwargs)


[1.   0.99 0.71 0.88 0.97 0.82 0.7  0.88 0.93 1.   1.   0.87 0.88 0.9
 0.98 1.   0.91 0.91 1.   0.94 1.   0.86 0.98 0.98 1.   1.   0.95 0.99
 0.99 0.99 0.81 0.99 0.92 0.88 0.86 1.   1.   0.92 0.93 0.97 0.97 0.8
 0.99 0.91 0.92 1.   0.39 0.73 0.83 0.95 0.95 0.71 0.99 0.94 0.92 0.96
 0.99 0.86 0.91 0.93 0.94 0.69 0.89 0.95 0.75 0.98 0.87 0.98 0.78 1.
 0.99 0.77 0.89 0.95 0.96 0.94 0.96 0.93 0.96 0.9  1.   0.46 0.79 0.73
 0.64 0.96 1.   0.92 0.96 0.77 0.89 0.97 0.97 0.98 1.   0.87 0.95 1.
 0.99 0.88 0.89 0.9  0.93 0.97 0.92 0.96 0.89 0.9  0.93 0.7  0.93 0.43
 0.91 0.92 0.81 0.78 0.99 0.93 0.74 0.74 0.81 0.59 0.99 1.   0.95 1.
 0.89 0.86 0.93 1.   1.   0.63 1.   0.97 0.98 0.93 1.   0.9  0.93 0.65
 0.89 0.97 1.   0.99 0.89 0.98 0.98 0.89 0.88 0.99 0.19 0.95 0.85 0.97
 0.98 0.87 1.   0.98 1.   0.85 1.   1.   0.67 0.25 0.9  0.97]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 196 

  return f(*args, **kwargs)


[  6  65   6 751]
공통 
AUC : 0.6655255177033136
AUPRC : 0.94889439538073
Optimized precision : 0.006208797708997964

function 사용
Accuracy : 0.9142512077294686
Precision(pb) : 0.9318647795068122
Recall(pb) : 0.8365918097754292
F1 score(pb) : 0.8816619498361512
MCC : 0.17941656991522875
G-mean : 0.2895466066580737

Confusion_matrix 사용 
 [[  6  65]
 [  6 751]]
Accuracy : 0.9142512077294686
Precision : 0.9203431372549019
Recall : 0.9920739762219286
F1 score : 0.9548633184996821
MCC : 0.19384487036273623
G-mean : 0.2895466066580737
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 547

  return f(*args, **kwargs)


[1.   0.98 0.68 0.9  0.96 0.92 0.76 0.87 0.89 1.   0.99 0.87 0.89 0.84
 0.98 0.99 0.85 0.87 0.99 0.97 1.   0.82 0.94 0.98 0.99 0.99 0.95 1.
 0.96 0.99 0.85 1.   0.92 0.89 0.8  0.99 0.98 0.98 0.92 0.94 0.9  0.8
 0.99 0.92 0.9  1.   0.41 0.71 0.88 0.98 0.99 0.63 0.99 0.94 0.98 0.97
 0.97 0.85 0.96 0.9  0.91 0.67 0.87 0.92 0.78 0.97 0.94 1.   0.91 0.99
 0.97 0.74 0.84 0.93 0.94 0.88 0.94 0.91 0.94 0.82 1.   0.62 0.75 0.79
 0.67 0.98 0.99 0.97 0.99 0.83 0.87 0.97 0.99 1.   1.   0.91 0.95 1.
 0.95 0.89 0.9  0.89 0.95 0.96 0.91 0.98 0.88 0.92 0.95 0.77 0.89 0.44
 0.92 0.94 0.82 0.84 1.   0.93 0.71 0.89 0.88 0.61 0.99 0.98 0.94 0.98
 0.91 0.85 0.92 1.   0.99 0.73 1.   0.99 0.99 0.97 0.99 0.8  0.95 0.77
 0.92 1.   1.   0.98 0.91 0.97 0.99 0.85 0.91 0.97 0.23 0.95 0.8  0.98
 0.96 0.88 0.99 0.97 0.99 0.84 0.99 1.   0.52 0.32 0.91 0.96]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 1

  return f(*args, **kwargs)


[  8  63   6 751]
공통 
AUC : 0.6582506930619385
AUPRC : 0.9474247746829547
Optimized precision : 0.033735004013899866

function 사용
Accuracy : 0.9166666666666666
Precision(pb) : 0.9319419477780918
Recall(pb) : 0.8304860846785931
F1 score(pb) : 0.8782938141072618
MCC : 0.22748620224235377
G-mean : 0.3343396225939631

Confusion_matrix 사용 
 [[  8  63]
 [  6 751]]
Accuracy : 0.9166666666666666
Precision : 0.9226044226044227
Recall : 0.9920739762219286
F1 score : 0.9560789306174411
MCC : 0.24045655231691782
G-mean : 0.33433962259396305
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 

  return f(*args, **kwargs)


[  8  63   8 749]
공통 
AUC : 0.6636835544309451
AUPRC : 0.9486608031066219
Optimized precision : 0.03402143431815221

function 사용
Accuracy : 0.9142512077294686
Precision(pb) : 0.9331557603667706
Recall(pb) : 0.8261740992119527
F1 score(pb) : 0.8764122492982271
MCC : 0.20768205987017752
G-mean : 0.3338941332254215

Confusion_matrix 사용 
 [[  8  63]
 [  8 749]]
Accuracy : 0.9142512077294686
Precision : 0.9224137931034483
Recall : 0.9894319682959049
F1 score : 0.9547482472912683
MCC : 0.2246735403515386
G-mean : 0.33389413322542144
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 54

  return f(*args, **kwargs)


[1.   0.93 0.69 0.91 0.98 0.92 0.71 0.85 0.93 0.98 1.   0.87 0.94 0.83
 0.99 1.   0.89 0.83 1.   0.96 1.   0.8  0.97 0.98 0.96 1.   0.99 0.99
 0.98 1.   0.85 0.98 0.94 0.88 0.8  1.   0.99 0.94 0.91 0.94 0.91 0.73
 1.   0.91 0.92 1.   0.47 0.7  0.83 0.96 0.94 0.67 0.96 0.93 0.95 0.95
 0.97 0.84 0.94 0.92 0.88 0.72 0.88 0.94 0.77 0.98 0.89 0.99 0.81 0.95
 0.98 0.73 0.81 0.97 0.97 0.94 0.92 0.91 0.92 0.83 1.   0.47 0.72 0.74
 0.6  0.94 0.99 0.96 0.99 0.77 0.9  0.98 0.94 1.   1.   0.86 0.95 1.
 0.95 0.91 0.87 0.93 0.94 0.96 0.89 0.97 0.94 0.91 0.91 0.82 0.91 0.47
 0.91 0.95 0.74 0.8  0.96 0.86 0.79 0.87 0.89 0.67 1.   0.98 0.96 1.
 0.98 0.82 0.91 1.   1.   0.71 1.   0.96 1.   0.96 0.99 0.77 0.92 0.66
 0.93 0.97 1.   0.96 0.89 0.97 0.99 0.93 0.97 0.95 0.27 0.91 0.89 0.99
 0.94 0.79 1.   0.97 0.99 0.77 1.   1.   0.6  0.34 0.95 0.94]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 

  return f(*args, **kwargs)


[1.   0.99 0.67 0.92 0.95 0.91 0.69 0.93 0.95 1.   0.99 0.86 0.89 0.86
 0.97 0.99 0.85 0.85 1.   0.93 0.99 0.82 0.97 0.98 0.98 0.99 0.95 1.
 0.96 1.   0.84 0.98 0.89 0.91 0.85 0.99 1.   0.98 0.94 0.91 0.93 0.7
 0.99 0.91 0.88 0.99 0.42 0.74 0.87 0.96 0.96 0.58 0.98 0.9  0.95 0.98
 0.97 0.84 0.95 0.9  0.88 0.74 0.89 0.96 0.77 0.98 0.94 1.   0.84 1.
 0.99 0.8  0.81 0.9  0.95 0.91 0.91 0.94 0.94 0.89 1.   0.6  0.76 0.77
 0.66 0.93 1.   0.97 0.96 0.77 0.87 0.96 0.97 0.98 1.   0.83 0.94 1.
 0.98 0.94 0.95 0.88 0.9  0.98 0.88 0.96 0.9  0.95 0.91 0.79 0.88 0.46
 0.9  0.94 0.79 0.73 0.99 0.94 0.78 0.85 0.84 0.67 1.   0.98 0.93 1.
 0.89 0.9  0.95 1.   1.   0.74 1.   0.99 1.   0.92 0.96 0.87 0.93 0.71
 0.89 0.97 1.   1.   0.94 0.99 1.   0.92 0.92 0.98 0.21 0.88 0.87 0.98
 0.93 0.84 1.   0.99 0.99 0.88 1.   1.   0.61 0.38 0.92 0.87]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 196 2

  return f(*args, **kwargs)


[  7  64   8 749]
공통 
AUC : 0.6661022940815302
AUPRC : 0.9478316874532787
Optimized precision : 0.020406782483787998

function 사용
Accuracy : 0.9130434782608695
Precision(pb) : 0.9339205023738395
Recall(pb) : 0.8233099260701291
F1 score(pb) : 0.8751339691353417
MCC : 0.18479275133283987
G-mean : 0.3123293624958452

Confusion_matrix 사용 
 [[  7  64]
 [  8 749]]
Accuracy : 0.9130434782608695
Precision : 0.9212792127921279
Recall : 0.9894319682959049
F1 score : 0.954140127388535
MCC : 0.2026041008588967
G-mean : 0.31232936249584514
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 54

  return f(*args, **kwargs)


[1.   0.98 0.67 0.9  0.94 0.92 0.71 0.84 0.93 1.   1.   0.79 0.93 0.81
 0.96 0.99 0.94 0.88 0.99 0.98 0.99 0.84 0.92 0.99 0.97 0.98 0.98 1.
 0.98 1.   0.8  0.99 0.92 0.83 0.79 0.99 1.   0.94 0.89 0.89 0.87 0.75
 0.99 0.87 0.91 1.   0.4  0.76 0.8  0.95 0.93 0.62 0.96 0.95 0.94 0.96
 0.96 0.81 0.94 0.92 0.91 0.66 0.82 0.93 0.74 0.92 0.91 0.98 0.81 0.99
 0.96 0.78 0.75 0.94 0.93 0.85 0.95 0.95 0.96 0.87 1.   0.64 0.74 0.73
 0.62 0.94 0.98 0.96 1.   0.77 0.87 0.98 0.97 1.   1.   0.9  0.97 0.99
 0.98 0.93 0.93 0.96 0.87 0.98 0.85 0.97 0.96 0.87 0.9  0.77 0.89 0.54
 0.92 0.9  0.74 0.81 0.99 0.97 0.76 0.82 0.87 0.68 0.99 0.97 0.97 1.
 0.92 0.8  0.9  1.   0.99 0.76 1.   0.98 1.   0.98 1.   0.87 0.94 0.7
 0.94 0.97 0.99 0.98 0.91 0.99 0.98 0.92 0.85 0.92 0.2  0.89 0.82 0.97
 0.96 0.87 0.99 0.97 1.   0.86 1.   1.   0.61 0.31 0.93 0.91]
TEST: [  0   3  10  13  16  20  22  23  24  25  26  32  43  59  65  67  72  73
  83  87  89  94  96  97 100 117 120 121 132 134 146 150 158 175 180 189
 192 195 1

  return f(*args, **kwargs)


[  6  65   5 752]
공통 
AUC : 0.657646008149292
AUPRC : 0.9469804894154108
Optimized precision : 0.006096100693626181

function 사용
Accuracy : 0.9154589371980676
Precision(pb) : 0.9323027731308152
Recall(pb) : 0.8302845755994895
F1 score(pb) : 0.8783412780952489
MCC : 0.19051030658901236
G-mean : 0.2897393165673664

Confusion_matrix 사용 
 [[  6  65]
 [  5 752]]
Accuracy : 0.9154589371980676
Precision : 0.9204406364749081
Recall : 0.9933949801849405
F1 score : 0.9555273189326556
MCC : 0.20256792093008907
G-mean : 0.2897393165673664
TEST: [  1   5  11  12  34  36  39  46  49  52  56  68  71  93 102 104 112 119
 122 125 126 140 145 152 157 162 164 166 167 183 190 194 198 205 209 210
 211 214 223 228 230 233 234 237 249 257 258 264 272 277 280 281 291 298
 303 323 326 328 331 332 338 340 342 354 366 368 373 374 375 380 381 383
 389 399 401 403 409 412 413 421 422 423 431 432 434 443 444 447 456 460
 461 464 465 468 471 474 476 481 491 493 496 498 506 507 516 518 520 523
 526 528 532 537 538 54

  return f(*args, **kwargs)


(20, 15)

In [16]:
def new_result(data):
    auc = data[:,0]
    auprc = data[:,1]
    op = data[:,2]
    acc = data[:,3]
    precision = data[:,4]
    recall = data[:,5]
    f1 = data[:,6]
    mcc = data[:,7]
    g_mean = data[:,8]
    confu_acc = data[:,9]
    confu_precision = data[:,10]
    confu_recall = data[:,11]
    confu_f1 = data[:,12]
    confu_mcc = data[:,13]
    confu_g_mean = data[:,14]

    print('공통 \nAUC :\n',"[평균 :",format(auc.mean(), ".5f"), "] [표준편차 :",format(auc.std(), ".5f"),"]") # pb
    print("AUPRC :\n", "[평균 :",format(auprc.mean(), ".5f"), "] [표준편차 :",format(auprc.std(), ".5f"),"]")
    print("Optimized precision :\n","[평균 :",format(op.mean(), ".5f"), "] [표준편차 :",format(op.std(), ".5f"),"]")

    print("\nfunction 사용\nAccuracy :\n","[평균 :",format(acc.mean(), ".5f"), "] [표준편차 :",format(acc.std(), ".5f"),"]")
    print("Precision(pb) :\n", "[평균 :",format(precision.mean(), ".5f"), "] [표준편차 :",format(precision.std(), ".5f"),"]")
    print("Recall(pb) :\n", "[평균 :",format(recall.mean(), ".5f"), "] [표준편차 :",format(recall.std(), ".5f"),"]")
    print("F1 score(pb) :\n","[평균 :",format(f1.mean(), ".5f"), "] [표준편차 :",format(f1.std(), ".5f"),"]")
    print("MCC :\n", "[평균 :",format(mcc.mean(), ".5f"), "] [표준편차 :",format(mcc.std(), ".5f"),"]")
    print("G-mean :\n", "[평균 :",format(g_mean.mean(), ".5f"), "] [표준편차 :",format(g_mean.std(), ".5f"),"]")
    
    print("\nConfusion matrix 사용 \nAccuracy :\n", "[평균 :",format(confu_acc.mean(), ".5f"), "] [표준편차 :",format(confu_acc.std(), ".5f"),"]")
    print("Precision :\n", "[평균 :",format(confu_precision.mean(), ".5f"), "] [표준편차 :",format(confu_precision.std(), ".5f"),"]")
    print("Recall :\n", "[평균 :",format(confu_recall.mean(), ".5f"), "] [표준편차 :",format(confu_recall.std(), ".5f"),"]")
    print("F1 score :\n", "[평균 :",format(confu_f1.mean(), ".5f"), "] [표준편차 :",format(confu_f1.std(), ".5f"),"]")
    print("MCC :\n", "[평균 :",format(confu_mcc.mean(), ".5f"), "] [표준편차 :",format(confu_mcc.std(), ".5f"),"]")
    print("G-mean :\n", "[평균 :",format(confu_g_mean.mean(), ".5f"), "] [표준편차 :",format(confu_g_mean.std(), ".5f"),"]")
    

In [27]:
new_result(rf_y_np)

공통 
AUC :
 [평균 : 0.66133 ] [표준편차 : 0.00712 ]
AUPRC :
 [평균 : 0.94823 ] [표준편차 : 0.00175 ]
Optimized precision :
 [평균 : 0.01663 ] [표준편차 : 0.01230 ]

function 사용
Accuracy :
 [평균 : 0.91449 ] [표준편차 : 0.00182 ]
Precision(pb) :
 [평균 : 0.93263 ] [표준편차 : 0.00081 ]
Recall(pb) :
 [평균 : 0.82600 ] [표준편차 : 0.00638 ]
F1 score(pb) :
 [평균 : 0.87607 ] [표준편차 : 0.00336 ]
MCC :
 [평균 : 0.19265 ] [표준편차 : 0.02448 ]
G-mean :
 [평균 : 0.30633 ] [표준편차 : 0.02022 ]

Confusion matrix 사용 
Accuracy :
 [평균 : 0.91449 ] [표준편차 : 0.00182 ]
Precision :
 [평균 : 0.92114 ] [표준편차 : 0.00101 ]
Recall :
 [평균 : 0.99135 ] [표준편차 : 0.00164 ]
F1 score :
 [평균 : 0.95495 ] [표준편차 : 0.00097 ]
MCC :
 [평균 : 0.20762 ] [표준편차 : 0.02269 ]
G-mean :
 [평균 : 0.30633 ] [표준편차 : 0.02022 ]
