In [None]:
# This notebook trains several models (TOP-Net, CNN, LSTM, XGBoost, MLP, RF) and evaluates their performance on a held-out test set

# Import packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score,precision_score,f1_score,recall_score, accuracy_score, roc_curve, auc,average_precision_score, precision_recall_curve, precision_recall_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from scipy import interp
import torch
import os
import csv
from time import time
import pandas as pd
from glob import glob
import torch.nn.functional as F
from torch import nn
from sklearn.model_selection import KFold
from sklearn.metrics import average_precision_score
import statistics as s

# Import data: mount Google Drive so the data and result folders below are reachable.
# The google.colab package is only available when the notebook runs inside Google Colab.
from google.colab import drive
# Mounting prompts for authorization.
drive.mount('/content/drive')

# Input folder: later cells read the *.npz feature files and cohort_demographic.csv from here.
# Both prefixes end with '/' because file names are appended by plain string concatenation.
path = '/content/drive/My Drive/personal_files/topnet/data/stage_2/'
# Output folder for the per-model prediction CSV files.
path_result = '/content/drive/My Drive/personal_files/topnet/results/'

## TOP-Net

In [None]:
# Part 0. Define the model and fix the random seeds so repeated runs give the same results
_seed = 10  # seeds listed by the author: 7, 34, 12, 10
# Seed, in this order: the CPU generator, the current GPU, all GPUs, NumPy's global generator.
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all, np.random.seed):
    _seed_fn(_seed)
torch.backends.cudnn.benchmark = False  # True can speed things up; False keeps experiments repeatable
torch.backends.cudnn.deterministic = True  # original note: helps prevent numerical instability


class TOPNet(nn.Module):
    """Bidirectional-LSTM classifier that emits two class logits per input window.

    The LSTM output of every time step is flattened and passed through
    ``dense1 -> tanh -> dense2``.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden units per direction. Defaults to 32, the
            value that used to be hard-coded.
        seq_len: number of time steps in every window. Defaults to 21, the
            value that used to be hard-coded. ``dense1`` is sized from it, so
            windows of another length need a model built with that length.
    """

    def __init__(self, input_size, hidden_size=32, seq_len=21):
        super(TOPNet, self).__init__()
        # Layers are created in the original order (lstm, dense1, dense2) so a
        # fixed seed still produces the same initial weights.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.tanh = nn.Tanh()
        # Factor 2: the forward and backward hidden states are concatenated.
        self.dense1 = nn.Linear(hidden_size * 2 * seq_len, 8)
        self.dense2 = nn.Linear(8, 2)

    def forward(self, x):
        """Return logits of shape (batch, 1, 2) for ``x`` of shape (batch, seq_len, input_size)."""
        out, _ = self.lstm(x)
        # Flatten all time steps of a window into a single feature vector.
        out = out.reshape(x.size(0), 1, -1)
        out = self.dense1(out)
        out = self.tanh(out)  # TODO (original author): test whether this activation is needed
        out = self.dense2(out)
        return out

In [None]:
# Part 1. Train the model (with the hyper-parameters taken from the cv result) and evaluate it
if __name__ == '__main__':
    # Part 1.0 prepare data
    # Load the positive and negative windows. The code below indexes them as
    # (window, time step, feature) and reads subject_id from the last feature column.
    str_pos = path + 'ed_pos_6slid_features.npz'
    str_neg = path + 'ed_negslid_features.npz'
    # np.load returns an NpzFile that keeps the archive open; the with-blocks close it.
    with np.load(str_pos) as npz_pos:
        x_pos = npz_pos['arr_0']
    with np.load(str_neg) as npz_neg:
        x_neg = npz_neg['arr_0']

    # Load personal information and encode the categorical columns as integers.
    cohort_info = pd.read_csv(path + 'cohort_demographic.csv')
    mapping_gender = {'M': 0, 'F': 1}
    mapping_at = {'EMERGENCY': 0, 'ELECTIVE': 1, 'URGENT': 2}
    mapping_fc = {'MICU': 0, 'SICU': 1, 'CCU': 2, 'CSRU': 3, 'TSICU': 4}
    cohort_info = cohort_info.replace(
        {'gender': mapping_gender, 'admission_type': mapping_at, 'first_careunit': mapping_fc})
    # Keep only the patients that occur in the windows. The ids are read straight
    # from the last feature column instead of assuming a fixed width of 32.
    id_total = np.unique(np.concatenate((x_pos[..., -1].ravel(), x_neg[..., -1].ravel()))).astype(int)
    cohort_info = cohort_info[cohort_info.subject_id.isin(id_total)].reset_index(drop=True).drop(['bmi'], axis=1)

    # add the personal info of x_pos, x_neg
    def data_add_feature(data, cohort=None):
        """Append each window's demographic values to every one of its time steps.

        Args:
            data: array indexed as (window, time step, feature) whose last
                feature column holds the subject_id.
            cohort: DataFrame with a ``subject_id`` column; the values of all
                columns after the first one are appended. Defaults to the
                enclosing ``cohort_info``.

        Returns:
            Array of shape (windows, time steps, features + appended columns).

        Raises:
            IndexError: if a window's subject_id has no row in ``cohort``.
        """
        if cohort is None:
            cohort = cohort_info
        data = np.asarray(data)
        n_extra = cohort.shape[1] - 1
        if data.ndim == 3 and data.shape[0] == 0:
            # Nothing to look up; return an empty array with the widened feature axis.
            return np.empty((0, data.shape[1], data.shape[2] + n_extra))
        extra_by_subject = {}  # windows of one patient share the same demographic row
        windows = []
        for window in data:
            subject_id = int(window[0, -1])
            if subject_id not in extra_by_subject:
                matches = cohort.loc[cohort.subject_id == subject_id].values.tolist()
                if not matches:
                    raise IndexError('subject_id %d not found in cohort table' % subject_id)
                # First matching row, without its leading subject_id column.
                extra_by_subject[subject_id] = np.reshape(matches[0][1:], (1, -1))
            extra = np.repeat(extra_by_subject[subject_id], window.shape[0], axis=0)
            windows.append(np.concatenate((window, extra), 1))
        # One stack at the end instead of growing an array with np.dstack per window
        # (quadratic copying); it also keeps the window axis when there is a single window.
        return np.stack(windows)

    # (, 21, 32 + 5) 0:30 - features | 31: subject_id | 32 - end: personal info
    x_pos_new = data_add_feature(x_pos)
    x_neg_new = data_add_feature(x_neg)

    # Merge the positive and negative data; label 1 = positive window, 0 = negative window.
    x_all = np.vstack((x_pos_new, x_neg_new))
    x_all_label = np.concatenate((np.ones(x_pos_new.shape[0]), np.zeros(x_neg_new.shape[0])))

    # Create 80% train and 20% test sets to train and evaluate the model.
    # NOTE(review): the split is per window, not per subject_id; if a patient contributes
    # several windows they can land on both sides of the split - confirm this is intended.
    x_all_train, x_test, y_all_train, y_test = train_test_split(x_all, x_all_label, test_size=0.2, random_state=0)

    # Standardize with training-set statistics only (one mean/std per time step and feature).
    x_train_mean = np.mean(x_all_train, axis=0)
    x_train_std = np.std(x_all_train, axis=0)
    # An entry that is constant over the training set has std 0 and would become
    # NaN/inf after the division; divide by 1 there instead.
    x_train_std = np.where(x_train_std == 0, 1.0, x_train_std)
    x_train_scaled = (x_all_train - x_train_mean) / x_train_std
    x_test_scaled = (x_test - x_train_mean) / x_train_std

    fe_num = 16  # 11 (hr) | 16 (hr + rr) | 21 (hr + rr + spo2) | 32-36: personal info
    # Selected feature columns: 0-15, then 16-20, then the personal info columns 32-36.
    fe_id = list(range(0, fe_num)) + [16, 17, 18, 19, 20] + [32, 33, 34, 35, 36]
    x_train = torch.FloatTensor(x_train_scaled[:, :, fe_id])
    x_test = torch.FloatTensor(x_test_scaled[:, :, fe_id])
    y_train = torch.LongTensor(y_all_train)
    y_test = torch.LongTensor(y_test)

    # hyper-parameters setting
    lr = 0.0002
    epochs = 20
    bs = 64
    model = TOPNet(input_size=len(fe_id))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Train the model. The samples keep their order; only the order in which the
    # fixed mini-batches are visited is reshuffled every epoch.
    n_batches = (x_train.shape[0] - 1) // bs + 1
    model.train()
    for epoch in range(epochs):
        shuffle_idxs = list(range(n_batches))
        np.random.shuffle(shuffle_idxs)  # random batch order
        for batch_id in shuffle_idxs:
            start_i = batch_id * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            outputs = model(xb).view(xb.size(0), 2)
            loss = criterion(outputs, yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate once after the last epoch. The metrics used to be recomputed after
    # every epoch and overwritten, so only the final values were ever reported.
    model.eval()
    with torch.no_grad():
        outputs = model(x_test).view(x_test.size(0), 2)
        probas_ls = torch.nn.functional.softmax(outputs, dim=1)
        _, predicted_m = torch.max(outputs, 1)
    # Hand plain NumPy arrays to scikit-learn and to NumPy indexing.
    y_true = y_test.numpy()
    pos_prob = probas_ls[:, 1].numpy()
    fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_true, pos_prob)
    roc_auc_ls = auc(fpr_ls, tpr_ls)
    # Operating point: the threshold maximising tpr - fpr (Youden's J).
    # NOTE(review): it is chosen on the test set itself, so the threshold-based
    # metrics (acc / sen / spe / f1 / precision) are optimistic.
    optimal_idx = np.argmax(tpr_ls - fpr_ls)
    optimal_threshold = thresholds_ROC[optimal_idx]
    sensitivity_ls = tpr_ls[optimal_idx]
    specificity_ls = 1 - fpr_ls[optimal_idx]
    data_pred = (pos_prob >= optimal_threshold).astype(float)
    accuracy_ls = accuracy_score(y_true, data_pred)
    F1_ls = f1_score(y_true, data_pred)
    pr_each = average_precision_score(y_true, pos_prob)
    precision_ls = precision_score(y_true, data_pred)

    print("auc:", round(100*roc_auc_ls,1), "ap:", round(100*pr_each,1), "acc:", round(100*accuracy_ls, 1), 
          "sen:", round(100*sensitivity_ls,1), "spe:", round(100*specificity_ls,1), "f1:", round(100*F1_ls,1), "pre:", round(100*precision_ls, 1))
    # Table of true labels and predicted positive-class probabilities.
    data_save = pd.DataFrame(np.array([y_true, pos_prob]).transpose(1,0), columns=['true_label', 'pred'])
    # Saving is disabled for this model; uncomment to write the table.
    # data_save.to_csv(path_result + 'topnet_6h_all.csv', index=False)

## CNN

In [None]:
# Part 0. define model
class DeePCNN(nn.Module):
    """Two-stage convolutional classifier that returns two class logits.

    Each stage is Conv2d(kernel 5, stride 1, padding 1) -> ReLU -> MaxPool2d(2).
    The linear layer expects a flattened 16 x 4 x 4 feature map, which is what
    the 1 x 22 x 22 inputs built in the training cell produce.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(1, 8, 5, 1, 1), nn.ReLU(), nn.MaxPool2d(2))
        self.conv2 = nn.Sequential(nn.Conv2d(8, 16, 5, 1, 1), nn.ReLU(), nn.MaxPool2d(2))
        self.out = nn.Linear(16 * 4 * 4, 2)

    def forward(self, x):
        """Run both convolution stages, flatten per sample and classify."""
        features = self.conv2(self.conv1(x))
        return self.out(features.flatten(start_dim=1))

In [None]:
# Part 1. Train the model (with the hyper-parameters taken from the cv result) and evaluate it
if __name__ == '__main__':
    # Part 1.0 prepare data
    # Load the positive and negative windows. The code below indexes them as
    # (window, time step, feature) and reads subject_id from the last feature column.
    str_pos = path + 'ed_pos_6slid_features.npz'
    str_neg = path + 'ed_negslid_features.npz'
    # np.load returns an NpzFile that keeps the archive open; the with-blocks close it.
    with np.load(str_pos) as npz_pos:
        x_pos = npz_pos['arr_0']
    with np.load(str_neg) as npz_neg:
        x_neg = npz_neg['arr_0']

    # Load personal information and encode the categorical columns as integers.
    cohort_info = pd.read_csv(path + 'cohort_demographic.csv')
    mapping_gender = {'M': 0, 'F': 1}
    mapping_at = {'EMERGENCY': 0, 'ELECTIVE': 1, 'URGENT': 2}
    mapping_fc = {'MICU': 0, 'SICU': 1, 'CCU': 2, 'CSRU': 3, 'TSICU': 4}
    cohort_info = cohort_info.replace(
        {'gender': mapping_gender, 'admission_type': mapping_at, 'first_careunit': mapping_fc})
    # Keep only the patients that occur in the windows. The ids are read straight
    # from the last feature column instead of assuming a fixed width of 32.
    id_total = np.unique(np.concatenate((x_pos[..., -1].ravel(), x_neg[..., -1].ravel()))).astype(int)
    cohort_info = cohort_info[cohort_info.subject_id.isin(id_total)].reset_index(drop=True).drop(['bmi'], axis=1)

    # add the personal info of x_pos, x_neg
    def data_add_feature(data, cohort=None):
        """Append each window's demographic values to every one of its time steps.

        Args:
            data: array indexed as (window, time step, feature) whose last
                feature column holds the subject_id.
            cohort: DataFrame with a ``subject_id`` column; the values of all
                columns after the first one are appended. Defaults to the
                enclosing ``cohort_info``.

        Returns:
            Array of shape (windows, time steps, features + appended columns).

        Raises:
            IndexError: if a window's subject_id has no row in ``cohort``.
        """
        if cohort is None:
            cohort = cohort_info
        data = np.asarray(data)
        n_extra = cohort.shape[1] - 1
        if data.ndim == 3 and data.shape[0] == 0:
            # Nothing to look up; return an empty array with the widened feature axis.
            return np.empty((0, data.shape[1], data.shape[2] + n_extra))
        extra_by_subject = {}  # windows of one patient share the same demographic row
        windows = []
        for window in data:
            subject_id = int(window[0, -1])
            if subject_id not in extra_by_subject:
                matches = cohort.loc[cohort.subject_id == subject_id].values.tolist()
                if not matches:
                    raise IndexError('subject_id %d not found in cohort table' % subject_id)
                # First matching row, without its leading subject_id column.
                extra_by_subject[subject_id] = np.reshape(matches[0][1:], (1, -1))
            extra = np.repeat(extra_by_subject[subject_id], window.shape[0], axis=0)
            windows.append(np.concatenate((window, extra), 1))
        # One stack at the end instead of growing an array with np.dstack per window
        # (quadratic copying); it also keeps the window axis when there is a single window.
        return np.stack(windows)

    # (, 21, 32 + 5) 0:30 - features | 31: subject_id | 32 - end: personal info
    x_pos_new = data_add_feature(x_pos)
    x_neg_new = data_add_feature(x_neg)

    # Merge the positive and negative data; label 1 = positive window, 0 = negative window.
    x_all = np.vstack((x_pos_new, x_neg_new))
    x_all_label = np.concatenate((np.ones(x_pos_new.shape[0]), np.zeros(x_neg_new.shape[0])))

    # Create 80% train and 20% test sets to train and evaluate the model.
    # NOTE(review): the split is per window, not per subject_id; if a patient contributes
    # several windows they can land on both sides of the split - confirm this is intended.
    x_all_train, x_test, y_all_train, y_test = train_test_split(x_all, x_all_label, test_size=0.2, random_state=0)

    # Standardize with training-set statistics only (one mean/std per time step and feature).
    x_train_mean = np.mean(x_all_train, axis=0)
    x_train_std = np.std(x_all_train, axis=0)
    # An entry that is constant over the training set has std 0 and would become
    # NaN/inf after the division; divide by 1 there instead.
    x_train_std = np.where(x_train_std == 0, 1.0, x_train_std)
    x_train_scaled = (x_all_train - x_train_mean) / x_train_std
    x_test_scaled = (x_test - x_train_mean) / x_train_std

    fe_num = 21  # 11 (hr) | 16 (hr + rr) | 21 (hr + rr + spo2) | 32-36: personal info
    # Only the first fe_num columns are used; the personal info columns (32-36) are left out.
    fe_id = list(range(0, fe_num))

    # Turn every 21 x 21 window into a 22 x 22 single-channel image by adding one
    # zero row in front of the time axis and one zero column in front of the feature axis.
    pad_width = ((0, 0), (1, 0), (1, 0))
    x_input_train = np.pad(x_train_scaled[:, :, fe_id], pad_width, mode='constant').reshape(-1, 1, 22, 22)
    x_input_test = np.pad(x_test_scaled[:, :, fe_id], pad_width, mode='constant').reshape(-1, 1, 22, 22)

    x_train = torch.FloatTensor(x_input_train)
    x_test = torch.FloatTensor(x_input_test)
    y_train = torch.LongTensor(y_all_train)
    y_test = torch.LongTensor(y_test)

    # hyper-parameters setting
    lr = 0.0002
    epochs = 20
    bs = 64
    model = DeePCNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Train the model. The samples keep their order; only the order in which the
    # fixed mini-batches are visited is reshuffled every epoch.
    n_batches = (x_train.shape[0] - 1) // bs + 1
    model.train()
    for epoch in range(epochs):
        shuffle_idxs = list(range(n_batches))
        np.random.shuffle(shuffle_idxs)  # random batch order
        for batch_id in shuffle_idxs:
            start_i = batch_id * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            outputs = model(xb).view(xb.size(0), 2)
            loss = criterion(outputs, yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate once after the last epoch. The metrics used to be recomputed after
    # every epoch and overwritten, so only the final values were ever reported.
    model.eval()
    with torch.no_grad():
        outputs = model(x_test).view(x_test.size(0), 2)
        probas_ls = torch.nn.functional.softmax(outputs, dim=1)
        _, predicted_m = torch.max(outputs, 1)
    # Hand plain NumPy arrays to scikit-learn and to NumPy indexing.
    y_true = y_test.numpy()
    pos_prob = probas_ls[:, 1].numpy()
    fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_true, pos_prob)
    roc_auc_ls = auc(fpr_ls, tpr_ls)
    # Operating point: the threshold maximising tpr - fpr (Youden's J).
    # NOTE(review): it is chosen on the test set itself, so the threshold-based
    # metrics (acc / sen / spe / f1 / precision) are optimistic.
    optimal_idx = np.argmax(tpr_ls - fpr_ls)
    optimal_threshold = thresholds_ROC[optimal_idx]
    sensitivity_ls = tpr_ls[optimal_idx]
    specificity_ls = 1 - fpr_ls[optimal_idx]
    data_pred = (pos_prob >= optimal_threshold).astype(float)
    accuracy_ls = accuracy_score(y_true, data_pred)
    F1_ls = f1_score(y_true, data_pred)
    pr_each = average_precision_score(y_true, pos_prob)
    precision_ls = precision_score(y_true, data_pred)

    print("auc:", round(100*roc_auc_ls,1), "ap:", round(100*pr_each,1), "acc:", round(100*accuracy_ls, 1), 
          "sen:", round(100*sensitivity_ls,1), "spe:", round(100*specificity_ls,1), "f1:", round(100*F1_ls,1), "precision:", round(100*precision_ls,1))
    # Table of true labels and predicted positive-class probabilities, written to the result folder.
    data_save = pd.DataFrame(np.array([y_true, pos_prob]).transpose(1,0), columns=['true_label', 'pred'])
    data_save.to_csv(path_result + 'cnn_6h.csv', index=False)

## LSTM

In [None]:
# Part 0. define model
class DeePLSTM(nn.Module):
    """Unidirectional-LSTM classifier that emits two class logits per input window.

    The LSTM output of every time step is flattened and fed to one linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden units. Defaults to 16, the value that used
            to be hard-coded.
        seq_len: number of time steps in every window. Defaults to 21, the
            value that used to be hard-coded. ``dense1`` is sized from it, so
            windows of another length need a model built with that length.
    """

    def __init__(self, input_size, hidden_size=16, seq_len=21):
        super(DeePLSTM, self).__init__()
        # Layers are created in the original order (lstm, dense1) so a fixed
        # seed still produces the same initial weights.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=False)
        self.dense1 = nn.Linear(hidden_size * seq_len, 2)

    def forward(self, x):
        """Return logits of shape (batch, 1, 2) for ``x`` of shape (batch, seq_len, input_size)."""
        out, _ = self.lstm(x)
        # Flatten all time steps of a window into a single feature vector.
        out = out.reshape(x.size(0), 1, -1)
        out = self.dense1(out)
        return out

In [None]:
# Part 1. Train the model (with the hyper-parameters taken from the cv result) and evaluate it
if __name__ == '__main__':
    # Part 1.0 prepare data
    # Load the positive and negative windows. The code below indexes them as
    # (window, time step, feature) and reads subject_id from the last feature column.
    str_pos = path + 'ed_pos_6slid_features.npz'
    str_neg = path + 'ed_negslid_features.npz'
    # np.load returns an NpzFile that keeps the archive open; the with-blocks close it.
    with np.load(str_pos) as npz_pos:
        x_pos = npz_pos['arr_0']
    with np.load(str_neg) as npz_neg:
        x_neg = npz_neg['arr_0']

    # Load personal information and encode the categorical columns as integers.
    cohort_info = pd.read_csv(path + 'cohort_demographic.csv')
    mapping_gender = {'M': 0, 'F': 1}
    mapping_at = {'EMERGENCY': 0, 'ELECTIVE': 1, 'URGENT': 2}
    mapping_fc = {'MICU': 0, 'SICU': 1, 'CCU': 2, 'CSRU': 3, 'TSICU': 4}
    cohort_info = cohort_info.replace(
        {'gender': mapping_gender, 'admission_type': mapping_at, 'first_careunit': mapping_fc})
    # Keep only the patients that occur in the windows. The ids are read straight
    # from the last feature column instead of assuming a fixed width of 32.
    id_total = np.unique(np.concatenate((x_pos[..., -1].ravel(), x_neg[..., -1].ravel()))).astype(int)
    cohort_info = cohort_info[cohort_info.subject_id.isin(id_total)].reset_index(drop=True).drop(['bmi'], axis=1)

    # add the personal info of x_pos, x_neg
    def data_add_feature(data, cohort=None):
        """Append each window's demographic values to every one of its time steps.

        Args:
            data: array indexed as (window, time step, feature) whose last
                feature column holds the subject_id.
            cohort: DataFrame with a ``subject_id`` column; the values of all
                columns after the first one are appended. Defaults to the
                enclosing ``cohort_info``.

        Returns:
            Array of shape (windows, time steps, features + appended columns).

        Raises:
            IndexError: if a window's subject_id has no row in ``cohort``.
        """
        if cohort is None:
            cohort = cohort_info
        data = np.asarray(data)
        n_extra = cohort.shape[1] - 1
        if data.ndim == 3 and data.shape[0] == 0:
            # Nothing to look up; return an empty array with the widened feature axis.
            return np.empty((0, data.shape[1], data.shape[2] + n_extra))
        extra_by_subject = {}  # windows of one patient share the same demographic row
        windows = []
        for window in data:
            subject_id = int(window[0, -1])
            if subject_id not in extra_by_subject:
                matches = cohort.loc[cohort.subject_id == subject_id].values.tolist()
                if not matches:
                    raise IndexError('subject_id %d not found in cohort table' % subject_id)
                # First matching row, without its leading subject_id column.
                extra_by_subject[subject_id] = np.reshape(matches[0][1:], (1, -1))
            extra = np.repeat(extra_by_subject[subject_id], window.shape[0], axis=0)
            windows.append(np.concatenate((window, extra), 1))
        # One stack at the end instead of growing an array with np.dstack per window
        # (quadratic copying); it also keeps the window axis when there is a single window.
        return np.stack(windows)

    # (, 21, 32 + 5) 0:30 - features | 31: subject_id | 32 - end: personal info
    x_pos_new = data_add_feature(x_pos)
    x_neg_new = data_add_feature(x_neg)

    # Merge the positive and negative data; label 1 = positive window, 0 = negative window.
    x_all = np.vstack((x_pos_new, x_neg_new))
    x_all_label = np.concatenate((np.ones(x_pos_new.shape[0]), np.zeros(x_neg_new.shape[0])))

    # Create 80% train and 20% test sets to train and evaluate the model.
    # NOTE(review): the split is per window, not per subject_id; if a patient contributes
    # several windows they can land on both sides of the split - confirm this is intended.
    x_all_train, x_test, y_all_train, y_test = train_test_split(x_all, x_all_label, test_size=0.2, random_state=0)

    # Standardize with training-set statistics only (one mean/std per time step and feature).
    x_train_mean = np.mean(x_all_train, axis=0)
    x_train_std = np.std(x_all_train, axis=0)
    # An entry that is constant over the training set has std 0 and would become
    # NaN/inf after the division; divide by 1 there instead.
    x_train_std = np.where(x_train_std == 0, 1.0, x_train_std)
    x_train_scaled = (x_all_train - x_train_mean) / x_train_std
    x_test_scaled = (x_test - x_train_mean) / x_train_std

    fe_num = 21  # 11 (hr) | 16 (hr + rr) | 21 (hr + rr + spo2) | 32-36: personal info
    # Only the first fe_num columns are used; the personal info columns (32-36) are left out.
    fe_id = list(range(0, fe_num))
    x_train = torch.FloatTensor(x_train_scaled[:, :, fe_id])
    x_test = torch.FloatTensor(x_test_scaled[:, :, fe_id])
    y_train = torch.LongTensor(y_all_train)
    y_test = torch.LongTensor(y_test)

    # hyper-parameters setting
    lr = 0.0002
    epochs = 20
    bs = 64
    model = DeePLSTM(input_size=len(fe_id))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Train the model. The samples keep their order; only the order in which the
    # fixed mini-batches are visited is reshuffled every epoch.
    n_batches = (x_train.shape[0] - 1) // bs + 1
    model.train()
    for epoch in range(epochs):
        shuffle_idxs = list(range(n_batches))
        np.random.shuffle(shuffle_idxs)  # random batch order
        for batch_id in shuffle_idxs:
            start_i = batch_id * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            outputs = model(xb).view(xb.size(0), 2)
            loss = criterion(outputs, yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate once after the last epoch. The metrics used to be recomputed after
    # every epoch and overwritten, so only the final values were ever reported.
    model.eval()
    with torch.no_grad():
        outputs = model(x_test).view(x_test.size(0), 2)
        probas_ls = torch.nn.functional.softmax(outputs, dim=1)
        _, predicted_m = torch.max(outputs, 1)
    # Hand plain NumPy arrays to scikit-learn and to NumPy indexing.
    y_true = y_test.numpy()
    pos_prob = probas_ls[:, 1].numpy()
    fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_true, pos_prob)
    roc_auc_ls = auc(fpr_ls, tpr_ls)
    # Operating point: the threshold maximising tpr - fpr (Youden's J).
    # NOTE(review): it is chosen on the test set itself, so the threshold-based
    # metrics (acc / sen / spe / f1 / precision) are optimistic.
    optimal_idx = np.argmax(tpr_ls - fpr_ls)
    optimal_threshold = thresholds_ROC[optimal_idx]
    sensitivity_ls = tpr_ls[optimal_idx]
    specificity_ls = 1 - fpr_ls[optimal_idx]
    data_pred = (pos_prob >= optimal_threshold).astype(float)
    accuracy_ls = accuracy_score(y_true, data_pred)
    F1_ls = f1_score(y_true, data_pred)
    pr_each = average_precision_score(y_true, pos_prob)
    precision_ls = precision_score(y_true, data_pred)

    print("auc:", round(100*roc_auc_ls,1), "ap:", round(100*pr_each,1), "acc:", round(100*accuracy_ls, 1), 
          "sen:", round(100*sensitivity_ls,1), "spe:", round(100*specificity_ls,1), "f1:", round(100*F1_ls,1), "precision:", round(100*precision_ls,1))
    # Table of true labels and predicted positive-class probabilities.
    data_save = pd.DataFrame(np.array([y_true, pos_prob]).transpose(1,0), columns=['true_label', 'pred'])
    # Saving is disabled for this model; uncomment to write the table.
    # data_save.to_csv(path_result + 'lstm_6h.csv', index=False)

## ML models

In [None]:
# Import packages
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.neural_network import MLPClassifier
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

**XGBoost**

In [None]:
# Hyper-parameters for XGBoost. They used to be defined here without being handed
# to the classifier, which therefore ran with the library defaults.
params = {'learning_rate': 0.025, 'max_depth': 7, 'n_estimators': 400, 'objective': 'binary:logistic', 'subsample': 0.85}
columns_name = ['hr_mean', 'hr_std', 'hr_slope', 'hr_abs_engergy', 'hr_c3', 'hr_c2', 'hr_q_07', 'hr_sum', 'all_autocorrelation', 'hr_quantiles_01', 'hr_quantiles_03', 'resp_mean',
                'resp_std', 'resp_slope', 'resp_abs_engergy', 'resp_c3', 'spo2_mean', 'spo2_std', 'spo2_slope', 'spo2_abs_engergy', 'spo2_c3']
# NOTE(review): this cell reuses x_all_train / y_all_train / x_test / y_test from whichever
# cell ran last. After the LSTM cell above, x_all_train is 3-D and x_test / y_test are torch
# tensors, while columns_name lists 21 features - confirm which 2-D feature matrix is meant.
# Inner split of the training data: the held-out part is only passed as eval_set for early stopping.
x_train_t, x_test_t, y_train_t, y_test_t = train_test_split(x_all_train, y_all_train, test_size=0.2, random_state=0)
clf_XG = xgb.XGBClassifier(**params)
# NOTE(review): newer xgboost releases expect early_stopping_rounds / eval_metric in the
# constructor rather than in fit() - confirm against the installed version.
clf_XG.fit(x_train_t, y_train_t, early_stopping_rounds=80, eval_metric="auc", eval_set=[(x_test_t, y_test_t)])
predicted_m = clf_XG.predict(x_test)
probas_ls = clf_XG.predict_proba(x_test)
fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_test, probas_ls[:, 1])
roc_auc_ls = auc(fpr_ls, tpr_ls)
# Operating point: the threshold maximising tpr - fpr (Youden's J), chosen on the test set.
optimal_idx = np.argmax(tpr_ls - fpr_ls)
optimal_threshold = thresholds_ROC[optimal_idx]
sensitivity_ls = tpr_ls[optimal_idx]
specificity_ls = 1 - fpr_ls[optimal_idx]
data_pred = np.zeros(len(probas_ls[:, 1]))
data_pred[probas_ls[:, 1] >= optimal_threshold] = 1
accuracy_ls = accuracy_score(y_test, data_pred)
F1_ls = f1_score(y_test, data_pred)
pr_each = average_precision_score(y_test, probas_ls[:, 1])
precision_ls = precision_score(y_test, data_pred)

print("auc:", round(100*roc_auc_ls,1), "ap:", round(100*pr_each,1), "acc:", round(100*accuracy_ls, 1), 
      "sen:", round(100*sensitivity_ls,1), "spe:", round(100*specificity_ls,1), "f1:", round(100*F1_ls,1), "precision:", round(100*precision_ls,1))
# Table of true labels and predicted positive-class probabilities.
data_save = pd.DataFrame(np.array([y_test, probas_ls[:,1]]).transpose(1,0), columns=['true_label', 'pred'])
# Saving is disabled for this model; uncomment to write the table.
# data_save.to_csv(path_result + 'xgboost_6h.csv', index=False)

# Rank the features by importance (nothing is plotted here) and save the non-zero ones.
features_import = pd.concat([pd.DataFrame(columns_name), pd.DataFrame(clf_XG.feature_importances_)], axis=1)
features_import.columns = ['features_name', 'values']
features_import = features_import.sort_values(by='values', ascending=False)
features_import = features_import[features_import['values'] != 0]
# NOTE(review): every other output file of this notebook is tagged '6h'; confirm the '2h' in this name.
features_import.to_csv(path_result + 'xgboost_feature_ranking_2h.csv', index=False)

**MLP**

In [None]:
# NOTE(review): this cell reuses x_all_train / y_all_train / x_test / y_test from whichever
# cell ran last (after the LSTM cell above, x_test / y_test are torch tensors) - confirm the
# intended 2-D feature matrix before relying on a fresh Restart & Run All.
# Standardize with training-set statistics only.
x_train_mean = np.mean(x_all_train, axis=0)
x_train_std = np.std(x_all_train, axis=0)
# An entry that is constant over the training set has std 0 and would become
# NaN/inf after the division; divide by 1 there instead.
x_train_std = np.where(x_train_std == 0, 1.0, x_train_std)
x_train_scaled = (x_all_train - x_train_mean) / x_train_std
x_test_scaled = (x_test - x_train_mean) / x_train_std
# Multi-layer perceptron with scikit-learn's default settings.
clf_NN_bs = MLPClassifier()
clf_NN_bs = clf_NN_bs.fit(x_train_scaled, y_all_train)
predicted_m = clf_NN_bs.predict(x_test_scaled)
probas_ls = clf_NN_bs.predict_proba(x_test_scaled)
fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_test, probas_ls[:, 1])
roc_auc_ls = auc(fpr_ls, tpr_ls)
# Operating point: the threshold maximising tpr - fpr (Youden's J), chosen on the test set.
optimal_idx = np.argmax(tpr_ls - fpr_ls)
optimal_threshold = thresholds_ROC[optimal_idx]
sensitivity_ls = tpr_ls[optimal_idx]
specificity_ls = 1 - fpr_ls[optimal_idx]
data_pred = np.zeros(len(probas_ls[:, 1]))
data_pred[probas_ls[:, 1] >= optimal_threshold] = 1
accuracy_ls = accuracy_score(y_test, data_pred)
F1_ls = f1_score(y_test, data_pred)
pr_each = average_precision_score(y_test, probas_ls[:, 1])
precision_ls = precision_score(y_test, data_pred)

print("auc:", round(100*roc_auc_ls,1), "ap:", round(100*pr_each,1), "acc:", round(100*accuracy_ls, 1), 
      "sen:", round(100*sensitivity_ls,1), "spe:", round(100*specificity_ls,1), "f1:", round(100*F1_ls,1), "precision:", round(100*precision_ls,1))
# Table of true labels and predicted positive-class probabilities, written to the result folder.
data_save = pd.DataFrame(np.array([y_test, probas_ls[:,1]]).transpose(1,0), columns=['true_label', 'pred'])
data_save.to_csv(path_result + 'mlp_6h.csv', index=False)

**RF**

In [None]:
# Random forest with scikit-learn's default settings, fitted on the unscaled training data.
# NOTE(review): x_all_train / y_all_train / x_test / y_test come from whichever cell ran last.
clf_RF_bs = RandomForestClassifier().fit(x_all_train, y_all_train)
probas_ls = clf_RF_bs.predict_proba(x_test)
predicted_m = clf_RF_bs.predict(x_test)

# Ranking metrics computed from the positive-class probability (column 1).
fpr_ls, tpr_ls, thresholds_ROC = roc_curve(y_test, probas_ls[:, 1])
roc_auc_ls = auc(fpr_ls, tpr_ls)
pr_each = average_precision_score(y_test, probas_ls[:, 1])

# Operating point: the threshold with the largest tpr - fpr.
optimal_idx = np.argmax(tpr_ls - fpr_ls)
optimal_threshold = thresholds_ROC[optimal_idx]
sensitivity_ls, specificity_ls = tpr_ls[optimal_idx], 1 - fpr_ls[optimal_idx]

# Hard 0/1 predictions at that threshold and the metrics derived from them.
data_pred = np.where(probas_ls[:, 1] >= optimal_threshold, 1.0, 0.0)
accuracy_ls = accuracy_score(y_test, data_pred)
precision_ls = precision_score(y_test, data_pred)
F1_ls = f1_score(y_test, data_pred)

print(
    "auc:", round(100*roc_auc_ls,1),
    "ap:", round(100*pr_each,1),
    "acc:", round(100*accuracy_ls, 1),
    "sen:", round(100*sensitivity_ls,1),
    "spe:", round(100*specificity_ls,1),
    "f1:", round(100*F1_ls,1),
    "precision:", round(100*precision_ls,1),
)
# Two-column table (true label, predicted probability) written to the result folder.
data_save = pd.DataFrame(np.array([y_test, probas_ls[:,1]]).T, columns=['true_label', 'pred'])
data_save.to_csv(path_result + 'rf_6h.csv', index=False)