In [18]:
import os
from sklearn.decomposition import PCA
import random
from numpy.random import seed

seed(1)
import numpy as np
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Input, Activation, BatchNormalization, Flatten
from keras.callbacks import EarlyStopping, LearningRateScheduler
from sklearn.preprocessing import label_binarize
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score

from tensorflow.keras.callbacks import EarlyStopping

In [19]:
# Width of each input feature vector; used as the shape of the network's input layer.
vector_size = 3390
# Number of target classes; sets the size of the final softmax layer and of the score arrays.
event_num = 2
# Dropout rate applied after each hidden stage of the network.
droprate = 0.3

In [20]:
def DNN():
    """Build, compile and summarise the feed-forward classifier.

    The network takes vectors of width ``vector_size`` and passes them through
    two hidden stages (512 then 256 ReLU units). Each stage is followed by
    batch normalisation and dropout at rate ``droprate``. The output is an
    ``event_num``-way dense layer followed by a softmax activation.

    The model is compiled with the Adam optimiser, categorical cross-entropy
    loss and the accuracy metric, and its summary is printed.

    Returns:
        The compiled Keras ``Model``.
    """
    print("________DNN_________")
    inputs = Input(shape=(vector_size,), name='Inputlayer')

    # Both hidden stages share the same Dense -> BatchNorm -> Dropout layout.
    hidden = inputs
    for units in (512, 256):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(droprate)(hidden)

    logits = Dense(event_num)(hidden)
    probabilities = Activation('softmax')(logits)

    network = Model(inputs, probabilities)
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    network.summary()

    return network

In [21]:
def readdata(true_path='pcaTrue.txt', false_path='pcaFalse.txt'):
    """Load both feature files, label the rows and shuffle them together.

    Each non-blank line of a file is parsed as one whitespace-separated vector
    of floats. Rows read from ``true_path`` are labelled ``0`` and rows read
    from ``false_path`` are labelled ``1``.

    Args:
        true_path: Path of the file whose rows receive label 0.
        false_path: Path of the file whose rows receive label 1.

    Returns:
        Tuple ``(all_matrix, all_labels)``: a list of float lists and the
        matching list of integer labels, shuffled in unison. Both lists are
        empty when neither file contains any data rows.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a token cannot be parsed as a float.
    """

    def _load_rows(file_path):
        # The context manager guarantees the handle is closed even if parsing fails.
        with open(file_path, 'r') as handle:
            # split() with no argument tolerates trailing or repeated whitespace,
            # and blank lines are skipped instead of crashing float().
            return [[float(token) for token in line.split()]
                    for line in handle if line.strip()]

    true_rows = _load_rows(true_path)
    false_rows = _load_rows(false_path)

    all_matrix = true_rows + false_rows
    all_labels = [0] * len(true_rows) + [1] * len(false_rows)

    # zip(*[]) yields nothing to unpack, so return early when there is no data.
    if not all_matrix:
        return all_matrix, all_labels

    # Shuffle features and labels as pairs so they stay aligned.
    paired = list(zip(all_matrix, all_labels))
    random.shuffle(paired)
    shuffled_matrix, shuffled_labels = zip(*paired)
    return list(shuffled_matrix), list(shuffled_labels)

In [22]:
def evaluate(pred_type, pred_score, y_test, event_num):
    """Compute micro- and macro-averaged ROC AUC for a batch of predictions.

    Args:
        pred_type: Predicted class indices. Not used by the computation; kept
            so existing callers do not need to change.
        pred_score: Per-class scores, one row per sample and one column per class.
        y_test: True class labels for the same samples.
        event_num: Number of classes.

    Returns:
        Tuple ``(auc_micro, auc_macro)``.
    """
    # Binarise against one extra class and drop its column afterwards: for a
    # two-class problem label_binarize would otherwise return a single column
    # rather than a one-hot matrix. `classes` must be passed by keyword because
    # it is keyword-only in current scikit-learn releases.
    y_one_hot = label_binarize(y_test, classes=np.arange(event_num + 1))
    y_one_hot = y_one_hot[:, :event_num]

    result_auc_micro = roc_auc_score(y_one_hot, pred_score, average='micro')
    result_auc_macro = roc_auc_score(y_one_hot, pred_score, average='macro')
    return result_auc_micro, result_auc_macro

In [23]:
def get_index(label_matrix, event_num, seed, CV):
    """Assign every sample to a cross-validation fold, class by class.

    For each class id in ``range(event_num)`` the samples carrying that label
    are split into ``CV`` shuffled folds with ``KFold`` (seeded by ``seed``),
    so every class is spread across the folds.

    Args:
        label_matrix: NumPy array of class labels, one per sample.
        event_num: Number of classes.
        seed: Random state passed to ``KFold``.
        CV: Number of folds.

    Returns:
        Float array with one entry per sample holding that sample's fold number.
    """
    fold_of_sample = np.zeros(len(label_matrix))
    for class_id in range(event_num):
        # Positions (in the full dataset) of the samples belonging to this class.
        class_positions = np.where(label_matrix == class_id)[0]
        splitter = KFold(n_splits=CV, shuffle=True, random_state=seed)
        folds = splitter.split(range(len(class_positions)))
        for fold_id, (_, held_out) in enumerate(folds):
            # held_out indexes into class_positions, not into the full dataset.
            fold_of_sample[class_positions[held_out]] = fold_id
    return fold_of_sample

In [24]:
def cross_validation(feature_matrix, label_matrix, clf_type, event_num, seed, CV):
    """Run ``CV``-fold cross-validation and report ROC AUC per fold and overall.

    For every fold a fresh model is trained on the remaining folds, the
    held-out fold is scored, the scores are written to ``<fold>.txt`` (one line
    per sample: the class scores followed by the true label, space-separated)
    and the fold's micro/macro AUC is printed. After all folds the AUC over
    the concatenated predictions is printed.

    Args:
        feature_matrix: Sequence of feature vectors, one per sample.
        label_matrix: Sequence of integer class labels, one per sample.
        clf_type: Classifier to use; only ``'DNN'`` is implemented.
        event_num: Number of classes.
        seed: Random state forwarded to the fold assignment.
        CV: Number of folds.

    Raises:
        ValueError: If ``clf_type`` is not ``'DNN'``. Previously an error line
            was printed and all-zero predictions were scored and written out.
    """
    # Fail before any training instead of silently evaluating zero predictions.
    if clf_type != 'DNN':
        raise ValueError("Unsupported clf_type %r; only 'DNN' is implemented" % (clf_type,))

    label_matrix = np.array(label_matrix)
    feature_matrix = np.array(feature_matrix)
    index_all_class = get_index(label_matrix, event_num, seed, CV)

    y_true = np.array([])
    y_pred = np.array([])
    y_score = np.zeros((0, event_num), dtype=float)

    # One-hot width is fixed by event_num rather than by the largest label seen
    # in a fold, so the target shape always matches the network output.
    class_ids = np.arange(event_num)

    print("_____cross_validation_____")

    for k in range(CV):
        train_index = np.where(index_all_class != k)
        test_index = np.where(index_all_class == k)

        x_train, x_test = feature_matrix[train_index], feature_matrix[test_index]
        y_train, y_test = label_matrix[train_index], label_matrix[test_index]

        y_train_one_hot = (class_ids == y_train[:, None]).astype(dtype='float32')
        y_test_one_hot = (class_ids == y_test[:, None]).astype(dtype='float32')

        dnn = DNN()
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='auto')
        # NOTE(review): the held-out fold doubles as the early-stopping
        # validation set, so the stopping epoch is chosen using test data and
        # the reported AUCs are likely optimistic. Consider holding out part of
        # the training fold for validation instead.
        dnn.fit(x_train, y_train_one_hot, batch_size=64, epochs=100,
                validation_data=(x_test, y_test_one_hot),
                callbacks=[early_stopping])

        pred_score = np.asarray(dnn.predict(x_test), dtype=float)
        pred_type = np.argmax(pred_score, axis=1)
        y_true = np.hstack((y_true, y_test))
        y_pred = np.hstack((y_pred, pred_type))
        # np.vstack replaces np.row_stack, which is deprecated as of NumPy 2.0.
        y_score = np.vstack((y_score, pred_score))

        # The context manager closes the file even if a write fails.
        with open(str(k) + '.txt', 'w') as wfp:
            for scores, label in zip(pred_score, y_test):
                wfp.write(' '.join(str(score) for score in scores) + ' ' + str(label) + '\n')

        #########evaluate auc###########
        result_micro, result_macro = evaluate(pred_type, pred_score, y_test, event_num)
        print("idx, auc_micro, auc_macro: ", k, result_micro, result_macro)
    result_all_micro, result_all_macro = evaluate(y_pred, y_score, y_true, event_num)
    print("auc_micro_all, auc_macro_all: ", result_all_micro, result_all_macro)

In [25]:
def main():
    """Load the labelled feature vectors and run 10-fold DNN cross-validation with seed 0."""
    features, labels = readdata()
    cross_validation(features, labels, 'DNN', event_num, seed=0, CV=10)

In [26]:
# Run the experiment: reads the two input files, trains one model per fold,
# writes per-fold score files and prints the AUC values.
main()

_____cross_validation_____
________DNN_________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Inputlayer (InputLayer)      (None, 3390)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               1736192   
_________________________________________________________________
batch_normalization_v1_4 (Ba (None, 512)               2048      
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
batch_normalization_v1_5 (Ba (None, 256)               1024      
_________________________________________________________________
dropout_5 (Dropout)         

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
idx, auc_micro, auc_macro:  2 0.9136937841568425 0.8871541981032027
________DNN_________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Inputlayer (InputLayer)      (None, 3390)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 512)               1736192   
_________________________________________________________________
batch_normalization_v1_10 (B (None, 512)               2048      
_________________________________________________________________
dropout_10 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 256

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
idx, auc_micro, auc_macro:  4 0.899763460103203 0.865574089224212
________DNN_________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Inputlayer (InputLayer)      (None, 3390)              0         
_________________________________________________________________
dense_21 (Dense)             (None, 512)               1736192   
_________________________________________________________________
batch_normalization_v1_14 (B (None, 512)               2048      
_________________________________________________________________
dropout_14 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 256)            

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
idx, auc_micro, auc_macro:  6 0.9000192602460497 0.8670701653385047
________DNN_________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Inputlayer (InputLayer)      (None, 3390)              0         
_________________________________________________________________
dense_27 (Dense)             (None, 512)               1736192   
_________________________________________________________________
batch_normalization_v1_18 (B (None, 512)               2048      
_________________________________________________________________
dropout_18 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 256

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
idx, auc_micro, auc_macro:  8 0.9183784377532923 0.8936353008312377
________DNN_________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Inputlayer (InputLayer)      (None, 3390)              0         
_________________________________________________________________
dense_33 (Dense)             (None, 512)               1736192   
_________________________________________________________________
batch_normalization_v1_22 (B (None, 512)               2048      
_________________________________________________________________
dropout_22 (Dropout)         (None, 512)               0         
_________________________________________________________________
