In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from keras.layers import Input, Dense, Conv1D, Flatten, MaxPooling1D, Conv2D, MaxPooling2D, AveragePooling2D, Dropout, Reshape, normalization
from keras.models import Model
from keras.utils import to_categorical
import keras.backend as K
from keras.layers.recurrent import LSTM
from sklearn import metrics
import random

  _nan_object_mask = _nan_object_array != _nan_object_array
Using TensorFlow backend.


In [7]:
def precision(y_true, y_pred):
    """Batch-wise precision: true positives over predicted positives.

    Both tensors are clipped to [0, 1] and rounded, so a prediction counts
    as positive once it rounds to 1. ``K.epsilon()`` is added to the
    denominator to avoid dividing by zero when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def recall(y_true, y_pred):
    """Batch-wise recall: true positives over actual positives.

    Both tensors are clipped to [0, 1] and rounded before counting.
    ``K.epsilon()`` is added to the denominator to avoid dividing by zero
    when the batch contains no positive labels.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def f1(test_Y, pre_test_y):
    """F1 score: harmonic mean of ``precision`` and ``recall`` for the batch.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision(test_Y, pre_test_y)
    r = recall(test_Y, pre_test_y)
    numerator = p * r
    denominator = p + r + K.epsilon()
    return 2 * (numerator / denominator)

def TP(test_Y, pre_test_y):
    """Count true positives: entries where label * prediction rounds to 1."""
    overlap = K.clip(test_Y * pre_test_y, 0, 1)
    return K.sum(K.round(overlap))

def FN(test_Y, pre_test_y):
    """Count false negatives as (actual positives) - (true positives)."""
    actual_pos = K.sum(K.round(K.clip(test_Y, 0, 1)))
    true_pos = K.sum(K.round(K.clip(test_Y * pre_test_y, 0, 1)))
    return actual_pos - true_pos

def TN(test_Y, pre_test_y):
    """Count true negatives.

    (label - 1) * (prediction - 1) approaches 1 only when both the label and
    the prediction are near 0, so rounding the clipped product flags the
    entries that are negative in both.
    """
    label_offset = test_Y - K.ones_like(test_Y)
    pred_offset = pre_test_y - K.ones_like(pre_test_y)
    both_negative = K.clip(label_offset * pred_offset, 0, 1)
    return K.sum(K.round(both_negative))

def FP(test_Y, pre_test_y):
    """Count false positives as (actual negatives) - (true negatives)."""
    label_offset = test_Y - K.ones_like(test_Y)
    pred_offset = pre_test_y - K.ones_like(pre_test_y)
    # label - 1 is -1 for a negative label and 0 for a positive one, so the
    # negated sum is the number of actual negatives.
    actual_neg = (-1) * K.sum(K.round(K.clip(label_offset, -1, 0)))
    true_neg = K.sum(K.round(K.clip(label_offset * pred_offset, 0, 1)))
    return actual_neg - true_neg

def dnn_model(train_X, train_Y, test_X, test_Y, lr, epoch, batch_size):
    """Train a small 1-D CNN binary classifier on one fold and return its test ROC AUC.

    Parameters
    ----------
    train_X, test_X : 2-D arrays (samples, features); a trailing axis of
        size 1 is appended so they can be fed to ``Conv1D``.
    train_Y, test_Y : label arrays matching the first axis of the inputs.
    lr : accepted for interface compatibility but NOT applied. The optimizer
        is selected by the string ``'RMSprop'``, so Keras uses its default
        learning rate. NOTE(review): wire this in via an optimizer instance
        only after choosing a sensible value at the call site.
    epoch : number of training epochs.
    batch_size : mini-batch size used by ``model.fit``.

    Returns
    -------
    ROC AUC of the sigmoid outputs on ``test_X``.

    Side effects: prints progress markers and both AUCs, and overwrites
    ``CNN_model.h5`` in the working directory with the trained model.
    """
    # Conv1D consumes 3-D input, so add a channel axis at position 2.
    train_X = np.expand_dims(train_X, 2)
    test_X = np.expand_dims(test_X, 2)

    inputs = Input(shape = (train_X.shape[1], train_X.shape[2]))
    x = Conv1D(32, kernel_size = 3, strides = 1, padding = 'valid', activation = 'relu')(inputs)
    x = MaxPooling1D(pool_size = 2, strides = 2, padding = 'same')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(32, activation = 'relu')(x)
    x = Dense(16, activation = 'relu')(x)
    x = Dense(8, activation = 'relu')(x)
    predictions = Dense(1, activation = 'sigmoid')(x)
    model = Model(inputs = inputs, outputs = predictions)
    print("model")

    model.compile(optimizer = 'RMSprop',
                  loss = 'mean_squared_error',
                  metrics = ['acc', precision, recall, f1, TP, FN, TN, FP])
    print("compile")

    # Honour the caller's batch_size instead of a hard-coded 32.
    model.fit(train_X, train_Y, epochs = epoch, batch_size = batch_size,
              validation_data = (test_X, test_Y), shuffle = True)
    model.save('CNN_model.h5')

    pre_test_y = model.predict(test_X, batch_size = 50)
    pre_train_y = model.predict(train_X, batch_size = 50)
    test_auc = metrics.roc_auc_score(test_Y, pre_test_y)
    train_auc = metrics.roc_auc_score(train_Y, pre_train_y)
    print("train_auc: ", train_auc)
    print("test_auc: ", test_auc)
    return test_auc



In [8]:

# Load the training matrix: column 0 is used as the target, the remaining
# columns as the feature vector.
data = np.array(pd.read_csv("3_train_vecs.csv"))
pos_number = 20 # NOTE: the number of positive samples in the train file

# Rows [0, pos_number) and [pos_number, end) are sliced separately and then
# re-joined in the same order (presumably positives first - confirm against
# the CSV layout).
X1 = data[0:pos_number, 1:]
Y1 = data[0:pos_number, 0]
X2 = data[pos_number:, 1:]
Y2 = data[pos_number:, 0]
X = np.concatenate([X1, X2], 0)
Y = np.concatenate([Y1, Y2], 0)
print (X)
print ("X.shape: ", X.shape)
print ("Y.shape: ", Y.shape)

lr = 0.4
epoch = 20
batch_size = 32
# Keep `kf` bound to the splitter itself; iterating kf.split(X) directly
# avoids leaving an exhausted generator behind under the same name.
kf = KFold(n_splits = 3, shuffle = True, random_state = 42)

test_aucs = []
for i, (train_fold, validate_fold) in enumerate(kf.split(X)):
    print("\n\ni: ", i)
    test_auc = dnn_model(X[train_fold], Y[train_fold], X[validate_fold], Y[validate_fold], lr, epoch, batch_size)
    test_aucs.append(test_auc)

# One line of comma-terminated per-fold AUCs, then one line with their mean.
# The context manager closes the file even if a write fails.
with open("train_Result.txt", "w") as w:
    for j in test_aucs:
        w.write(str(j) + ',')
    w.write('\n')
    w.write(str(np.mean(test_aucs)) + '\n')


[[ 0.387706    0.235774    0.0419513  ...,  0.0660476  -0.122586    0.0697366 ]
 [ 0.0954705   0.361979   -0.0432098  ..., -0.12312    -0.131958    0.0933789 ]
 [ 0.0558099   0.299304   -0.0562219  ..., -0.0769578  -0.114194    0.0707509 ]
 ..., 
 [ 0.234206   -0.0604085  -0.0320356  ...,  0.221071    0.0604408  -0.052408  ]
 [ 0.168922    0.115964   -0.0967158  ..., -0.00971257  0.144195   -0.0897304 ]
 [ 0.127001    0.0762867   0.0180975  ..., -0.287323    0.149178   -0.124711  ]]
X.shape:  (40, 200)
Y.shape:  (40,)


i:  0
model
compile
Train on 26 samples, validate on 14 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
train_auc:  0.964285714286
test_auc:  0.875


i:  1
model
compile
Train on 27 samples, validate on 13 samples
Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
train_auc:  0.912087912088
test_auc:  1.0


i:  2
model
compile
Train on 27 samples, validate on 13 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
train_auc:  0.952777777778
test_auc:  0.8375
