In [1]:
import numpy as np
from datetime import datetime
from sklearn.model_selection import KFold
import pandas as pd
import keras
from keras.models import Model, load_model
from keras.layers import Dense,Input,GRU,Dropout,concatenate,Permute,Reshape,Lambda,RepeatVector,merge,MaxPooling1D,Embedding,Activation,Conv1D,Flatten
import pickle
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.initializers import RandomUniform
from keras.optimizers import *
from sklearn.metrics import roc_auc_score, mean_squared_error,roc_curve
from keras.backend import eval
from keras import backend as K

Using TensorFlow backend.


In [8]:
# Shared hyper-parameters read as globals by the model builders in this notebook.

# Learning rate handed to the adam optimizer in every get_*_model function.
learning_rate = 0.001
# Number of GRU units in get_rnn_model and get_pooling_model.
n_hidden = 200
# Dropout rate applied to the GRU features (the CNN model hard-codes its own 0.5).
drop_out = 0.2
# Length of the time axis of the 'x_main' input; also used as the pooling window
# in get_pooling_model and as the start offset when slicing the loaded arrays.
time_length = 365
# Size of the feature axis of the 'x_main' input.
n_feature = 553

In [9]:
def get_rnn_model():
    """Build and compile the baseline GRU classifier.

    The sequence input ``x_main`` (``time_length`` x ``n_feature``) is encoded
    by one GRU layer. Its output goes through dropout, is joined with the
    3-value side input ``x_dmgs`` and is mapped to a single sigmoid unit.

    Returns:
        A Keras ``Model`` taking ``[x_main, x_dmgs]``, compiled with binary
        cross-entropy loss, an adam optimizer at ``learning_rate`` and the
        accuracy metric.
    """
    sequence_in = Input(shape=(time_length, n_feature), name='x_main', dtype='float32')
    side_in = Input(shape=(3,), name='x_dmgs', dtype='float32')

    # All recurrent-layer settings are spelled out so the model does not depend
    # on library defaults.
    gru_settings = dict(
        activation='tanh',
        recurrent_activation='sigmoid',
        use_bias=True,
        kernel_initializer='glorot_uniform',
        recurrent_initializer='orthogonal',
        bias_initializer='zeros',
    )
    encoded = GRU(n_hidden, **gru_settings)(sequence_in)
    encoded = Dropout(drop_out)(encoded)

    # Append the side features to the sequence encoding before the output unit.
    joined = concatenate([encoded, side_in])
    probability = Dense(1, activation='sigmoid')(joined)

    model = Model(inputs=[sequence_in, side_in], outputs=probability)
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.adam(lr=learning_rate),
        metrics=['accuracy'],
    )
    return model

In [10]:
def get_cnn_model():
    """Build and compile the 1-D convolutional classifier.

    The sequence input ``x_main`` (``time_length`` x ``n_feature``) goes through
    one convolution (3 filters, kernel size 16) followed by batch-norm,
    dropout, max-pooling and a second batch-norm. The flattened result is
    joined with the 3-value side input ``x_dmgs`` and fed to a 32-unit dense
    layer and a single sigmoid output.

    Returns:
        A Keras ``Model`` taking ``[x_main, x_dmgs]``, compiled with binary
        cross-entropy loss, an adam optimizer at ``learning_rate`` and the
        accuracy metric.
    """
    sequence_in = Input(shape=(time_length, n_feature), name='x_main', dtype='float32')
    side_in = Input(shape=(3,), name='x_dmgs', dtype='float32')

    # Convolutional feature extractor over the time axis.
    conv_features = Conv1D(filters=3, kernel_size=16, activation='relu')(sequence_in)
    conv_features = BatchNormalization()(conv_features)
    conv_features = Dropout(0.5)(conv_features)
    conv_features = MaxPooling1D(pool_size=2)(conv_features)
    conv_features = BatchNormalization()(conv_features)
    flat_features = Flatten()(conv_features)
    flat_features = Dropout(0.5)(flat_features)

    # Classification head on the convolutional features plus the side input.
    joined = concatenate([flat_features, side_in])
    hidden = Dense(32, activation='relu')(joined)
    hidden = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=[sequence_in, side_in], outputs=probability)

    # The optimizer is adam (not RMSprop), using the notebook-wide learning rate.
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.adam(lr=learning_rate),
        metrics=['accuracy'],
    )
    return model

In [11]:
def get_pooling_model(ptype):
    """Build and compile a GRU classifier with pooling over the time axis.

    A GRU with ``return_sequences=True`` encodes ``x_main``. The feature vector
    given to the classifier is the GRU output at the last time step concatenated
    with one or more statistics of the GRU outputs pooled over all
    ``time_length`` steps.

    Args:
        ptype: which pooled statistics to append to the last-step output:
            ``'max'``, ``'avg'``, ``'min'``, ``'maxmin'`` (max then min) or
            ``'bpv'`` (max, avg and min, in that order).

    Returns:
        A Keras ``Model`` taking ``[x_main, x_dmgs]``, compiled with binary
        cross-entropy loss, an adam optimizer at ``learning_rate`` and the
        accuracy metric.

    Raises:
        ValueError: if ``ptype`` is not one of the supported modes. Previously
            an unknown value fell through every branch and failed later with a
            shape mismatch when the un-pooled sequence was concatenated with
            ``x_dmgs``.
    """
    # Pooled statistics appended after the last-step output, per mode.
    pooled_stats = {
        'max': ('max',),
        'avg': ('avg',),
        'min': ('min',),
        'bpv': ('max', 'avg', 'min'),
        'maxmin': ('max', 'min'),
    }
    if ptype not in pooled_stats:
        raise ValueError(
            "Unknown ptype %r; expected one of %s" % (ptype, sorted(pooled_stats))
        )

    def _max_over_time(seq):
        # One pooling window spanning the whole sequence, then drop the
        # length-1 time axis.
        pooled = MaxPooling1D(pool_size=time_length)(seq)
        return Reshape((n_hidden,))(pooled)

    def _avg_over_time(seq):
        # Mean over the time axis (second to last).
        pooled = Lambda(lambda x: K.mean(x,axis=-2))(seq)
        return Reshape((n_hidden,))(pooled)

    def _min_over_time(seq):
        # min(x) == -max(-x): reuse max pooling on the negated sequence.
        pooled = Lambda(lambda x: -x)(seq)
        pooled = MaxPooling1D(pool_size=time_length)(pooled)
        pooled = Lambda(lambda x: -x)(pooled)
        return Reshape((n_hidden,))(pooled)

    stat_builders = {
        'max': _max_over_time,
        'avg': _avg_over_time,
        'min': _min_over_time,
    }

    X_input = Input(shape=(time_length,n_feature), name='x_main', dtype='float32')
    x_dmgs = Input(shape=(3,), name='x_dmgs', dtype='float32')

    gru_seq = GRU(n_hidden,activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform',
                  recurrent_initializer='orthogonal', bias_initializer='zeros',return_sequences=True)(X_input)

    # GRU output at the final time step.
    gru_out = Lambda(lambda x: x[:,-1,:])(gru_seq)
    gru_out = Reshape((n_hidden,))(gru_out)

    features = [gru_out]
    for stat in pooled_stats[ptype]:
        features.append(stat_builders[stat](gru_seq))

    gru_h = concatenate(features)
    gru_h = Dropout(drop_out)(gru_h)

    comb = concatenate([gru_h,x_dmgs])
    h_t = Dense(1,activation='sigmoid')(comb)

    model = Model(inputs=[X_input, x_dmgs], outputs = h_t)

    opt = keras.optimizers.adam(lr=learning_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt,metrics=['accuracy'])

    return model

In [None]:
bsize = 500
max_epoch = 8
model_folder0 = 'rnn_mh_553_one_year/'
model_folder1 = 'rnn_mh_pooling_553_one_year/'

!mkdir $model_folder0
!mkdir $model_folder1


kf = KFold(n_splits=5, shuffle=False)
cv_counter = 0

for train_index, test_index in kf.split(np.arange(54)):
    cv_counter += 1
    if cv_counter > 2:
        print(test_index)
        with open(model_folder0+'cv_results.txt','a') as out0:
            with open(model_folder1+'cv_results.txt','a') as out1:
                out0.write("##".join(['cv order',str(cv_counter),'leave out index',str(test_index)])+'\n\n')
                out1.write("##".join(['cv order',str(cv_counter),'leave out index',str(test_index)])+'\n\n')
        model0 = get_rnn_model()
        model1 = get_pooling_model('bpv')
        val_loss_list0,val_loss_list1,val_loss_list2,val_loss_list3 = [],[],[],[]

        for k in range(max_epoch):
            train_loss0,train_loss1,train_loss2,train_loss3 = np.array([0.0,0.0]),np.array([0.0,0.0]),np.array([0.0,0.0]),np.array([0.0,0.0])
            val_loss0,val_loss1,val_loss2,val_loss3 = np.array([0.0,0.0]),np.array([0.0,0.0]),np.array([0.0,0.0]),np.array([0.0,0.0])
            y_test_all0,y_test_all1,y_test_all2,y_test_all3 = None,None,None,None
            y_pred_all0,y_pred_all1,y_pred_all2,y_pred_all3 = None,None,None,None

            for i in train_index:
                loadata = np.load('slice_data/0317data'+str(i)+'.npz')
                dmg_data = np.load('Data/additional_fields/0317data'+str(i)+'_additionalFields.npz')
                x_dmg=dmg_data['values']
                if i == 53:
                    dmg_53 = np.zeros((9000,4))
                    for mm in range(9000):
                        dmg_53[mm,:] = x_dmg[mm]
                    x_dmg = dmg_53
                x_dmg[:,2] = abs(x_dmg[:,3]-2)
                x_dmg = x_dmg[:,:3]
                x = loadata['InputX3D']
                y = loadata['Output3D']
                x = x[:,time_length:,:]
                #x=np.concatenate((x[:,:,:505],x[:,:,539:]),axis=2)
                for j in range(0,x_dmg.shape[0],bsize):
                    batch_loss0 = model0.train_on_batch({'x_main':x[j:j+bsize,:,:],'x_dmgs':x_dmg[j:j+bsize]}, y[j:j+bsize,0])
                    train_loss0 = np.add(train_loss0,batch_loss0)
                    batch_loss1 = model1.train_on_batch({'x_main':x[j:j+bsize,:,:],'x_dmgs':x_dmg[j:j+bsize]}, y[j:j+bsize,0])
                    train_loss1 = np.add(train_loss1,batch_loss1)
                print(i, datetime.now())

            model0.save(model_folder0+'cv_model_'+str(cv_counter)+'_'+str(k)+'.h5')
            model1.save(model_folder1+'cv_model_'+str(cv_counter)+'_'+str(k)+'.h5')

            train_loss0 = np.divide(train_loss0,(len(train_index)*10000/bsize))
            train_loss1 = np.divide(train_loss1,(len(train_index)*10000/bsize))

            print('model0:','epoch',k,'train_loss:',train_loss0)
            print('model1:','epoch',k,'train_loss:',train_loss1)
            with open(model_folder0+'cv_results.txt','a') as out0:
                with open(model_folder1+'cv_results.txt','a') as out1:
                    out0.write("\t".join([str(datetime.now()),'epoch',str(k),'train loss',str(train_loss0)])+'\n')
                    out1.write("\t".join([str(datetime.now()),'epoch',str(k),'train loss',str(train_loss1)])+'\n')


            if k > max_epoch-2:
                for i in test_index:
                    loadata = np.load('slice_data/0317data'+str(i)+'.npz')
                    x=loadata['InputX3D']
                    y=loadata['Output3D']
                    x = x[:,time_length:,:]
                    #x=np.concatenate((x[:,:,:505],x[:,:,539:]),axis=2)
                    dmg_data = np.load('Data/additional_fields/0317data'+str(i)+'_additionalFields.npz')
                    x_val_dmg=dmg_data['values']
                    if i == 53:
                        dmg_53 = np.zeros((9000,4))
                        for mm in range(9000):
                            dmg_53[mm,:] = x_val_dmg[mm]
                        x_val_dmg = dmg_53
                        x = x[:9000]
                        y = y[:9000]
                    x_val_dmg[:,2] = abs(x_val_dmg[:,3]-2)
                    x_val_dmg = x_val_dmg[:,:3]
                    y_test_all = (y[:,0] if i==test_index[0] else np.append(y_test_all, y[:,0]))
                    batch_loss0 = model0.evaluate({'x_main':x,'x_dmgs':x_val_dmg},y[:,0])
                    batch_loss1 = model1.evaluate({'x_main':x,'x_dmgs':x_val_dmg},y[:,0])

                    val_loss0 = np.add(val_loss0,batch_loss0)
                    val_loss1 = np.add(val_loss1,batch_loss1)

                    print(i, datetime.now())
                    y_pred0 = model0.predict({'x_main':x,'x_dmgs':x_val_dmg})
                    y_pred1 = model1.predict({'x_main':x,'x_dmgs':x_val_dmg})
                    np.savez_compressed(model_folder0+'pred_y_'+str(cv_counter)+'_'+str(k)+'_'+str(i),y_pred=y_pred0)
                    np.savez_compressed(model_folder1+'pred_y_'+str(cv_counter)+'_'+str(k)+'_'+str(i),y_pred=y_pred1)
                    y_pred_all0 = (y_pred0 if i==test_index[0] else np.append(y_pred_all0, y_pred0))
                    y_pred_all1 = (y_pred1 if i==test_index[0] else np.append(y_pred_all1, y_pred1))

                val_loss0 = np.divide(val_loss0,len(test_index))
                val_loss1 = np.divide(val_loss1,len(test_index))
                val_loss_list0.append(val_loss0[0])
                val_loss_list1.append(val_loss1[0])
                pred_auc0 = roc_auc_score(y_test_all, y_pred_all0)
                pred_auc1 = roc_auc_score(y_test_all, y_pred_all1)

                print('model0: ','epoch',k,'validation loss',val_loss0,'validation auc',pred_auc0)
                print('model1: ','epoch',k,'validation loss',val_loss1,'validation auc',pred_auc1)
                with open(model_folder0+'cv_results.txt','a') as out0:
                    with open(model_folder1+'cv_results.txt','a') as out1:
                        out0.write("\t".join([str(datetime.now()),'epoch',str(k),'validation loss',str(val_loss0),'val_auc',str(pred_auc0)])+'\n')
                        out1.write("\t".join([str(datetime.now()),'epoch',str(k),'validation loss',str(val_loss1),'val_auc',str(pred_auc1)])+'\n')


mkdir: cannot create directory ‘rnn_mh_553_one_year/’: File exists
mkdir: cannot create directory ‘rnn_mh_pooling_553_one_year/’: File exists
[22 23 24 25 26 27 28 29 30 31 32]
0 2018-08-07 22:50:27.900928
1 2018-08-07 22:50:55.084029
2 2018-08-07 22:51:48.127836
3 2018-08-07 22:52:16.336751
4 2018-08-07 22:52:56.477918
5 2018-08-07 22:53:49.190007
6 2018-08-07 22:54:16.004137
7 2018-08-07 22:54:59.777632
8 2018-08-07 22:55:40.944707
9 2018-08-07 22:56:07.872005
10 2018-08-07 22:57:10.560159
11 2018-08-07 22:57:40.844971
12 2018-08-07 22:58:14.933369
13 2018-08-07 22:59:05.240695
14 2018-08-07 22:59:51.573394
15 2018-08-07 23:00:42.665730
16 2018-08-07 23:01:21.048834
17 2018-08-07 23:02:09.151418
18 2018-08-07 23:03:07.303459
19 2018-08-07 23:03:34.519237
20 2018-08-07 23:04:09.974832
21 2018-08-07 23:04:56.664673


In [None]:
for train_index, test_index in kf.split(np.arange(54)):
    cv_counter += 1
    if cv_counter <= 2:
        