# Check model
1. Load the postprocessing dataset
2. Modify the labels so that there is one label per sample, not one per timestep
3. Run the model

In [None]:
import tensorflow as tf
import numpy as np
import collections
import os
import argparse
import datetime as dt
import random
import scipy.io as spio
import pandas as pd
import matplotlib.pyplot as plt
import datetime
 
import keras
from keras import optimizers
import keras.backend as K
from keras.models import Sequential, Model, load_model
from keras.layers import GRU, LSTM, Dense, TimeDistributed, Flatten, Input, Dropout, Reshape, Concatenate
from keras.layers.convolutional import Conv1D, MaxPooling1D, Conv2D, MaxPooling2D
from sklearn.utils import shuffle, class_weight
from sklearn.metrics import confusion_matrix, roc_auc_score, f1_score, fbeta_score
from sklearn.model_selection import train_test_split

# Load dataset

In [None]:
class xt_dataset:
    """Holds the train/test arrays of one dataset, shaped for a given network type.

    Attributes set by the constructor:
        n_timestep: number of timesteps per sample (used for 'rnn' and 'cnn2d').
        n_feature:  number of features per timestep.
        id:         identifier of the dataset (stored as given).
    """

    def __init__(self, n_timestep, n_feature, dataset_id):
        self.n_timestep, self.n_feature = n_timestep, n_feature
        self.id = dataset_id

    def load(self, dir_in, fileID, batch_size, stateful, dnn):
        """Read the four CSV files of one split and reshape them for ``dnn``.

        Files read from ``dir_in`` (no header row, comma separated):
        ``Xtrain_<fileID>.csv``, ``ytrain_<fileID>.csv``,
        ``Xtest_<fileID>.csv`` and ``ytest_<fileID>.csv``.

        Args:
            dir_in: directory containing the CSV files.
            fileID: value inserted into the file names via ``str()``.
            batch_size: only used when ``stateful`` is true with ``dnn == 'rnn'``;
                the sample count is then cut down to a multiple of it.
            stateful: whether the RNN is stateful (compared with ``== True``).
            dnn: 'dense', 'rnn' or 'cnn2d'. Any other value leaves X as read
                from file; y is still reshaped to a column.

        Results are stored in ``X_train``, ``y_train``, ``X_test``, ``y_test``.
        ``X_val`` is reset to an empty list and not filled here.
        """
        print('---- INFO: Load dataset:')
        # Reset everything up front, before any file is touched.
        self.X_train, self.y_train = [], []
        self.X_val = []
        self.X_test, self.y_test = [], []

        # Read all four files first; reshaping starts only afterwards.
        for attr, prefix in (('X_train', 'Xtrain_'), ('y_train', 'ytrain_'),
                             ('X_test', 'Xtest_'), ('y_test', 'ytest_')):
            csv_path = os.path.join(dir_in, prefix + str(fileID) + '.csv')
            setattr(self, attr, pd.read_csv(csv_path, delimiter=",", header=None).values)

        # Train and test go through exactly the same reshaping steps.
        for split in ('train', 'test'):
            features = getattr(self, 'X_' + split)
            labels = getattr(self, 'y_' + split)

            n_sample = int(labels.shape[0])
            print('------ Debug: n_sample =', n_sample)

            if dnn == 'dense':
                features = features.reshape(n_sample, self.n_feature)
            elif dnn == 'rnn':
                features = features.reshape(n_sample, self.n_timestep, self.n_feature)
                if stateful == True:
                    # Keep only whole batches.
                    n_sample = int(n_sample/batch_size) * batch_size
                    features = features[:n_sample]
                    labels = labels[:n_sample]
            elif dnn == 'cnn2d':
                features = features.reshape(n_sample, self.n_timestep, 16, int(self.n_feature/16))

            setattr(self, 'X_' + split, features)
            # One label per sample, as a column vector.
            setattr(self, 'y_' + split, labels.reshape(n_sample, 1))

        print('------ Shapes of X_train, y_train, X_test, y_test =',
              self.X_train.shape, self.y_train.shape, self.X_test.shape, self.y_test.shape)
        
# ===================================================
# Test function
#subject = xt_dataset(256, 16, 'Patient_1')  # timesteps, features
#subject.load('D:\Romanlab\XT_DataSet\dataset3\RAW_EU_TESTCASE_5_16T', 0, 64)

In [None]:
import keras
from sklearn.metrics import roc_auc_score

# https://github.com/keunwoochoi/keras_callbacks_example
class xt_best_model(keras.callbacks.Callback):
    """Per-epoch metric tracker that saves the model with the best validation AUC.

    After every epoch the callback predicts on the stored training data and on
    ``self.validation_data``, rounds the predictions to 0/1 and computes
    sensitivity, AUC, F1 and F2. Running bests are kept as attributes and the
    model is written to ``best_model_name`` whenever the validation AUC improves.

    ``config`` must be called before training starts.
    """

    def config(self, batch_size, best_model_name, X, y):
        """Store the training data and initialise all running bests.

        Args:
            batch_size: batch size used for ``model.predict``.
            best_model_name: path handed to ``model.save``.
            X: training inputs to evaluate after each epoch.
            y: training labels matching ``X``.
        """
        # Running bests on the training data.
        self.max_auc = -1
        self.max_sens = -1
        self.max_f1score = -1
        self.max_f2score = -1
        self.min_loss = 1
        self.prev_loss = 1
        self.max_acc = 1

        # Running bests on the validation data.
        self.test_train_max_auc = -1
        self.test_max_auc = -1
        self.test_max_sens = -1
        self.test_max_f1score = -1
        self.test_max_f2score = -1
        self.test_min_loss = 1
        self.test_max_acc = 1

        self.X = X
        self.y = y
        self.best_model_name = best_model_name
        self.bs = batch_size

    def on_train_begin(self, logs=None):
        self.aucs = []
        self.losses = []

    def on_train_end(self, logs=None):
        return

    def on_epoch_begin(self, epoch, logs=None):
        return

    def _binary_metrics(self, X, y):
        """Predict on ``X`` and score the rounded predictions against ``y``.

        Returns:
            Tuple ``(sensitivity, auc, f1, f2)``.
        """
        y_true = np.asarray(y).reshape(-1)
        y_hat = np.round(self.model.predict(X, batch_size=self.bs).reshape(-1))

        # NOTE(review): the AUC is computed on rounded (0/1) predictions, not
        # on the raw scores -- confirm this is the intended model-selection metric.
        auc = roc_auc_score(y_true, y_hat)
        # labels=[0, 1] guarantees a 2x2 matrix even if a class is absent.
        tn, fp, fn, tp = confusion_matrix(y_true, y_hat, labels=[0, 1]).ravel()
        sens = tp/(tp + fn)
        f1 = f1_score(y_true, y_hat)
        f2 = fbeta_score(y_true, y_hat, beta=2)
        return sens, auc, f1, f2

    def on_epoch_end(self, epoch, logs=None):
        """Update the train/validation bests and save on validation-AUC improvement."""
        logs = logs or {}

        # ----- Train -----
        current_sens, current_auc, current_f1score, current_f2score = \
            self._binary_metrics(self.X, self.y)
        current_loss = logs.get('loss')

        # Accept the epoch when the loss dropped, or when it did not but the AUC rose.
        if (current_loss < self.min_loss) or (current_loss >= self.min_loss and self.max_auc < current_auc):
            print('Update new train model: AUC =', self.max_auc, '-->', current_auc)
            self.max_f2score = current_f2score
            self.max_f1score = current_f1score
            self.max_auc = current_auc
            self.max_sens = current_sens
            # Stores the loss of the accepted epoch (may be higher than an earlier one).
            self.min_loss = current_loss

        self.prev_loss = current_loss

        print('TRAIN: Current SENS =', current_sens, ' Max SENS =', self.max_sens,
              ' Current AUC =', current_auc, ' Max AUC =', self.max_auc,
              ' Current F1 =', current_f1score, ' Max F1 =', self.max_f1score,
              ' Current F2 =', current_f2score, ' Max F2 =', self.max_f2score,
              ' Current Loss =', current_loss, ' Min Loss =', self.min_loss)

        # ---- Test ----
        # NOTE(review): relies on Keras populating ``self.validation_data`` --
        # verify against the installed Keras version.
        X = self.validation_data[0]
        y = self.validation_data[1]

        current_sens, current_auc, current_f1score, current_f2score = \
            self._binary_metrics(X, y)
        current_loss = logs.get('val_loss')
        current_acc = logs.get('val_acc')

        if self.test_max_auc < current_auc:
            print('Update new test model: AUC =', self.test_max_auc, '-->', current_auc)
            self.test_max_f2score = current_f2score
            self.test_max_f1score = current_f1score
            self.test_max_auc = current_auc
            self.test_max_sens = current_sens
            self.test_max_acc = current_acc
            self.model.save(self.best_model_name)

        print('TEST: Current SENS =', current_sens, ' Max SENS =', self.test_max_sens,
              ' Current AUC =', current_auc, ' Max AUC: ', self.test_max_auc,
              ' Current F1 =', current_f1score, ' Max F1: ', self.test_max_f1score,
              ' Current F2 =', current_f2score, ' Max F2: ', self.test_max_f2score,
              ' Current Acc =', current_acc, ' Max Acc =', self.test_max_acc)

        return

    def on_batch_begin(self, batch, logs=None):
        return

    def on_batch_end(self, batch, logs=None):
        return

## Model 10
- Two/Three hidden layers
- Hyperparameters: batch_size, number of hidden units/layer 1, 2, 3
- Class weighting: 0 (disabled) / 1 (enabled)
- CNN algorithm as front-end
- RNN algorithms: LSTM, GRU

In [None]:
class xt_model:
    """Builds, trains and evaluates one binary classifier on an ``xt_dataset``.

    Call ``config`` first, then ``run``. After ``run`` the instance exposes
    ``train_loss``, ``train_acc``, ``test_loss``, ``test_acc``, ``tp``, ``fn``,
    ``tn``, ``fp``, ``sens``, ``spec``, ``auc``, ``f1score`` and ``f2score``,
    all measured with the best model saved during training.
    """

    # ==========================================
    def config(self, kfold, batch_size, rnn_algo, class_weight, n_hlayer, n_hunit, rnn_dropout):
        """Store the hyper-parameters of the next run.

        Args:
            kfold: number of folds (stored only).
            batch_size: batch size for fit / evaluate / predict.
            rnn_algo: RNN type; only 'LSTM' is handled by ``run``.
            class_weight: 1 to balance class weights from the training labels,
                anything else for equal weights.
            n_hlayer: number of entries taken from ``n_hunit`` / ``rnn_dropout``.
            n_hunit: hidden-unit counts, indexable, at least ``n_hlayer`` long.
            rnn_dropout: dropout rates, indexable, at least ``n_hlayer`` long.
        """
        print('---- INFO: Set model:')
        self.kfold = kfold
        self.bs = batch_size
        self.rnn_algo = rnn_algo   # LSTM, GRU
        self.cw = class_weight   # 1: turn on, 0: turn off
        self.n_hlayer = n_hlayer

        self.n_hunit = [n_hunit[i] for i in range(self.n_hlayer)]
        self.rnn_dropout = [rnn_dropout[i] for i in range(self.n_hlayer)]

        print('------ K-fold =', self.kfold, ', Batch size =', self.bs,
              ', RNN algorithm =', self.rnn_algo, ', Class weight enable =',
              self.cw, ', # hidden layers =', self.n_hunit, ', RNN dropout =', self.rnn_dropout)

    # ==========================================
    def _build_model(self, dataset, stateful_rnn, dnn):
        """Return the uncompiled Keras model for ``dnn`` ('dense', 'rnn' or 'cnn2d').

        Layer sizes are fixed here; ``self.n_hunit`` is not used for the layers.
        """
        if dnn not in ('dense', 'rnn', 'cnn2d'):
            raise ValueError("dnn must be 'dense', 'rnn' or 'cnn2d', got %r" % (dnn,))

        if stateful_rnn:
            # A stateful RNN needs the batch size fixed in the input shape.
            input_layer = Input(batch_shape=(self.bs, dataset.n_timestep, dataset.n_feature))
        elif dnn == 'dense':
            input_layer = Input(shape=(dataset.n_feature,))
        elif dnn == 'rnn':
            input_layer = Input(shape=(dataset.n_timestep, dataset.n_feature))
        else:
            input_layer = Input(shape=(dataset.n_timestep, 16, int(dataset.n_feature/16)))

        if dnn == 'dense':
            hidden = Dense(dataset.n_feature, activation='relu')(input_layer)
            hidden = Dropout(0.3)(hidden)
            output = Dense(1, activation='sigmoid')(hidden)

        elif dnn == 'rnn':
            if stateful_rnn:
                hidden = LSTM(32, stateful=True)(input_layer)
            elif self.rnn_algo == 'LSTM':
                hidden = LSTM(80)(input_layer)
            else:
                raise ValueError("unsupported rnn_algo %r (only 'LSTM' is implemented)"
                                 % (self.rnn_algo,))
            hidden = Dropout(0.4)(hidden)
            output = Dense(1, activation='sigmoid')(hidden)

        else:
            conv1 = Conv2D(32, kernel_size=(1, 1), activation='relu')(input_layer) # 32 x 16
            conv2 = Conv2D(32, kernel_size=(1, 2), activation='relu')(conv1) # 32 x 15
            conv3 = Conv2D(32, kernel_size=(1, 2), activation='relu')(conv2) # 32 x 14
            maxpool3 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(conv3) # 32 x 7

            conv4 = Conv2D(64, kernel_size=(1, 2), activation='relu')(maxpool3) # 64 x 6
            conv5 = Conv2D(64, kernel_size=(1, 2), activation='relu')(conv4) # 64 x 5
            conv6 = Conv2D(64, kernel_size=(1, 2), activation='relu')(conv5) # 64 x 4
            maxpool6 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(conv6) # 64 x 2

            flatten1 = Flatten()(maxpool6) # 128
            dropout1 = Dropout(0.4)(flatten1)
            dense1 = Dense(128, activation='relu')(dropout1)
            dropout2 = Dropout(0.4)(dense1)
            output = Dense(1, activation='sigmoid')(dropout2)

        model = Model(inputs=input_layer, outputs=output)
        model.summary()
        return model

    def _class_weights(self, y_train):
        """Return ``{class_label: weight}`` for ``model.fit``.

        With ``self.cw == 1`` the weights are balanced from ``y_train``;
        otherwise both classes 0 and 1 get the same weight.
        """
        # Ref: https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-
        # for-imbalanced-classes-in-keras
        if self.cw == 1:
            y_flat = np.asarray(y_train).reshape(-1)
        else:
            y_flat = np.array([0, 1])

        classes = np.unique(y_flat)
        # Keyword arguments: newer scikit-learn rejects the positional form.
        weights = class_weight.compute_class_weight(class_weight='balanced',
                                                    classes=classes, y=y_flat)
        # Keras documents class_weight as a dict mapping class index to weight.
        return {int(c): float(w) for c, w in zip(classes, weights)}

    def _run_tag(self, dataset_id, k, now):
        """Return the name shared by the TensorBoard log and the result file.

        All configured hidden-unit counts are appended, however many there are.
        """
        return ('date--id.k.bs.cw.hu1.hu2.hu3--'
                + now.strftime("%Y.%m.%d.%H.%M--") + str(dataset_id) + '.'
                + str(k) + '.' + str(self.bs) + '.' + str(self.cw) + '.'
                + '.'.join(str(u) for u in self.n_hunit))

    # ==========================================
    # Model v0_7a
    def run(self, dataset, k, stateful_rnn, dnn, model_suffix, X_ftest, y_ftest):
        """Train on ``dataset`` and store the metrics of the best saved model.

        Args:
            dataset: loaded ``xt_dataset`` (X_train, y_train, X_test, y_test).
            k: run index, used only in log/result names.
            stateful_rnn: build a stateful LSTM with a fixed batch size.
            dnn: 'dense', 'rnn' or 'cnn2d'.
            model_suffix: appended to './Model_10a/best-model--'.
            X_ftest, y_ftest: accepted for compatibility; not used.

        Raises:
            ValueError: for an unsupported ``dnn`` or ``rnn_algo``.
        """
        print('---- INFO: Running: id.k.bs.cw.hu =', dataset.id, k, self.bs, self.cw, self.n_hunit)

        # ----------------------------------------------------
        # Create model
        # ----------------------------------------------------
        model = self._build_model(dataset, stateful_rnn, dnn)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

        class_weights = self._class_weights(dataset.y_train)
        print('------ Class_weights =', class_weights)

        # ----------------------------------------------------
        # Create call_back
        # ----------------------------------------------------
        sub_name = self._run_tag(dataset.id, k, datetime.datetime.now())

        # Saving fails if the target directories are missing, so create them.
        for out_dir in ('./Model_10a', './Graph_10a', './Results_10a'):
            os.makedirs(out_dir, exist_ok=True)

        best_model_name = './Model_10a/best-model--' + model_suffix
        graph_name = './Graph_10a/' + sub_name

        # Ref: https://keras.io/callbacks/
        my_callback = xt_best_model()
        my_callback.config(self.bs, best_model_name, dataset.X_train, dataset.y_train)

        callbacks = [
            keras.callbacks.EarlyStopping(monitor='loss',
                                          patience=5000,
                                          min_delta=0.001,
                                          verbose=1,
                                          mode='min'),
            keras.callbacks.TensorBoard(log_dir=graph_name,
                                        histogram_freq=0,
                                        write_graph=True,
                                        write_images=True,
                                        write_grads=False),
            my_callback
        ]

        # ----------------------------------------------------
        # Train model
        # ----------------------------------------------------
        # Ref: https://keras.io/models/model/
        model.fit(dataset.X_train,
                  dataset.y_train,
                  epochs=3000,
                  batch_size=self.bs,
                  verbose=1,
                  shuffle=False,
                  callbacks=callbacks,
                  class_weight=class_weights,
                  validation_data=(dataset.X_test, dataset.y_test))

        # ----------------------------------------------------
        # Save results
        # ----------------------------------------------------
        # Reset first so the attributes exist even if evaluation fails below.
        self.train_loss, self.train_acc, self.test_loss, self.test_acc = [0, 0, 0, 0]
        self.tp, self.fn, self.tn, self.fp = [0, 0, 0, 0]
        self.sens, self.spec, self.auc, self.f1score, self.f2score = [0, 0, 0, 0, 0]

        # Evaluate the checkpoint written by the callback, not the last epoch.
        best_model = load_model(best_model_name)

        self.train_loss, self.train_acc = best_model.evaluate(dataset.X_train,
                                                              dataset.y_train,
                                                              batch_size=self.bs)
        self.test_loss, self.test_acc = best_model.evaluate(dataset.X_test,
                                                            dataset.y_test,
                                                            batch_size=self.bs)

        y_pred = best_model.predict(dataset.X_test,
                                    batch_size=self.bs,
                                    verbose=0,
                                    steps=None)

        # Flatten labels and rounded predictions to 1-D.
        y_pred_ = np.round(y_pred.reshape(-1))
        y_test_ = np.asarray(dataset.y_test).reshape(-1)

        # Save pred, test to file
        result_name = './Results_10a/test-pred--' + sub_name
        np.savetxt(result_name, [y_test_, y_pred_], delimiter=' ', fmt='%.1f')

        # Ref: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
        # labels=[0, 1] guarantees a 2x2 matrix even if a class is absent.
        self.tn, self.fp, self.fn, self.tp = confusion_matrix(y_test_, y_pred_, labels=[0, 1]).ravel()
        self.sens = self.tp/(self.tp + self.fn)
        self.spec = self.tn/(self.fp + self.tn)
        # NOTE(review): AUC is computed on rounded predictions, not raw scores -- confirm intent.
        self.auc = roc_auc_score(y_test_, y_pred_)
        self.f1score = f1_score(y_test_, y_pred_)
        self.f2score = fbeta_score(y_test_, y_pred_, beta=2)

# Run

In [None]:
## ------------------------------------------------
# User setting
filter_en = 0
stateful_rnn = False # decide LSTM stateful or stateless
test_subject_id = 'Patient_1' # Dog_1, ..., Patient_1, ... , Dog_ (full), Patient_ (full)
model_suffix = '1E_T'
# Raw string: the backslashes are path separators, not escape sequences.
dir_in = os.path.join(r'D:\Romanlab\XT_DataSet\dataset3', 'RAW_EU_TESTCASE_' + model_suffix)

# ------------------------------------
# Initialize
n_band = 5 if filter_en == 1 else 1

test_subject = xt_dataset(1, 80, test_subject_id) # timesteps, features
model = xt_model()

# ------------------------------------
# Each row appended to `results` holds 13 values:
# train_loss, train_acc, test_loss, test_acc, tp, fn, tn, fp, sensitivity, specificity, auc, f1-score, f2-score
results = []

kfold = 5
bs =  [ 64]
hu1 = [ 8, 16, 32, 64, 32, 36]
hu2 = [ 8, 16, 32, 64, 36, 8]
hu3 = [ 8, 16, 32]
dnn = 'dense'

# Passed through to model.run(), which does not use them.
X_ftest = []
y_ftest = []

for i in range (2, 3):
    # kfold, batch_size, rnn_algo, class_weight, n_hlayer, n_hunit, rnn_dropout
    model.config(kfold, bs[0], 'LSTM', 1, 3, [hu1[i], 0, 0], [0, 0, 0])

    for j in range (0, 1):
        # The files depend only on j, so load once per j instead of once per repetition.
        test_subject.load(dir_in, j, bs[0], stateful_rnn, dnn)

        for k in range (0, 3):
            # k only distinguishes the saved model file of each repetition.
            model.run(test_subject, j, stateful_rnn, dnn,
                      (model_suffix + '_1' + dnn + '_' + str(j) + str(k)), X_ftest, y_ftest)

            result = [model.train_loss, model.train_acc, model.test_loss, model.test_acc,
                      model.tp, model.fn, model.tn, model.fp,
                      model.sens, model.spec, model.auc, model.f1score, model.f2score]
            results.append(result)

            print(result)
            # Rewrite the file after every run so partial results survive a crash.
            np.savetxt('./Results_10a/20180924_result09a.csv', results, delimiter=',')

# Evaluate the full model on each test set

In [None]:
def xt_k_of_n(y_pred, y_test, tsuf, k, n):
    """Apply a k-of-n vote to binary predictions and measure latency and false positives.

    Args:
        y_pred: 1-D numpy array of 0/1 predictions.
        y_test: 1-D array of 0/1 labels, same length as ``y_pred``.
        tsuf: test-set suffix such as '122_122'; selects the index range
            ``[start, stop)`` scanned for latency. Unknown suffixes skip the
            latency measurement.
        k: minimum number of positive predictions required inside a window.
        n: window length. Filtering is applied only when ``k > 1 and n > 1``.

    Returns:
        ``(latency, fp, y_pred_tmp)`` where ``latency`` counts the leading
        samples of the range with label 1 but prediction 0, ``fp`` is the
        number of samples with label 0 and prediction 1, and ``y_pred_tmp`` is
        the filtered prediction array (``y_pred`` itself when no filter applies).
    """
    print('------ Debug: tsuf, k, n =', tsuf, k, n)

    # [start, stop) sample range scanned for latency, per test-set suffix.
    ranges = {
        '73_73': [3105, 3226],
        '75_75': [3367, 3481],
        '96_96': [2718, 2836],
        '101_101': [623, 744],
        '120_120': [2041, 2198],
        '122_122': [324, 447],
        '124_124': [2772, 2893],
        '126_126': [1791, 1935],
    }
    rec = ranges.get(tsuf)

    M = y_pred.shape[0]
    if k > 1 and n > 1:
        y_pred_tmp = np.zeros(M)
        # The first complete window ends at index n-1. The window covers the
        # n most recent samples *including* the current one.
        for i in range(n - 1, M):
            if np.sum(y_pred[i - n + 1:i + 1]) >= k:
                y_pred_tmp[i] = 1
    else:
        y_pred_tmp = y_pred

    latency = 0
    if rec is not None:
        for i in range(rec[0], rec[1]):
            if (y_test[i] == 1) and (y_pred_tmp[i] == 0):
                latency = latency + 1
            elif y_test[i] == y_pred_tmp[i]:
                # Stop at the first sample where label and prediction agree.
                break

    print('------ Debug: FP')
    # Only the false-positive count is needed: label 0 predicted as 1.
    fp = np.sum((np.asarray(y_test) == 0) & (np.asarray(y_pred_tmp) == 1))

    return latency, fp, y_pred_tmp

def xt_eval_subset(X_test, y_test, batch_size, tsuf, k, n, best_model_name):
    """Evaluate a saved model on one test set, plot the result and report metrics.

    Args:
        X_test: model inputs.
        y_test: 0/1 labels, one per sample.
        batch_size: batch size for ``evaluate`` and ``predict``.
        tsuf: test-set suffix forwarded to ``xt_k_of_n``.
        k, n: k-of-n filter parameters forwarded to ``xt_k_of_n``.
        best_model_name: path of the saved Keras model.

    Side effects: writes the raw predictions to './Results_10a/tmp.csv',
    draws a matplotlib figure and prints the metrics.

    Returns:
        ``(test_loss, test_acc, tp, fn, tn, fp, sens, spec, auc)``, all
        computed from the unfiltered rounded predictions.
    """
    best_model = load_model(best_model_name)

    # Ref: https://keras.io/models/model/
    test_loss, test_acc = best_model.evaluate(X_test,
                                              y_test,
                                              batch_size=batch_size)
    y_pred = best_model.predict(X_test,
                                verbose=0,
                                batch_size=batch_size,
                                steps=None)

    # Flatten predictions and labels to 1-D.
    y_pred_ = y_pred.reshape(-1)

    os.makedirs('./Results_10a', exist_ok=True)
    np.savetxt('./Results_10a/tmp.csv', y_pred_, delimiter=',', fmt='%.4f')

    y_pred_ = np.round(y_pred_)
    y_test_ = np.asarray(y_test).reshape(-1)

    # ==============================================
    plt.figure(figsize=(18, 6), dpi=100, facecolor='w', edgecolor='k')

    start = 1
    # Plot at most the first 3600 samples; clamp so x and y always have equal length.
    end = min(3600, y_test_.shape[0])
    t = range(start, end)

    print('=== Raw results === ')

    # Top panel of a two-row grid; the bottom panel is left empty.
    plt.subplot(2, 1, 1)
    plt.fill_between(t, y_pred_[start:end], facecolor='g', step="post", label='y_pred')
    plt.plot(t, y_test_[start:end], color='r', drawstyle='steps-post', label='y_true')
    plt.title('y_test vs y_pred')
    plt.ylabel('Class')
    plt.legend()
    # ==============================================

    # Ref: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
    # labels=[0, 1] guarantees a 2x2 matrix even if a class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test_, y_pred_, labels=[0, 1]).ravel()
    sens = tp/(tp + fn)
    spec = tn/(fp + tn)
    # NOTE(review): AUC is computed on rounded predictions, not raw scores -- confirm intent.
    auc = roc_auc_score(y_test_, y_pred_)

    print('test_loss, test_acc, tp, fn, tn, fp, sens, spec, auc =', test_loss, test_acc, tp, fn, tn, fp, sens, spec, auc)

    # Keep the filtered false-positive count separate so it does not overwrite
    # the raw `fp` returned alongside the raw tp/fn/tn.
    latency, fp_filtered, y_pred_tmp = xt_k_of_n(y_pred_, y_test_, tsuf, k, n)
    print('latency, fp =', latency, fp_filtered)

    return test_loss, test_acc, tp, fn, tn, fp, sens, spec, auc
       
# ==================================================
# Evaluate every saved model of `model_suffix` on its held-out test sets.
model_suffix = '1E_T'

# Test-set suffixes evaluated for each trained model family.
_TEST_SUFFIXES = {
    '1_T': ['73_73'],
    '2_T': ['75_75'],
    '3_T': ['96_96'],
    '4_T': ['101_101'],
    '5_T': ['120_120'],
    '6_T': ['122_122'],
    '7_T': ['124_124'],
    '8_T': ['126_126'],
    '1D_T': ['122_122', '124_124', '126_126'],
    '2D_T': ['122_122', '124_124', '126_126'],
    '3D_T': ['122_122', '124_124', '126_126'],
    '1E_T': ['122_122', '124_124', '126_126'],
    '2E_T': ['122_122', '124_124', '126_126'],
    '3E_T': ['122_122', '124_124', '126_126'],
}
if model_suffix not in _TEST_SUFFIXES:
    # Fail early instead of reusing a stale or undefined `test_suffix`.
    raise ValueError('Unknown model_suffix: %r' % (model_suffix,))
test_suffix = _TEST_SUFFIXES[model_suffix]

print('INFO: Start')

n_channel = 80
n_timestep = 1
batch_size = 64
dnn = 'dense'
k, n = 3, 5

# Raw strings: the backslashes are path separators, not escape sequences.
dir_test = r'D:\Romanlab\XT_DataSet\dataset3\RAW_EU_TEST_A'
dir_model = r'D:\Romanlab\XT_DataSet\dataset3\RAW_EU_TESTCASE_' + model_suffix

# The normalisation statistics do not depend on the test set or the model
# index, so read them once.
file_Xmean = os.path.join(dir_model, 'Xmean_0.csv')
file_Xstd = os.path.join(dir_model, 'Xstd_0.csv')
X_mean = pd.read_csv(file_Xmean, delimiter=",", header=None).values.T
X_std = pd.read_csv(file_Xstd, delimiter=",", header=None).values.T

# ---------------------
results = []
for tsuf in test_suffix:
    file_Xin = os.path.join(dir_test, 'X_' + tsuf + '.csv')
    file_yin = os.path.join(dir_test, 'y_' + tsuf + '.csv')

    X_test = pd.read_csv(file_Xin, delimiter=",", header=None).values
    y_test = pd.read_csv(file_yin, delimiter=",", header=None).values

    dim = X_test.shape[0]
    X_test = X_test.reshape((int(dim/n_timestep), n_timestep, n_channel))
    y_test = y_test.reshape((int(dim/n_timestep), 1))

    # Standardise with the stored statistics, then shape for the network type.
    X_test_tmp = (X_test - X_mean) / X_std
    if dnn == 'rnn':
        X_test_tmp = X_test_tmp.reshape(int(dim/n_timestep), n_timestep, n_channel)
    elif dnn == 'dense':
        X_test_tmp = X_test_tmp.reshape(dim, n_channel)
    elif dnn == 'cnn2d':
        X_test_tmp = X_test_tmp.reshape(int(dim/n_timestep), n_timestep, 16, int(n_channel/16))

    # One saved model per training repetition (file names end in _00, _01, _02).
    for msuf in range (0, 3):
        print('--- Debug: tsuf, msuf =', tsuf, msuf)
        result = xt_eval_subset(X_test_tmp, y_test, batch_size, tsuf, k, n,
                                './Model_10a/best-model--' + model_suffix + '_1' + dnn + '_0' + str(msuf))
        results.append(result)

np.savetxt('./Results_10a/20180805_result08a.csv', results, delimiter=',')
print('INFO: Done')

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, KFold

# Toy data: five samples with two features each, and one label per sample.
X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
y = np.array([1, 2, 3, 4, 5])

# Ordered hold-out split: with shuffle=False the test part is taken from the end.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    shuffle=False)
print(X, X_train, X_test, sep='\n')

# Same data split with 5-fold cross-validation; one list entry per fold.
X_train, X_test, y_train, y_test = [], [], [], []
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(X):
    X_train += [X[train_index]]
    y_train += [y[train_index]]
    X_test += [X[test_index]]
    y_test += [y[test_index]]

print('----------')
# Show the train and test parts of the fifth fold.
print(X_train[4], X_test[4], sep='\n')

In [None]:
from sklearn import preprocessing
import numpy as np
# Small 3x3 example matrix to standardise.
X_train = np.array([
    [1., -1., 2.],
    [2., 0., 0.],
    [0., 1., -1.],
])

# Fit the scaler on the data, then apply it to the same data.
scaler = preprocessing.StandardScaler()
scaler = scaler.fit(X_train)
X_scaled = scaler.transform(X_train)

# Print the scaler, its fitted mean_ and scale_ attributes, and the scaled data.
print(scaler, scaler.mean_, scaler.scale_, X_scaled, sep='\n')