In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from keras.models import Sequential, load_model, Model
from keras.layers import *
from keras import optimizers
from keras import utils
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
import keras

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

import tensorflow as tf

# Importing matplotlib to plot images.
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Importing SK-learn to calculate precision and recall
import sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score, LeaveOneGroupOut
from sklearn.utils import shuffle 

# Used for graph export
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from keras import backend as K

import pickle as pkl
import h5py

from pathlib import Path
import os.path
import sys
import datetime
import time



In [None]:
import telegram
from keras.callbacks import Callback
from callbacks import TelegramCallback
from callbacks.TelegramData import TelegramData


# Settings for the Telegram notification callback. The credentials come from
# the local TelegramData module instead of being written into the notebook.
config = dict(
    token=TelegramData.Token,      # bot token
    telegram_id=TelegramData.ID,   # telegram id of the recipient
    model_name="CNN_Cross",        # presumably the label used in the bot's messages; confirm in TelegramCallback
)

# Callback instance that is later passed to model.fit().
tg_callback = TelegramCallback(config)

In [None]:
class LoggingTensorBoard(TensorBoard):
    """TensorBoard callback that also writes a "Run_Settings" text summary.

    Args:
        log_dir: Log directory, forwarded to ``TensorBoard``.
        settings_str_to_log: Value written as the "Run_Settings" text summary
            when training begins; it is passed to ``tf.convert_to_tensor``.
        **kwargs: Forwarded unchanged to ``TensorBoard.__init__``.
    """

    def __init__(self, log_dir, settings_str_to_log, **kwargs):
        super().__init__(log_dir, **kwargs)
        self.settings_str = settings_str_to_log

    def on_train_begin(self, logs=None):
        """Run the parent hook, then emit the settings value as a text summary."""
        super().on_train_begin(logs=logs)

        settings_summary = tf.summary.text(
            "Run_Settings", tf.convert_to_tensor(self.settings_str)
        )

        # Evaluate the summary op in a short-lived session and pass the result
        # to the writer. `self.writer` is not assigned in this class;
        # presumably the TensorBoard base class provides it - TODO confirm.
        with tf.Session() as session:
            self.writer.add_summary(session.run(settings_summary))

In [None]:
# Load the complete dataset from a pickle file. Later cells read its
# `userID`, `Blobs` and `InputMethod` columns.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
dfAll = pd.read_pickle("PklData/df_blobs.pkl")

In [None]:
# Display the distinct user IDs present in the dataset.
dfAll.userID.unique()

In [None]:
# the data, split between train and test sets

def createDataSets(train_ids, test_ids):
    """Build train/test arrays from ``dfAll``, split by user ID.

    Args:
        train_ids: User IDs whose rows form the training set.
        test_ids: User IDs whose rows form the test set.

    Returns:
        Tuple ``(x_train, y_train_one_hot, x_test, y_test_one_hot)``. The x
        arrays are reshaped to ``(-1, 27, 15, 1)``; the labels are the
        ``InputMethod`` column one-hot encoded with ``num_classes`` columns.

    Side effects:
        Sets the module-level ``num_classes`` to 2, which ``cnn_model`` reads.
    """
    # Kept as a global assignment because the model-building code depends on it.
    global num_classes
    num_classes = 2

    def _arrays_for(user_ids, n_classes):
        # Select the rows of the given users and turn them into model arrays.
        subset = dfAll[dfAll.userID.isin(user_ids)]
        images = np.vstack(subset.Blobs.tolist()).reshape(-1, 27, 15, 1)
        labels = utils.to_categorical(subset.InputMethod, n_classes)
        return images, labels

    x_train, y_train_one_hot = _arrays_for(train_ids, num_classes)
    x_test, y_test_one_hot = _arrays_for(test_ids, num_classes)

    return (x_train, y_train_one_hot, x_test, y_test_one_hot)

In [None]:
# If GPU is not available:
# GPU_USE = '/cpu:0'
# config = tf.ConfigProto(device_count = {"GPU": 0})


# If GPU is available:
config = tf.ConfigProto()
config.log_device_placement = True
config.allow_soft_placement = True
config.gpu_options.allocator_type = 'BFC'

# Limit the maximum memory used
config.gpu_options.per_process_gpu_memory_fraction = 0.2

# Register the session with the backend of the `keras` package (imported as K),
# because that is the package the models in this notebook are built with.
# With standalone Keras 2.2.x, `tf.keras.backend.set_session` would configure a
# different backend and this session would not be used for training.
K.set_session(tf.Session(config=config))

In [None]:
def cnn_model():
    """Build and compile the CNN classifier for inputs of shape (27, 15, 1).

    The network has two convolutional stages (128/64 and 32/32 filters), each
    followed by batch normalization, 2x2 max pooling and dropout, then two
    regularized dense layers and a softmax output. The width of the output
    layer is read from the module-level ``num_classes``.

    Returns:
        A ``Sequential`` model compiled with Adam (lr=0.001), categorical
        cross-entropy loss and the accuracy metric.
    """
    def conv3x3(filters, **extra):
        # 3x3 same-padded ReLU convolution with an L2(0.01) kernel penalty.
        return Conv2D(filters, kernel_size=(3, 3), activation='relu', padding='same',
                      kernel_regularizer=keras.regularizers.l2(0.01), **extra)

    def dense_relu(units):
        # ReLU dense layer with a combined L1 (0.02) / L2 (0.15) kernel penalty.
        return Dense(units, activation='relu',
                     kernel_regularizer=keras.regularizers.L1L2(0.02, 0.15), use_bias=True)

    with tf.device("/gpu:0"):
        optimizer = optimizers.Adam(lr=0.001)

        model = Sequential()

        # Convolutional stage 1
        model.add(conv3x3(128, input_shape=(27, 15, 1)))
        model.add(conv3x3(64))
        model.add(BatchNormalization(axis=-1))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='same', data_format='channels_last'))
        model.add(Dropout(0.50))

        # Convolutional stage 2
        model.add(conv3x3(32))
        model.add(conv3x3(32))
        model.add(BatchNormalization(axis=-1))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='same', data_format='channels_last'))
        model.add(Dropout(0.50))

        # Classifier head
        model.add(Flatten())
        model.add(dense_relu(256))
        model.add(Dropout(0.55))
        model.add(dense_relu(128))
        model.add(Dropout(0.50))
        model.add(Dense(num_classes, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
readable_timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
tensorflowfolder = "/srv/share/tensorboardfiles/KnuckleFinger_Robin_" + readable_timestamp

# Number of epochs without improvement after which training should stop.
# NOTE(review): `pat` is not referenced by any callback created in this cell.
pat = 5

# LoggingTensorBoard passes its settings value to tf.convert_to_tensor, so it
# must be text. `config` is not a string here (the session-setup cell rebinds
# it to a tf.ConfigProto), hence the explicit str().
logger = LoggingTensorBoard(settings_str_to_log=str(config), log_dir=tensorflowfolder, histogram_freq=0,
                            write_graph=True, write_images=True, update_freq='epoch')

# Keeps the model with the lowest validation loss seen so far.
model_checkpoint = ModelCheckpoint("./ModelSnapshots/KnuckleFinger_Robin_CrossValidation" + readable_timestamp + ".h5",
                                   monitor='val_loss', verbose=0, save_best_only=True,
                                   save_weights_only=False, mode='auto', period=1)

# Multiply the learning rate by 0.95 after 10 epochs without val_loss
# improvement, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                            patience=10,
                                            verbose=1,
                                            factor=0.95,
                                            min_lr=0.00001)

# Second checkpoint with the same settings but a different file name.
storer = ModelCheckpoint("./ModelSnapshots/KnuckleFinger_Jan_" + readable_timestamp + ".h5", monitor='val_loss', verbose=0,
                         save_best_only=True, save_weights_only=False, mode='auto', period=1)

def fit_and_evaluate(x_train, y_train_one_hot, x_test, x_test_one_hot, EPOCHS=500, BATCH_SIZE=500):
    """Train a freshly built CNN and return its Keras training history.

    Args:
        x_train: Training inputs.
        y_train_one_hot: One-hot encoded training labels.
        x_test: Inputs used as validation data during training.
        x_test_one_hot: One-hot encoded labels belonging to ``x_test``. The
            parameter name is kept unchanged for backward compatibility.
        EPOCHS: Number of training epochs.
        BATCH_SIZE: Mini-batch size.

    Returns:
        The history object returned by ``model.fit``.
    """
    model = cnn_model()
    model.summary()

    # NOTE(review): the module-level `model_checkpoint` (save_best_only=True) is
    # shared by every call - confirm that its "best so far" tracking is meant
    # to carry over from one fold to the next.
    history = model.fit(x_train, y_train_one_hot,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        verbose=1,
                        # Validate against the labels that were passed in, not
                        # against a global that happens to exist.
                        validation_data=(x_test, x_test_one_hot),
                        callbacks=[tg_callback, model_checkpoint, learning_rate_reduction])

    return history

In [None]:
# User IDs that are held out, one per fold (leave-one-user-out).
n_folds = [3,4,7,8,9,11,16]
epochs=500
batch_size=500

#save the model history in a list after fitting so that we can plot later
model_history = []

readable_timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')

# The context manager closes the results file even if a fold raises.
with open("CrossValid/results_%s.csv" % readable_timestamp, "a") as result_file:
    result_file.write("TrainIds;TestIds;Acc;Loss;Lr;ValAcc;ValLoss\n")
    result_file.flush()

    for i in n_folds:
        test_ids = [i]
        train_ids = np.setdiff1d(dfAll.userID.unique(), test_ids)

        print("Training on Fold: ",i, "\n test_ids: ", test_ids, "\n train_ids: ", train_ids)
        x_train, y_train_one_hot, x_test, y_test_one_hot = createDataSets(train_ids, test_ids)
        cur_hist = fit_and_evaluate(x_train, y_train_one_hot, x_test, y_test_one_hot, epochs, batch_size)
        model_history.append(cur_hist)

        fold_log = cur_hist.history
        # Older Keras releases report "acc"/"val_acc"; Keras >= 2.3 reports
        # "accuracy"/"val_accuracy". Accept either so a finished fold is not
        # lost to a KeyError.
        acc_key = "acc" if "acc" in fold_log else "accuracy"
        row = [train_ids, test_ids,
               fold_log[acc_key], fold_log["loss"], fold_log["lr"],
               fold_log["val_" + acc_key], fold_log["val_loss"]]

        # Flush after every fold so partial results survive an interrupted run.
        result_file.write(";".join(str(field) for field in row) + "\n")
        result_file.flush()

        print("======="*12, end="\n\n\n")

In [None]:
# use model for inference to get test accuracy
# fit_and_evaluate() returns only the training history, so the network is taken
# from the history object of the last fold (Keras callbacks keep a reference to
# the model they were attached to). x_test / y_test_one_hot are likewise the
# arrays left over from the last fold of the cross-validation loop.
model = model_history[-1].model

# Integer class labels recovered from the one-hot encoding.
y_test = np.argmax(y_test_one_hot, axis=1)

y_test_pred = np.argmax(model.predict(x_test), axis=1)

print ('\n Summary of the precision, recall, F1 score for each class:')
print (sklearn.metrics.classification_report(y_test, y_test_pred))

print ('\n Confusion matrix: ')
print (sklearn.metrics.confusion_matrix(y_test, y_test_pred))