In [None]:
# FACT HRC (FACT-support): example ML model implementation
# Binary classification on handover timings (when to execute an action primitive):
# Predict whether or not a data instance is within 5s before start of episode / arm reaching to participant / arm tucking
# Use balanced dataset with randomly sampled not-init time steps

# import the required modules
from __future__ import print_function
import pandas as pd
import numpy as np
import csv
import datetime
import statistics
import itertools

import tensorflow as tf
# Restrict TensorFlow to the first visible GPU and let it allocate GPU memory on demand.
# When no GPU is visible, skip the configuration instead of failing with IndexError on gpus[0].
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')
    tf.config.experimental.set_memory_growth(device=gpus[0], enable=True)

import keras
from keras import backend as K
from keras.models import *
from keras.layers import *
from keras import Input, Model
from keras.callbacks import EarlyStopping
from keras.optimizers import Adamax, SGD, Adam
from keras import initializers
from keras.metrics import *
from keras import regularizers

from sklearn.metrics import confusion_matrix,f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize,LabelEncoder

import matplotlib.pyplot as plt
import seaborn as sns

# turn off the warnings; be careful when using this
#import warnings
#warnings.filterwarnings("ignore")

In [None]:
# hyper parameters
# mini-batch size used when estimating the error gradient
batch_size = 8
# upper bound on the number of epochs to train the model
nb_epoch = 100
# early stopping on the validation loss (patience of 20 epochs) to limit over-fitting
early_stopping = EarlyStopping(monitor='val_loss', patience=20)
# first feature column in the episodic data; only use keypoints (x,y,z,conf) features
feat_cols = 48
# exclusive end of the feature columns, i.e. 100 columns are used
feat_cols_end = feat_cols + 100
# number of classes: {'0', '1'}
nb_class = 2
# file-name suffixes of the folds used for 5-fold CV
cv_list = [f'cv_{fold}.csv' for fold in range(1, 6)]

In [None]:
# reshape pandas.DataFrame to Keras style: (batch_size, time_step, nb_features)
def reshape_data(data, n_prev):
    """Slice a 2-D frame into one window of ``n_prev`` consecutive rows per input row.

    Window ``i`` holds rows ``i .. i + n_prev - 1``. Rows too close to the end to start
    a full window all reuse the last full window, so the output always has exactly
    ``len(data)`` windows and stays aligned with a per-row label array.

    Args:
        data: pandas.DataFrame with one row per time step and one column per feature.
        n_prev: number of consecutive rows in each window.

    Returns:
        numpy.ndarray of shape ``(len(data), n_prev, number of columns)``.
    """
    n_rows = len(data)
    # too few rows for even one window: append just enough rows of 0s to reach n_prev
    # (appending a full n_prev rows would produce more windows than input rows)
    if n_rows < n_prev:
        df0 = pd.DataFrame(0.0, index=np.arange(n_prev - n_rows), columns=data.columns)
        data = pd.concat([data, df0])
    values = data.to_numpy()
    # first row of every window; the frames in the last window use the same context,
    # so start positions are clamped to the start of the last full window
    starts = np.minimum(np.arange(n_rows), len(values) - n_prev)
    offsets = np.arange(n_prev)
    # integer-array indexing gathers all windows at once: (n_rows, n_prev, n_features)
    alsX = values[starts[:, None] + offsets[None, :]]
    return alsX

# one-hot encoding of the class labels
def one_hot(labels):
    """One-hot encode binary labels: 0 -> [1, 0] and 1 -> [0, 1].

    Args:
        labels: iterable of labels, each comparing equal to 0 or 1.

    Returns:
        Integer numpy.ndarray of shape ``(number of labels, 2)``.

    Raises:
        ValueError: if a label is neither 0 nor 1. Such a label used to silently reuse
            the encoding of the previous label (or fail with an unbound local variable
            when it came first).
    """
    labels_converted = []
    for label in labels:
        if label == 0:
            labels_converted.append([1, 0])
        elif label == 1:
            labels_converted.append([0, 1])
        else:
            raise ValueError(f'one_hot expects binary labels 0 or 1, got {label!r}')
    # reshape keeps the (n, 2) layout even when there are no labels at all
    return np.asarray(labels_converted, dtype=int).reshape(-1, 2)

# construct data
def feature_read(df_file, feat_cols, label_col, feat_end=None):
    """Read one CSV file and split it into a feature frame and one-hot labels.

    Args:
        df_file: path of the CSV file; its first line is used as the header.
        feat_cols: positional index of the first feature column.
        label_col: name of the column holding the class labels.
        feat_end: positional index one past the last feature column. Defaults to the
            module-level ``feat_cols_end``, which is what this function always used;
            pass it explicitly when ``feat_cols`` differs from the module-level value
            so that the slice start and end stay consistent.

    Returns:
        Tuple ``(x_all, y_all)``: the feature columns as a pandas.DataFrame and the
        labels encoded by ``one_hot``.
    """
    if feat_end is None:
        feat_end = feat_cols_end

    # read in data
    data = pd.read_csv(df_file, header=0)

    # creating feature set
    x_all = data.iloc[:, feat_cols:feat_end]

    # creating label arrays
    y_all = one_hot(data[label_col])

    return x_all, y_all

In [None]:
# define AlexNet model
# Network structure inspired by: https://github.com/eweill/keras-deepcv/blob/master/models/classification/alexnet.py
def Alex(X_trn, cnn_size=[128,256], fc_size=512, l1=0.01, nb_class=3):
    """Build (without compiling) an AlexNet-style 2-D CNN classifier.

    Args:
        X_trn: training array; only ``X_trn.shape[1]`` and ``X_trn.shape[2]`` are read,
            to define an input of shape ``(shape[1], shape[2], 1)``.
        cnn_size: two filter counts; the first is used by conv layers 1-3, the second
            by conv layers 4-6.
        fc_size: width of the dense layer before the output.
        l1: L1 weight of the first conv layer's kernel regulariser (its L2 weight is 0.01).
        nb_class: number of output units of the softmax layer.

    Returns:
        The keras Sequential model.
    """
    net = Sequential()

    # Layer 1: the only conv layer with an explicit input shape and a regulariser
    net.add(Conv2D(cnn_size[0], (11, 11), input_shape=(X_trn.shape[1], X_trn.shape[2],1),
                   padding='same', kernel_regularizer=regularizers.l1_l2(l1=l1, l2=0.01)))
    net.add(BatchNormalization())
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    # Layers 2-6 as (filters, kernel size, zero-pad before conv, max-pool after activation)
    conv_plan = (
        (cnn_size[0], (5, 5), False, True),
        (cnn_size[0], (3, 3), True, True),
        (cnn_size[1], (3, 3), True, False),
        (cnn_size[1], (3, 3), True, True),
        (cnn_size[1], (3, 3), True, True),
    )
    for n_filters, kernel, pad_first, pool_after in conv_plan:
        if pad_first:
            net.add(ZeroPadding2D((1, 1)))
        net.add(Conv2D(n_filters, kernel, padding='same'))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
        if pool_after:
            net.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 7 (Dense)
    for layer in (Flatten(), Dense(fc_size), BatchNormalization(), Activation('relu'), Dropout(0.5)):
        net.add(layer)

    # Layer 8 (output)
    for layer in (Dense(nb_class), BatchNormalization(), Activation('softmax')):
        net.add(layer)

    return net

In [None]:
# CNN 1D + Dense
def CNN1D(X_trn, cnn_size=[128,256], fc_size=512, l1=0.01, nb_class=3):
    """Build (without compiling) a 1-D CNN followed by a dense classifier head.

    Args:
        X_trn: training array; only ``X_trn.shape[1]`` and ``X_trn.shape[2]`` are read
            to define the input shape (per the original note: sample number, time step,
            feature dimension).
        cnn_size: two filter counts; three Conv1D layers use the first, three the second.
        fc_size: width of the dense layer before the output.
        l1: accepted for signature parity with the other builders; not used here.
        nb_class: number of output units of the softmax layer.

    Returns:
        The keras Sequential model.
    """
    net = Sequential()

    # input layer, followed by normalisation and dropout
    net.add(Input(shape=(X_trn.shape[1], X_trn.shape[2])))
    net.add(BatchNormalization())
    net.add(Dropout(rate=0.5))

    # CNN layers: three with cnn_size[0] filters, then three with cnn_size[1]
    for n_filters in [cnn_size[0]] * 3 + [cnn_size[1]] * 3:
        net.add(Conv1D(filters=n_filters, kernel_size=3, activation='relu', padding='same'))
        net.add(BatchNormalization())

    # Dense layer
    net.add(Flatten())
    net.add(Dense(fc_size))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))

    # output layer
    net.add(Dense(nb_class))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    return net

In [None]:
# CNN 1D + LSTM
def CNN_LSTM(X_trn, cnn_size=[128,256], fc_size=512, l1=0.01, nb_class=3):
    """Build (without compiling) time-distributed 1-D convolutions feeding an LSTM.

    Args:
        X_trn: training array; only ``X_trn.shape[1]`` and ``X_trn.shape[2]`` are read,
            to define an input of shape ``(shape[1], shape[2], 1)``.
        cnn_size: two filter counts; three Conv1D layers use the first, three the second.
        fc_size: number of LSTM units.
        l1: L1 weight of the LSTM kernel regulariser (its L2 weight is 0.0).
        nb_class: number of output units of the softmax layer.

    Returns:
        The keras Sequential model.
    """
    net = Sequential()

    # input layer, followed by normalisation and dropout
    net.add(Input(shape=(X_trn.shape[1], X_trn.shape[2], 1)))
    net.add(BatchNormalization())
    net.add(Dropout(rate=0.5))

    # CNN layers, each wrapped in TimeDistributed
    for n_filters in [cnn_size[0]] * 3 + [cnn_size[1]] * 3:
        net.add(TimeDistributed(Conv1D(filters=n_filters, kernel_size=3, activation='relu', padding='same')))
        net.add(TimeDistributed(BatchNormalization()))

    # LSTM layer on the per-step flattened feature maps
    net.add(TimeDistributed(Flatten()))
    net.add(LSTM(units=fc_size, kernel_regularizer=regularizers.L1L2(l1=l1,l2=0.0)))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))

    # output layer
    net.add(Dense(nb_class))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    return net

In [None]:
# FC + LSTM
def FC_LSTM(X_trn, cnn_size=[128,256], fc_size=512, l1=0.01, nb_class=3):
    """Build (without compiling) time-distributed dense layers feeding an LSTM.

    Args:
        X_trn: training array; only ``X_trn.shape[1]`` and ``X_trn.shape[2]`` are read,
            to define an input of shape ``(shape[1], shape[2], 1)``.
        cnn_size: two layer widths; dense layers 1-3 use the first, layers 4-6 the second.
        fc_size: number of LSTM units.
        l1: L1 weight of the LSTM kernel regulariser (its L2 weight is 0.0).
        nb_class: number of output units of the softmax layer.

    Returns:
        The keras Sequential model.
    """
    net = Sequential()

    # input layer, followed by normalisation and dropout
    net.add(Input(shape=(X_trn.shape[1], X_trn.shape[2], 1)))
    net.add(BatchNormalization())
    net.add(Dropout(rate=0.5))

    # Layers 1-6: Dense -> BatchNormalization -> relu, all time-distributed
    for n_units in [cnn_size[0]] * 3 + [cnn_size[1]] * 3:
        net.add(TimeDistributed(Dense(n_units)))
        net.add(TimeDistributed(BatchNormalization()))
        net.add(TimeDistributed(Activation('relu')))
    # dropout only after the sixth dense layer
    net.add(TimeDistributed(Dropout(0.5)))

    # Layer 7 (LSTM)
    net.add(TimeDistributed(Flatten()))
    net.add(LSTM(units=fc_size, kernel_regularizer=regularizers.L1L2(l1=l1,l2=0.0)))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))

    # Layer 8 (output)
    net.add(Dense(nb_class))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    return net

In [None]:
# use fully connected dense layers instead of CNN2D in AlexNet structure
def FC_Alex(X_trn, cnn_size=[128,256], fc_size=512, l1=0.01, nb_class=3):
    """Build (without compiling) the fully connected counterpart of ``Alex``.

    Args:
        X_trn: training data; only ``X_trn.shape[1]`` is read, as the input width.
        cnn_size: two layer widths; dense layers 1-3 use the first, layers 4-6 the second.
        fc_size: width of the seventh dense layer.
        l1: L1 weight of the first layer's kernel regulariser (its L2 weight is 0.01).
        nb_class: number of output units of the softmax layer.

    Returns:
        The keras Sequential model.
    """
    net = Sequential()

    # Layer 1: the only layer with an explicit input shape and a regulariser
    net.add(Dense(cnn_size[0], input_shape=[X_trn.shape[1]], kernel_regularizer=regularizers.l1_l2(l1=l1, l2=0.01)))
    net.add(BatchNormalization())
    net.add(Activation('relu'))

    # Layers 2-6: Dense -> BatchNormalization -> relu
    for n_units in [cnn_size[0]] * 2 + [cnn_size[1]] * 3:
        net.add(Dense(n_units))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
    # dropout closes layer 6
    net.add(Dropout(0.5))

    # Layer 7 (Dense)
    net.add(Dense(fc_size))
    net.add(BatchNormalization())
    net.add(Activation('relu'))
    net.add(Dropout(0.5))

    # Layer 8 (output)
    net.add(Dense(nb_class))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    return net

In [None]:
# evaluate model performance and print results
def model_eval(model, X_tst, Y_tst, log_f, batch_size, label_time_pad=False):
    """Score a trained model on the test set, then print and log the results.

    Args:
        model: trained model exposing ``evaluate`` and ``predict``; ``evaluate`` is
            unpacked into exactly two values (loss, accuracy).
        X_tst: test inputs.
        Y_tst: one-hot test labels.
        log_f: path of the log file; results are appended to it.
        batch_size: batch size passed to ``model.evaluate``.
        label_time_pad: when True, the class of a sample is taken from the first
            entry of each label / prediction instead of from axis 1.

    Returns:
        Tuple ``(weighted F1, macro F1, accuracy)`` on the test set.
    """
    # evaluate model on testing set
    loss, acc = model.evaluate(X_tst, Y_tst, batch_size=batch_size, verbose=1)
    y_tst_pred = model.predict(X_tst)

    # collapse one-hot rows to class indices
    if not label_time_pad:
        true_cls = np.argmax(Y_tst, axis=1)
        pred_cls = np.argmax(y_tst_pred, axis=1)
    else:
        # when the labels are time-padded, use the first step of each sample
        true_cls = [np.argmax(sample[0]) for sample in Y_tst]
        pred_cls = [np.argmax(sample[0]) for sample in y_tst_pred]

    # confusion matrix and metrics
    print('\nConfusion Matrix on test set')
    conf_mat = confusion_matrix(true_cls, pred_cls).astype(int)
    print(conf_mat)
    tst_f1 = f1_score(true_cls, pred_cls, average='weighted')
    tst_f1_unweighted = f1_score(true_cls, pred_cls, average='macro')
    metric_lines = [
        f'\nWeighted F1-score on test set = {tst_f1:.3%}',
        f'\nUnweighted F1-score on test set = {tst_f1_unweighted:.3%}',
        f'\naccuracy on test set = {acc:.3%}',
    ]
    for line in metric_lines:
        print(line)

    # print to log files
    with open(log_f, 'a') as logfile:
        logfile.write('Confusion Matrix on test set\n')
        logfile.write(str(conf_mat))
        logfile.write(''.join(metric_lines))

    return tst_f1, tst_f1_unweighted, acc

In [None]:
# get the best parameter set
def GS_summary(cv_list, para_list):
    """Pick the parameter set with the highest score.

    Args:
        cv_list: list of scores, one per parameter set.
        para_list: parameter sets, in the same order as ``cv_list``.

    Returns:
        Tuple ``(1-based position of the best score, best score, matching parameter
        set)``; the first occurrence wins when the best score appears more than once.
    """
    top_score = max(cv_list)
    position = cv_list.index(top_score)
    return position + 1, cv_list[position], para_list[position]

In [None]:
# Grid search training with 5-fold CV
def Training_GS_CV(model_s, c_mode_trn, c_mode_tst, para_list, total, file_log, model_name, cv_list, cb=True, save_model=False):
    """Train and evaluate one model type for every parameter set and every CV fold.

    Besides its arguments, this function reads the module-level names ``feat_cols``,
    ``label_col``, ``batch_size``, ``nb_epoch``, ``early_stopping`` and ``nb_class``;
    they must be defined before it is called.

    Args:
        model_s: model to build: 'Alex', 'CNN1D', 'CNN_LSTM', 'FC_LSTM' or 'FC_Alex'.
        c_mode_trn: data-configuration tag used in the training file names.
        c_mode_tst: data-configuration tag used in the testing file names.
        para_list: iterable of ``(time_step, cnn_size, fc_size, l1, lr)`` tuples.
        total: number of parameter sets; only used in progress messages.
        file_log: path of the log file; all output is appended to it.
        model_name: path used by ``model.save`` when ``save_model`` is True. The same
            path is used for every fold and parameter set, so later saves overwrite
            earlier ones.
        cv_list: file-name suffixes of the folds; with a single entry, no mean/std
            over folds is computed.
        cb: when True, train with the module-level ``early_stopping`` callback.
        save_model: when True, save the model after each fold.

    Returns:
        Tuple of three lists ``(weighted F1, unweighted F1, accuracy)`` with one entry
        per parameter set: the mean over folds, or the single value without CV.

    Raises:
        ValueError: if ``model_s`` is not a known model (previously only a message was
            printed, after which training crashed on an unbound ``model`` or silently
            reused the model of the previous fold), or if ``cv_list`` is empty.
    """
    # supported model names and their builder functions
    builders = {'Alex': Alex, 'CNN1D': CNN1D, 'CNN_LSTM': CNN_LSTM, 'FC_LSTM': FC_LSTM, 'FC_Alex': FC_Alex}
    # fail fast, before any file is read or written
    if model_s not in builders:
        raise ValueError(f'Unknown model! Got {model_s!r}, expected one of {sorted(builders)}')
    if len(cv_list) == 0:
        raise ValueError('cv_list must contain at least one fold')

    # lists to store metrics
    f1_cv_list_weighted = []
    f1_cv_list_unweighted = []
    acc_cv_list = []
    count = 1

    # print data configuration
    print(f'========= Model: {model_s}; Data config: {c_mode_trn} as trn and {c_mode_tst} as tst =========\n')
    with open(file_log, 'a') as outfile:
        outfile.write(f'========= Model: {model_s}; Data config: {c_mode_trn} as trn and {c_mode_tst} as tst =========\n')
    
    # grid search with given parameter list
    for para in para_list:
        # parameters being used in this iteration
        time_step = para[0]
        cnn_size = para[1]
        fc_size = para[2]
        l1 = para[3]
        lr = para[4]

        # print information about the run
        print(f'\n------------------ No. {count} of {total} ------------------')
        print(f'\nHyperparameters: l1_weight = {l1}, learning_rate = {lr}')
        print(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')
        with open(file_log, 'a') as outfile:
            outfile.write(f'\n------------------ No. {count} of {total} ------------------')
            outfile.write(f'\nHyperparameters: l1_weight = {l1}, learning_rate = {lr}')
            outfile.write(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')

        # per-fold metrics, for computing cv average
        f1_weighted = []
        f1_unweighted = []
        acc = []

        # loop over the folds
        for cv in cv_list:
            # input data files
            df_file_trn = 'data/ML/combined_' + c_mode_trn + '_ML_trn_' + cv
            df_file_tst = 'data/ML/combined_' + c_mode_tst + '_ML_tst_' + cv

            # information about the cross-validation
            print(f'\n=== fold: {cv} ===\n')
            with open(file_log, 'a') as outfile:
                outfile.write(f'\n=== fold: {cv} ===\n')

            # read in data
            x_all_trn, y_all_trn = feature_read(df_file_trn, feat_cols, label_col)
            x_all_tst, y_all_tst = feature_read(df_file_tst, feat_cols, label_col)

            # prepare the inputs for the selected model
            if model_s == 'FC_Alex':
                # no reshaping in fully connected
                X_trn = x_all_trn
                X_tst = x_all_tst
            else:
                # time step padding
                X_trn = reshape_data(x_all_trn, time_step)
                X_tst = reshape_data(x_all_tst, time_step)
                if model_s == 'Alex':
                    # reshape data for Conv2D: https://stackoverflow.com/a/43897173
                    X_trn = X_trn.reshape(X_trn.shape[0],X_trn.shape[1],X_trn.shape[2],1)
                    X_tst = X_tst.reshape(X_tst.shape[0],X_tst.shape[1],X_tst.shape[2],1)
            # no time step padding for labels
            Y_trn = y_all_trn
            Y_tst = y_all_tst
            label_time_pad = False

            # many models are built in this loop: drop the global Keras state left by the
            # previous one so memory use does not keep growing
            K.clear_session()
            # build model
            model = builders[model_s](X_trn, cnn_size, fc_size, l1, nb_class)

            # training
            opt_func = Adam(learning_rate=lr)
            model.compile(optimizer=opt_func, loss='binary_crossentropy', metrics=['accuracy']) # binary classification
            model.summary()
            with open(file_log, 'a') as outfile:
                model.summary(print_fn=lambda x: outfile.write(x + '\n'))
            # NOTE(review): the test fold doubles as validation data, so early stopping is
            # driven by the same data the model is scored on afterwards - confirm this is intended
            if cb: # with early stopping callback
                hist = model.fit(X_trn, Y_trn, batch_size=batch_size, epochs=nb_epoch, 
                                 callbacks=[early_stopping], verbose=1, validation_data=(X_tst, Y_tst))
            else: # no early stopping
                hist = model.fit(X_trn, Y_trn, batch_size=batch_size, epochs=nb_epoch, 
                                 verbose=1, validation_data=(X_tst, Y_tst))

            # evaluation
            tst_f1, tst_f1_unweighted, tst_acc = model_eval(model, X_tst, Y_tst, file_log, batch_size, label_time_pad)
            f1_weighted.append(tst_f1)
            f1_unweighted.append(tst_f1_unweighted)
            acc.append(tst_acc)
            # saving model.h5 or not
            if save_model:
                model.save(model_name)

            # visualise the training
            fig = plt.figure()
            plt.subplot(2,1,1)
            plt.plot(hist.history['accuracy'])
            plt.plot(hist.history['val_accuracy'])
            plt.title('model accuracy')
            plt.ylabel('accuracy')
            plt.xlabel('epoch')
            plt.legend(['train', 'test'], loc='lower right')

            plt.subplot(2,1,2)
            plt.plot(hist.history['loss'])
            plt.plot(hist.history['val_loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train', 'test'], loc='upper right')

            plt.tight_layout()
            plt.show()
            # one figure is created per fold: release it so figures do not pile up in memory
            plt.close(fig)

        # NOTE: in the multi-line f-strings below, the leading spaces of the continuation
        # lines are part of the emitted text, so their indentation must not be changed
        if len(cv_list) > 1: # running CV
            # calculate CV average and std
            f1_weighted_avg = statistics.mean(f1_weighted)
            f1_unweighted_avg = statistics.mean(f1_unweighted)
            acc_avg = statistics.mean(acc)
            f1_weighted_std = statistics.stdev(f1_weighted)
            f1_unweighted_std = statistics.stdev(f1_unweighted)
            acc_std = statistics.stdev(acc)
            print(f'\n*** CV Summary (No. {count} of {total}) ***\n')
            print(f'\nHyperparameters: batch_size = {batch_size}, learning_rate = {lr}')
            print(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')
            print(
            f'\nMean and std of all metrics for predicting {label_col}:\n \
            Accuracy.avg = {acc_avg:.3%}, Accuracy.std = {acc_std:.3%}\n \
            F1_weighted.avg = {f1_weighted_avg:.3%}, F1_weighted.std = {f1_weighted_std:.3%}\n \
            F1_unweighted.avg = {f1_unweighted_avg:.3%}, F1_unweighted.std = {f1_unweighted_std:.3%}\n')
            print('------------------------------------\n\n')
            with open(file_log, 'a') as outfile:
                outfile.write(f'\n*** CV Summary (No. {count} of {total}) ***\n')
                outfile.write(f'\nHyperparameters: batch_size = {batch_size}, learning_rate = {lr}')
                outfile.write(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')
                outfile.write(
                f'\nMean and std of all metrics for predicting {label_col}:\n \
                Accuracy.avg = {acc_avg:.3%}, Accuracy.std = {acc_std:.3%}\n \
                F1_weighted.avg = {f1_weighted_avg:.3%}, F1_weighted.std = {f1_weighted_std:.3%}\n \
                F1_unweighted.avg = {f1_unweighted_avg:.3%}, F1_unweighted.std = {f1_unweighted_std:.3%}\n')
                outfile.write('------------------------------------\n\n')
        else: # not running cv (e.g., train-test split for once) then don't compute cv mean and std
            f1_weighted_avg = f1_weighted[0]
            f1_unweighted_avg = f1_unweighted[0]
            acc_avg = acc[0]
            print(f'\n*** CV Summary (No. {count} of {total}) ***\n')
            print(f'\nHyperparameters: batch_size = {batch_size}, learning_rate = {lr}')
            print(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')
            print(
            f'\nAll metrics for predicting {label_col}:\n \
            Accuracy = {acc_avg:.3%}, F1_weighted = {f1_weighted_avg:.3%}, F1_unweighted = {f1_unweighted_avg:.3%}\n')
            print('------------------------------------\n\n')
            with open(file_log, 'a') as outfile:
                outfile.write(f'\n*** CV Summary (No. {count} of {total}) ***\n')
                outfile.write(f'\nHyperparameters: batch_size = {batch_size}, learning_rate = {lr}')
                outfile.write(f'\nParameters: time_step = {time_step}, cnn_size = {cnn_size}, fc_size = {fc_size}')
                outfile.write(
                f'\nAll metrics for predicting {label_col}:\n \
                Accuracy = {acc_avg:.3%}, F1_weighted = {f1_weighted_avg:.3%}, F1_unweighted = {f1_unweighted_avg:.3%}\n')
                outfile.write('------------------------------------\n\n')

        f1_cv_list_weighted.append(f1_weighted_avg)
        f1_cv_list_unweighted.append(f1_unweighted_avg)
        acc_cv_list.append(acc_avg)
        count = count + 1

    return f1_cv_list_weighted, f1_cv_list_unweighted, acc_cv_list

In [None]:
# lists of available models and classification tasks
model_s_list = ['Alex', 'CNN_LSTM', 'CNN1D', 'FC_LSTM', 'FC_Alex']
classifier_list = ['arm', 'base', 'fin']

# settings shared by every run
cb = True # use early stopping in training
save_model = False # don't save the model

# parameters to be investigated in grid search
time_steps = [5]  # list of input history lengths to test
cnn_sizes = [[8,16]] # CNN filter sizes
fc_sizes = [8] # FC layer sizes
l1s = [0.01] # l1 regularizer weights
lrs = [0.0001] # optimizer learning rates

# every combination of the values above; total is the number of combos
para_list = list(itertools.product(time_steps, cnn_sizes, fc_sizes, l1s, lrs))
total = len(para_list)

# train every model on every classification task, one after the other
for classifier, model_s in itertools.product(classifier_list, model_s_list):
    # label column to use (read by Training_GS_CV as a module-level name)
    label_col = classifier + ' init'
    # datasets to use: the same configuration for training and testing
    c_mode_trn = 'ep_fold_' + classifier # specify training data
    c_mode_tst = 'ep_fold_' + classifier # specify testing data

    # timestamp, so that every run writes to its own files
    time_stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    run_id = 'exp/' + model_s + '_' + c_mode_trn + '_' + time_stamp
    file_log = run_id + '_log.txt'
    model_name = run_id + '_model.h5'

    # run main loop to perform 5-fold CV on the selected classification task using the selected model
    f1_cv_list_weighted, f1_cv_list_unweighted, acc_cv_list = Training_GS_CV(
        model_s, c_mode_trn, c_mode_tst, para_list, total, file_log, model_name, cv_list, cb, save_model)