In [None]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np

from datetime import datetime, timedelta

import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt
from keras_tuner import RandomSearch, HyperModel, Hyperband, HyperParameters
from tensorflow.keras.callbacks import EarlyStopping

from DataEncoder import encode_pad_event_concurr, encode_pad_sequence, encode_y
from BaseLSTM import BaseLSTMModel, print_best_hp
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Load the CSV that every later cell derives its features and labels from.
# The path is relative, so it resolves against the kernel's working directory.
event = pd.read_csv(filepath_or_buffer="../input/BPI12f_Combin_Feature.csv")

In [None]:
# Collapse the event table to one row per case id, keeping only the
# `case:AMOUNT_REQ` and `result` columns alongside the id.
# NOTE(review): GroupBy.first() takes the first non-null value of each column
# inside a group (not necessarily the first row), and groupby sorts by key by
# default -- confirm the encoded event tensor uses the same case ordering.
sequence = (
    event.loc[:, ['case:concept:name', 'case:AMOUNT_REQ', 'result']]
    .groupby('case:concept:name')
    .first()
    .reset_index()
)

In [None]:
# Column roles handed to the event-level encoder.
case_index = 'case:concept:name'
start_time_col = 'StartTime'
num_col_event = ['Duration']
cat_col_event = ['pre_activity', 'StartRes']

# Project helper from DataEncoder; its result is indexed on axes 1 and 2 in a
# later cell, so it has at least three dimensions.
# NOTE(review): presumably groups events by case, orders them by start time and
# pads to a common length -- confirm against DataEncoder.
event_encode = encode_pad_event_concurr(
    event,
    cat_col_event,
    num_col_event,
    case_index,
    start_time_col,
    cat_mask=True,
    num_mask=True,
    eos=False,
)

In [None]:
# Case-level features: a single numeric column and no categorical ones.
num_col_seq = ['case:AMOUNT_REQ']
cat_col_seq = []

# Project helper from DataEncoder; axis 1 of its result is read in a later
# cell as the number of case-level features.
sequence_encode = encode_pad_sequence(
    sequence,
    cat_col_seq,
    num_col_seq,
)

In [None]:
# Target column, one value per case (taken from the per-case table).
y_col = sequence['result']

# Project helper from DataEncoder; a later cell reads `y_encode.shape[1]` as the
# class count, so the output is presumably one-hot -- TODO confirm.
y_encode = encode_y(y_col)

In [None]:
# Class count is read off the second axis of the encoded target.
num_classes = y_encode.shape[1]

# Width (second axis) of the encoded case-level feature matrix.
num_sequence_features = sequence_encode.shape[1]

# Axes 1 and 2 of the encoded events, used as the per-sample input shape
# (sequence_length, number_of_features); axis 0 is left out.
event_input_shape = (
    event_encode.shape[1],
    event_encode.shape[2],
)

In [None]:
# Register batch size as a hyperparameter to be tuned
# Container that is later passed to the tuner via `hyperparameters=`, so the
# search space starts with `batch_size` already registered.
hp_b = HyperParameters()

# Per the Keras Tuner docs, Choice returns the hyperparameter's current value
# (its default on a fresh container), so `batch_size` here is a single number.
# NOTE(review): the tuner only varies batch size during search if
# BaseLSTMModel.fit reads hp 'batch_size' -- verify in BaseLSTM.
batch_size = hp_b.Choice(
    name='batch_size',
    values=[16, 32, 64, 128],
)

In [None]:
# Instantiate the project's hypermodel with the dimensions derived from the
# encoded data.
hypermodel = BaseLSTMModel(
    event_input_shape=event_input_shape,
    num_sequence_features=num_sequence_features,
    num_classes=num_classes,
)

# Stop a fit once val_loss has not improved for 3 epochs and roll back to the
# best weights seen. Note the tuner below ranks trials by val_accuracy, while
# this callback watches val_loss.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [None]:
# Hyperband search over the hypermodel, seeded with the pre-registered
# `batch_size` hyperparameter. Trial state is stored under
# directory/project_name; the project_name spelling is kept as-is because
# renaming it would point the tuner at a different folder and lose resumable state.
tuner_band = Hyperband(
    hypermodel,
    objective='val_accuracy',
    max_epochs=200,
    factor=3,
    directory='hparam_tuning',
    overwrite=False,  # keep False so an existing search is resumed, not restarted
    project_name='classfication_2levelfeature_bpi12',
    hyperparameters=hp_b,
)

# Keras `validation_split` holds out the last 20% of the samples as given.
# NOTE(review): Hyperband assigns each trial its own epoch budget, so the
# `epochs` value here is presumably overridden per trial -- confirm.
tuner_band.search(
    x=[event_encode, sequence_encode],
    y=y_encode,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Hyperparameter set of the top-ranked trial.
best_hps_band = tuner_band.get_best_hyperparameters(num_trials=1)[0]

# Model of the top-ranked trial, as reloaded by the tuner.
best_model_band = tuner_band.get_best_models(num_models=1)[0]


In [None]:
# Fit the tuner's best model again on the same inputs, now with the tuned
# batch size passed explicitly.
# Alternative that starts from a freshly built model instead:
# best_model_band = tuner_band.hypermodel.build(best_hps_band)
history_band = best_model_band.fit(
    [event_encode, sequence_encode],
    y_encode,
    epochs=200,
    batch_size=best_hps_band.get('batch_size'),
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Report the epoch (1-based) with the highest validation accuracy in this fit.
val_acc_per_epoch_band = history_band.history['val_accuracy']
best_epoch_band = 1 + val_acc_per_epoch_band.index(max(val_acc_per_epoch_band))
print('Best epoch: %d' % best_epoch_band)
print('Best Accuracy: ', max(val_acc_per_epoch_band))

# Disabled evaluation, kept for reference. NOTE(review): it scores the same
# arrays that were passed to fit(), so it would not be a held-out test accuracy.
# loss, accuracy = best_model_band.evaluate([event_encode, sequence_encode], y_encode)
# print('Test accuracy:', accuracy)

# Print the selected hyperparameters via the project helper.
print_best_hp(best_hps_band)