# Hyperparameter search for LSTM

In [None]:
import os
import sys
from pathlib import Path

# Resolve the current working directory once and derive both roots from it.
_WORKING_DIRECTORY = Path(os.path.abspath('')).resolve()

# Direct parent of the working directory; added to sys.path for imports.
PROJECT_DIRECTORY = _WORKING_DIRECTORY.parents[0]
sys.path.append(str(PROJECT_DIRECTORY))

# Three levels above the working directory; also added to sys.path.
DATA_DIRECTORY = _WORKING_DIRECTORY.parents[2]
sys.path.append(str(DATA_DIRECTORY))

print(f'Python {sys.version} on {sys.platform}')

In [None]:
import keras_tuner as kt
import tensorflow as tf
import numpy as np

import utilities

In [None]:
# Preprocessing switches, all disabled by default.
# ONEHOTENCODING is not referenced in the visible part of this notebook.
NORMALIZATION = STANDARDIZATION = ONEHOTENCODING = CONVLSTM = False

# Compression switch and the size passed to utilities.compress_data.
COMPRESS, COMPRESSION_SIZE = True, 128

In [None]:
# --- Load the raw dataset and split it into train / test partitions ---
data, labels, fids, velocities, angles = utilities.load_dataset(DATA_DIRECTORY / 'data/data_adp.pkl')
train_idx, test_idx, train_data, test_data = utilities.split_dataset(fids, labels, data)

# FILENAME collects one suffix per preprocessing step that was applied.
FILENAME = "dataset"

# The unsplit data is not used again below, so drop the name.
del data

# --- Optional preprocessing (normalization takes precedence over standardization) ---
# NOTE(review): the helpers' return values are discarded, so they presumably
# modify their argument in place -- confirm against utilities.
if NORMALIZATION:
    FILENAME = f"{FILENAME}_norm"
    for _split in (train_data, test_data):
        utilities.normalize_data(_split)
elif STANDARDIZATION:
    FILENAME = f"{FILENAME}_stand"
    for _split in (train_data, test_data):
        utilities.standardize_data(_split)

if COMPRESS:
    FILENAME = f"{FILENAME}_comp{COMPRESSION_SIZE}"
    for _split in (train_data, test_data):
        utilities.compress_data(_split, COMPRESSION_SIZE)

if CONVLSTM:
    FILENAME = f"{FILENAME}_convLSTM"
    # Swap the last two axes, then reshape every sample to (1, COMPRESSION_SIZE, 6).
    train_data = np.transpose(train_data, (0, 2, 1)).reshape((len(train_data), 1, COMPRESSION_SIZE, 6))
    test_data = np.transpose(test_data, (0, 2, 1)).reshape((len(test_data), 1, COMPRESSION_SIZE, 6))

# --- Pair every sample with its label ---
train_labels, test_labels = labels[train_idx], labels[test_idx]

train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))

# --- Mini batches: shuffle buffer spans the whole training set; partial batches are dropped ---
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = len(train_dataset)

train_dataset_batch = (
    train_dataset
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)
test_dataset_batch = test_dataset.batch(BATCH_SIZE, drop_remainder=True)


In [None]:
# Print the element count and the shapes of the first element, first for the
# unbatched and then for the batched training dataset.
for _dataset in (train_dataset, train_dataset_batch):
    print(len(_dataset))
    for d, l in _dataset.take(1):
        print('data.shape: ', d.shape)
        print('labels.shape: ', l.shape)

# `d` still refers to the first *batched* element here, so index 1 is the
# axis directly after the batch axis.
LEN_SAMPLE = d.shape[1]

### 3x LSTM-Layer

In [None]:
# 3 LSTM-Layer

def model_builder(hp):
    """Build and compile a tunable classifier with three stacked LSTM layers.

    Each LSTM layer has a tunable unit count and is optionally followed by a
    dropout layer with a tunable rate. The recurrent stack feeds a tunable
    dense layer, a dropout layer and a 4-unit softmax output.

    Hyperparameter names are kept verbatim (including their spelling) so that
    they stay consistent with the other builders in this notebook and with
    already stored trials.

    Args:
        hp: Hyperparameter container handed in by the tuner; queried through
            ``hp.Int``, ``hp.Boolean`` and ``hp.Choice``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, sparse categorical accuracy metric).
    """
    model = tf.keras.Sequential()

    # NOTE(review): stateful LSTMs need a fixed batch size -- presumably the
    # reason the datasets are batched with drop_remainder=True; confirm.
    num_lstm_layers = 3
    for index in range(1, num_lstm_layers + 1):
        units = hp.Int(f'lstm_units_{index}', min_value=16, max_value=256, step=32)
        model.add(tf.keras.layers.LSTM(units,
                                       recurrent_initializer='glorot_uniform',
                                       recurrent_activation='sigmoid',
                                       # Only the last LSTM returns a single output per sequence.
                                       return_sequences=index < num_lstm_layers,
                                       stateful=True))

        # Optional dropout after each LSTM; the rate is only sampled when enabled.
        if hp.Boolean(f'dropout_{index}'):
            rate = hp.Choice(f'dropout{index}_faktor', values=[0.2, 0.4, 0.6])
            model.add(tf.keras.layers.Dropout(rate))

    # Dense layer behind the recurrent stack.
    dense_units = hp.Int('postprocession_dense_units', min_value=16, max_value=128, step=16)
    # Fixed: the HyperParameters method is `Choice`; `Choose` does not exist.
    dense_activation = hp.Choice('activiation_dense', ['relu', 'tanh', 'sigmoid'])
    model.add(tf.keras.layers.Dense(dense_units, activation=dense_activation))

    # Dropout in front of the output layer is always present; only its rate is tuned.
    model.add(tf.keras.layers.Dropout(hp.Choice('dropout', values=[0.2, 0.4, 0.6])))

    # Output layer: probabilities over 4 classes.
    model.add(tf.keras.layers.Dense(4, activation='softmax'))

    # Tune the learning rate of the optimizer.
    learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')])

    return model

### 2x LSTM-Layer

In [None]:
# 2 LSTM-Layer

def model_builder(hp):
    """Build and compile a tunable classifier with up to two LSTM layers.

    Layer order: dense (relu) -> optional sequence-returning LSTM -> LSTM ->
    optional dropout -> dense -> dropout -> 4-unit softmax output.

    Args:
        hp: Hyperparameter container handed in by the tuner; queried through
            ``hp.Int``, ``hp.Boolean`` and ``hp.Choice``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, sparse categorical accuracy metric).
    """
    # Settings shared by both recurrent layers.
    lstm_options = dict(recurrent_initializer='glorot_uniform',
                        recurrent_activation='sigmoid',
                        stateful=True)
    dropout_rates = [0.2, 0.4, 0.6]

    net = tf.keras.Sequential()

    # Dense layer in front of the recurrent layers.
    pre_units = hp.Int('preprocession_dense_units', min_value=64, max_value=128, step=32)
    net.add(tf.keras.layers.Dense(pre_units, activation='relu'))

    # The first LSTM is optional; its size is only sampled when it is enabled.
    if hp.Boolean('lstm_1'):
        first_units = hp.Int('lstm_units_1', min_value=64, max_value=128, step=32)
        net.add(tf.keras.layers.LSTM(first_units, return_sequences=True, **lstm_options))

    # The second LSTM is always present and returns one output per sequence.
    second_units = hp.Int('lstm_units_2', min_value=64, max_value=128, step=32)
    net.add(tf.keras.layers.LSTM(second_units, return_sequences=False, **lstm_options))

    # Optional dropout directly behind the recurrent part.
    if hp.Boolean('dropout_1'):
        recurrent_dropout = hp.Choice('dropout1_faktor', values=dropout_rates)
        net.add(tf.keras.layers.Dropout(recurrent_dropout))

    # Dense layer with tunable width and activation.
    post_units = hp.Int('postprocession_dense_units', min_value=32, max_value=64, step=32)
    post_activation = hp.Choice('activiation_dense', ['relu', 'tanh', 'sigmoid'])
    net.add(tf.keras.layers.Dense(post_units, activation=post_activation))

    # Dropout in front of the output layer is always present; only its rate is tuned.
    head_dropout = hp.Choice('dropout', values=dropout_rates)
    net.add(tf.keras.layers.Dropout(head_dropout))

    # Output layer: probabilities over 4 classes.
    net.add(tf.keras.layers.Dense(4, activation='softmax'))

    step_size = hp.Choice('learning_rate', values=[1e-3, 1e-4])
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=step_size),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
    )
    return net

### 1x LSTM-Layer

In [None]:
# 1 LSTM-Layer

def model_builder(hp):
    """Build and compile a tunable classifier with a single LSTM layer.

    Layer order: dense (relu) -> LSTM -> optional dropout -> dense ->
    dropout -> 4-unit softmax output.

    Args:
        hp: Hyperparameter container handed in by the tuner; queried through
            ``hp.Int``, ``hp.Boolean`` and ``hp.Choice``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, sparse categorical accuracy metric).
    """
    layers = tf.keras.layers
    dropout_rates = [0.2, 0.4, 0.6]

    net = tf.keras.Sequential()

    # Dense layer in front of the recurrent layer.
    pre_units = hp.Int('preprocession_dense_units', min_value=16, max_value=256, step=32)
    net.add(layers.Dense(pre_units, activation='relu'))

    # The only recurrent layer; it returns one output per sequence.
    recurrent_units = hp.Int('lstm_units_1', min_value=64, max_value=256, step=32)
    net.add(
        layers.LSTM(
            recurrent_units,
            recurrent_initializer='glorot_uniform',
            recurrent_activation='sigmoid',
            return_sequences=False,
            stateful=True,
        )
    )

    # Optional dropout directly behind the LSTM.
    if hp.Boolean('dropout_1'):
        recurrent_dropout = hp.Choice('dropout1_faktor', values=dropout_rates)
        net.add(layers.Dropout(recurrent_dropout))

    # Dense layer with tunable width and activation.
    post_units = hp.Int('postprocession_dense_units', min_value=64, max_value=256, step=32)
    post_activation = hp.Choice('activiation_dense', ['relu', 'tanh', 'sigmoid'])
    net.add(layers.Dense(post_units, activation=post_activation))

    # Dropout in front of the output layer is always present; only its rate is tuned.
    head_dropout = hp.Choice('dropout', values=dropout_rates)
    net.add(layers.Dropout(head_dropout))

    # Output layer: probabilities over 4 classes.
    net.add(layers.Dense(4, activation='softmax'))

    step_size = hp.Choice('learning_rate', values=[1e-3, 1e-4])
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=step_size),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
    )
    return net

In [None]:
# Flags choosing the hyperparameter search algorithm (Hyperband is enabled).
HYPERBAND_TUNER, RANDOM_TUNER, BAYESIAN_TUNER = True, False, False

# Number of epochs passed to tuner.search.
EPOCHS = 10

# Labels that make the project name of this search run unique.
SEARCH, TRIAL = "Meta", 10
PROJECT_NAME = f"lstm_{BATCH_SIZE}_{FILENAME}_{SEARCH}_{TRIAL}"

In [None]:
# Build the tuner selected by the *_TUNER flags. Every branch sets both
# `tuner` and `DIRECTORY`, which the following cells rely on. If several
# flags are enabled, the last matching branch below wins.

# Fail fast when nothing is selected: otherwise `tuner` / `DIRECTORY` would be
# undefined (or left over from an earlier run) when the search cell executes.
if not (HYPERBAND_TUNER or RANDOM_TUNER or BAYESIAN_TUNER):
    raise ValueError(
        "No tuner selected: set HYPERBAND_TUNER, RANDOM_TUNER or BAYESIAN_TUNER to True."
    )

# Hyperband: trains randomly sampled configurations for a few epochs and only
# keeps training the best-performing ones (up to max_epochs).
if HYPERBAND_TUNER:
    DIRECTORY = "hyper-search"

    tuner = kt.Hyperband(
        model_builder,
        objective='val_accuracy',
        max_epochs=10,
        factor=2,
        directory=DIRECTORY,
        project_name=PROJECT_NAME
    )

# Random search: evaluates random parameter combinations (in contrast to a
# grid search, which evaluates every single combination).
if RANDOM_TUNER:
    DIRECTORY = "random-search"

    tuner = kt.RandomSearch(
        hypermodel=model_builder,
        objective="val_accuracy",
        max_trials=3,
        executions_per_trial=2,
        overwrite=True,
        directory=DIRECTORY,
        project_name=PROJECT_NAME
    )

# Bayesian optimization: samples the first few configurations at random and
# then continues from the best-performing ones.
if BAYESIAN_TUNER:
    DIRECTORY = "bayesian-search"

    tuner = kt.BayesianOptimization(
        hypermodel=model_builder,
        objective="val_accuracy",
        max_trials=40,
        directory=DIRECTORY,
        project_name=PROJECT_NAME,
    )

In [None]:
# Stop a trial once val_loss has not improved for 3 epochs and restore the
# best weights seen so far.
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# TensorBoard logs are written next to the tuner results of this project.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=''.join((DIRECTORY, '/tb_logs_', PROJECT_NAME)),
    histogram_freq=1,
)

# Run the search; the test split is used as validation data.
_search_options = dict(
    epochs=EPOCHS,
    callbacks=[stop_early, tensorboard_callback],
    validation_data=test_dataset_batch,
)
tuner.search(train_dataset_batch, **_search_options)

In [None]:
# Request the five best hyperparameter sets and keep the first one.
_top_hyperparameters = tuner.get_best_hyperparameters(num_trials=5)
best_hps = _top_hyperparameters[0]
print(best_hps.values)

# Print a summary of the two best trials.
tuner.results_summary(num_trials=2)

In [None]:
# get_best_models returns a *list* of models, so take its first entry;
# calling evaluate()/summary() on the list itself raises AttributeError.
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate on the batched test split; evaluate() returns the loss followed by
# the single compiled metric (accuracy).
test_loss, test_acc = best_model.evaluate(test_dataset_batch)
print('Test accuracy:', test_acc)
best_model.summary()

In [None]:
# Save the selected model below the project's `models` directory.
# NOTE(review): the target name '2lstm128' is hard-coded and does not depend on
# which model_builder variant was searched -- confirm it before saving.
best_model.save(PROJECT_DIRECTORY / 'models/2lstm128')

In [None]:
# Build a fresh model from the top-ranked hyperparameter set.
# Note: best_hps is rebound to the *list* of the two best sets here.
best_hps = tuner.get_best_hyperparameters(num_trials=2)
_top_ranked_hps = best_hps[0]
best_model = model_builder(_top_ranked_hps)

In [None]:
# Clear the Keras backend session once the search is finished.
tf.keras.backend.clear_session()