# Load Data

In [1]:
import pandas as pd
import os

In [2]:
all_data = []
if os.getcwd().split("\\")[len(os.getcwd().split("\\")) - 1] != "data":
    os.chdir('./data')

for f in os.listdir():
    # print(f)
    part_id = f.split("_")[1].split(".")[0]
    # print(part_id)
    df = pd.read_csv(f, sep=";")
    df["part_id"] = part_id
    all_data.append(df)

In [3]:
print(len(all_data))
all_data[0].head()

60


Unnamed: 0,timestamp,active,xPos,yPos,TargetX,StartX,TunnelAmplitude,TunnelWidth,YDistanceToTop,YDistanceToBottom,part_id
0,1652861652030,0,0,0,79.5,1286.5,1207,57,-284.0,341.0,12299398
1,1652861652056,0,0,0,79.5,1286.5,1207,57,-284.0,341.0,12299398
2,1652861652116,0,676,593,79.5,1286.5,1207,57,237.5,-180.5,12299398
3,1652861652128,0,676,593,79.5,1286.5,1207,57,237.5,-180.5,12299398
4,1652861652140,0,676,593,79.5,1286.5,1207,57,237.5,-180.5,12299398


# Preprocess Data

## Normalize by Participants Displaysize

In [3]:
import copy

In [4]:
def normalize_by_screen(datapoint, screenwidth, screenheight):
    normalized = copy.deepcopy(datapoint)
    normalized["xPos"] /= screenwidth
    normalized["TargetX"] /= screenwidth
    normalized["StartX"] /= screenwidth
    normalized["TunnelAmplitude"] /= screenwidth
    normalized["yPos"] /= screenheight
    normalized["TunnelWidth"] /= screenheight
    normalized["YDistanceToTop"] /= screenheight
    normalized["YDistanceToBottom"] /= screenheight
    return normalized

In [5]:
def get_participants_screensize(datapoint):
    x_res = -1
    y_res = -1
    # print(datapoint)
    curr_part_id = datapoint["part_id"][0]
    # print(part_id)
    for f in os.listdir():
        curr_id = f.split("_")[1].split(".")[0]
        if curr_id == curr_part_id:
            demographic = pd.read_csv(f, sep=";")
            real_res_string = demographic["Real Res"][0]
            real_res_string = real_res_string.replace("px", "")
            x_res = real_res_string.split(" x ")[0]
            y_res = real_res_string.split(" x ")[1]
    return int(x_res), int(y_res)

In [6]:
def normalize_screensize(dataset):
    if os.getcwd().split("\\")[len(os.getcwd().split("\\")) - 1] != "demographic_data":
        os.chdir("../demographic_data")
    for i, datapoint in enumerate(dataset):
        screenwidth, screenheight = get_participants_screensize(datapoint)
        dataset[i] = normalize_by_screen(datapoint, screenwidth, screenheight)

## First Order Derivation
The goal is to make the movement independent of the positions
Therefore, xPos and yPos are converted to speeds by taking the first derivation

In [35]:
def get_data_with_derivate(datapoint):
    diffSeries = datapoint["xPos"].diff(periods=1)
    datapoint.insert(3, "xSpeed", diffSeries)
    diffSeries = datapoint["yPos"].diff(periods=1)
    datapoint.insert(5, "ySpeed", diffSeries)

## Umwandeln in Array

In [7]:
import numpy as np

In [8]:
def get_np_arrays(data):
    data_array = []
    for curr in data:
        removed = copy.deepcopy(curr)
        removed = removed.drop('timestamp', axis=1)
        removed = removed.drop('xPos', axis=1)
        removed = removed.drop('yPos', axis=1)
        removed = removed.drop('part_id', axis=1)
        data_array.append(removed.to_numpy(copy=True, dtype='float32'))
    return data_array

## Split into sections of n samples

In [9]:
def append_samples_of_length_n(participant_data, n, samples, labels, label_distance):
    for i in range(participant_data.shape[0] - (n+label_distance)):
        samples.append(copy.deepcopy(participant_data[i:i+n]))
        sum_to_label = np.array([0,0], dtype='float64')
        for j in range (i+n+1, i+n+label_distance+1):
            sum_to_label += participant_data[j][1:3]
        labels.append(copy.deepcopy(sum_to_label))
        # labels.append(copy.deepcopy(participant_data[i+n+label_distance][1:3]))

In [10]:
def get_samples_and_labels(data_array, N=10, label_distance=10):
    samples = []
    labels = []
    for participant_data in data_array:
        append_samples_of_length_n(participant_data, N, samples, labels, label_distance)
    return samples, labels

## Remove unusable samples
If in one sample all 10 xSpeeds and ySpeeds are equal the sample is removed, if between the last sample and the label the mouse isn't moved the sample is removed

In [11]:
def remove_unusable(data, labels, verbose=0):
    # print(labels)
    new_data = []
    new_labels = []
    for i, sample in enumerate(data):
        bool_array = np.all(sample == sample[0,:], axis=0)
        if bool_array[1] == False or bool_array[2] == False:
            if labels[i][0] != 0:
                new_data.append(sample)
                new_labels.append(labels[i])
        if verbose > 1 and i % 100000 == 0:
            print(f'evaluated {i}')

    if verbose > 0:
        print(f'removed {len(data) - len(new_data)} samples')

    return new_data, new_labels

## Make all Samples face the same way
The average xSpeed is calculated, if this average is negative all xSpeeds are multiplied by one to make all vectors face in the same direction

In [12]:
def make_same_orientation(data, verbose=0):
    changed = 0
    for sample in data:
        average_x_speed = np.average(sample, axis=0)[1]
        if average_x_speed < 0:
            sample[:, 1] *= -1
            if verbose > 0:
                changed += 1
    if verbose > 0:
        print(f'Changed {changed} samples')

## Transform a dataset

In [13]:
def get_transformed_data(dataset, N, label_distance, verbose):
    normalize_screensize(dataset)

    if'xSpeed' not in dataset[0].columns:
        for datapoint in dataset:
            get_data_with_derivate(datapoint)
            datapoint.drop(index=0, axis=0, inplace=True)

    data_array = get_np_arrays(dataset)

    samples, labels = get_samples_and_labels(data_array, N, label_distance)

    samples, labels = remove_unusable(samples, labels, verbose=verbose)

    make_same_orientation(samples, verbose=verbose)

    return samples, labels

## Train-Test-Split
Initially the entire dataset given in all_data are split into Test and Train group by participants, than the datasets are preprocessed in the same way

In [14]:
TRAIN_SPLIT = 0.8
TEST_SPLIT = 1.0 - TRAIN_SPLIT

In [15]:
import random as rand
SEED = 16961

In [16]:
def get_train_test_split_data(dataset):
    train_data_amount = 0.8 * len(dataset)

    train_data = []
    test_data = []

    rand.seed(SEED)
    train_indizes = rand.sample(range(len(dataset)), int(train_data_amount))
    # print(train_indizes)

    for i, sample in enumerate(dataset):
        if i in train_indizes:
            train_data.append(sample)
        else:
            test_data.append(sample)

    # print(len(train_data))
    # print(len(test_data))

    return train_data, test_data

## Normalize Trainingdata
The average column values of the training data is calculated, each value in test and training data is divided by the average of its column

In [17]:
def get_average_array(arr, axis=(0,1)):
    return np.mean(arr, axis=axis)

In [18]:
def get_normalized_arrays(train_samples, train_labels, test_samples, test_labels):
    avg_array = get_average_array(train_samples)
    avg_label_array = get_average_array(train_labels, 0)

    train_array = np.array(train_samples)
    test_array = np.array(test_samples)
    train_labels_array = np.array(train_labels)
    test_labels_array = np.array(test_labels)

    train_normalized = train_array[:, :, ] / avg_array
    train_normalized[:, : , 0] = train_array[:, :, 0]
    train_normalized[:, :, 2] = train_array[:, :, 2]

    test_normalized = test_array[:, :, ] / avg_array
    test_normalized[:, :, 0] = test_array[:, :, 0]
    test_normalized[:, :, 2] = test_array[:, :, 2]

    # train_labels_array[:, 0] /= avg_label_array[0:1] - averages are very low!

    # test_labels_array[:, 0] /= avg_label_array[0:1]

    return train_normalized, train_labels_array, test_normalized, test_labels_array

## Shuffle Arrays

In [19]:
from sklearn.utils import shuffle

In [20]:
VERBOSE = 1
LABEL_DISTANCE = 6 # 6 => 100ms

def get_preprocessed_data(N):
    train_data, test_data = get_train_test_split_data(all_data)

    train_samples, train_labels = get_transformed_data(train_data, N, LABEL_DISTANCE, 0)
    test_samples, test_labels = get_transformed_data(test_data, N, LABEL_DISTANCE, 0)

    train_samples, train_labels, test_samples, test_labels = get_normalized_arrays(train_samples, train_labels, test_samples, test_labels)

    train_samples, train_labels = shuffle(train_samples, train_labels, random_state=SEED)
    test_samples, test_labels = shuffle(test_samples, test_labels, random_state=SEED)

    return train_samples, train_labels, test_samples, test_labels

# Hyperparameter Optimization

In [21]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import optuna

In [22]:
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [23]:
OPTIMIZATION_SPLIT = 0.2
NUMBER_OF_TRIALS = 200
EPOCHS = 10

## Fully-Connected Feed-Forward Neural Network

Optimize Dataset - How many samples to predict next

In [34]:
def create_dataset(trial):
    N = trial.suggest_int('sequence_length', 5, 30)

    train_samples, train_labels, test_samples, test_labels = get_preprocessed_data(N)

    highest_index_train = int(train_samples.shape[0] * OPTIMIZATION_SPLIT)
    highest_index_test = int(test_samples.shape[0] * OPTIMIZATION_SPLIT)

    train_samples = train_samples.reshape(train_samples.shape[0], train_samples.shape[1] * train_samples.shape[2])
    test_samples = test_samples.reshape(test_samples.shape[0], test_samples.shape[1] * test_samples.shape[2])

    return train_samples[:highest_index_train], train_labels[:highest_index_train], test_samples[:highest_index_test], test_labels[:highest_index_test]

Optimize Model
- Number of hidden layers
- Neurons per layer
- Dropoutrate
- Activation function
- weight initialization

In [35]:
def create_model(trial, train_X):
    number_of_inputs = train_X.shape[1]

    number_of_hidden_neurons = trial.suggest_int('hidden_neurons', 5, 1000)
    number_of_hidden_layers = trial.suggest_int('hidden_layers', 1, 20)
    dropout_rate = trial.suggest_float('dropout_rate', 0., 0.5)
    activation_function = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])

    model = keras.Sequential()
    model.add(layers.Input(shape=(number_of_inputs, )))
    model.add(layers.BatchNormalization())

    for _ in range(number_of_hidden_layers):
        model.add(layers.Dense(number_of_hidden_neurons, activation=activation_function))

    model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(2, activation='linear'))

    return model

Optimize Training
- Optimizer
- Loss function (?)
- Learning rate
- batch size
- Epochs (?)

**Prune bad Trials!**

In [36]:
def objective(trial):
    train_X, train_Y, test_X, test_Y = create_dataset(trial)

    model = create_model(trial, train_X)

    monitor = 'val_loss'

    callbacks = [
        # keras.callbacks.EarlyStopping(patience=10),
        optuna.integration.TFKerasPruningCallback(trial, monitor),
    ]

    # lr = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    optimizer = trial.suggest_categorical('optimizer', ['sgd', 'rmsprop', 'adagrad', 'adam'])
    batch_size = trial.suggest_int('batch_size', 1, 1000)

    model.compile(optimizer=optimizer, loss=keras.losses.MeanSquaredError(), metrics=['mean_absolute_percentage_error'])

    history = model.fit(train_X, train_Y, batch_size=batch_size, validation_split=0.1, verbose=1, epochs=EPOCHS, callbacks=callbacks)

    eval_results = model.evaluate(test_X, test_Y, batch_size=batch_size)

    return float(eval_results[0])

Start Optuna Optimization

In [37]:
def make_study():
    study = optuna.create_study(direction='minimize', pruner=optuna.pruners.SuccessiveHalvingPruner(), sampler=optuna.samplers.TPESampler())

    study.optimize(objective, n_trials=NUMBER_OF_TRIALS)

    # show results
    pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
    complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

    print('Study statistics:')
    print('   Number of finished Trials: ', len(study.trials))
    print('   Number of pruned Trials: ', len(pruned_trials))
    print('   Number of complete Trials: ', len(complete_trials))

    print('Best Trial: ')
    trial = study.best_trial

    print('  MSE: ', trial.value)

    print('   Params: ')
    for key, value in trial.params.items():
        print('   {}: {}'.format(key, value))

In [38]:
make_study()

[32m[I 2022-06-07 12:44:07,613][0m A new study created in memory with name: no-name-2ac73301-e3b2-42f0-b38f-79a105f80119[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 12:53:58,254][0m Trial 0 finished with value: 0.00288063520565629 and parameters: {'sequence_length': 19, 'hidden_neurons': 278, 'hidden_layers': 16, 'dropout_rate': 0.19068054717816801, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 348}. Best is trial 0 with value: 0.00288063520565629.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 12:59:04,561][0m Trial 1 finished with value: 0.002840838860720396 and parameters: {'sequence_length': 16, 'hidden_neurons': 243, 'hidden_layers': 6, 'dropout_rate': 0.49936005513335796, 'activation': 'sigmoid', 'optimizer': 'sgd', 'batch_size': 626}. Best is trial 1 with value: 0.002840838860720396.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 13:07:26,312][0m Trial 2 finished with value: 0.0029311031103134155 and parameters: {'sequence_length': 7, 'hidden_neurons': 894, 'hidden_layers': 13, 'dropout_rate': 0.29321031863285085, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 270}. Best is trial 1 with value: 0.002840838860720396.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:12:04,654][0m Trial 3 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:17:23,772][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:22:05,006][0m Trial 5 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:30:33,772][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:35:53,698][0m Trial 7 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:40:36,963][0m Trial 8 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 13:46:40,199][0m Trial 9 finished with value: 0.0028108484111726284 and parameters: {'sequence_length': 5, 'hidden_neurons': 154, 'hidden_layers': 17, 'dropout_rate': 0.2295818303474188, 'activation': 'tanh', 'optimizer': 'adam', 'batch_size': 382}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:51:31,062][0m Trial 10 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 13:56:40,302][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:00:50,707][0m Trial 12 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:05:44,902][0m Trial 13 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:10:23,133][0m Trial 14 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:16:03,297][0m Trial 15 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:21:22,864][0m Trial 16 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 14:26:26,408][0m Trial 17 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:31:07,613][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:35:46,827][0m Trial 19 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:41:31,038][0m Trial 20 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:47:11,573][0m Trial 21 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:52:34,259][0m Trial 22 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 14:58:11,970][0m Trial 23 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 15:04:00,249][0m Trial 24 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 15:08:53,038][0m Trial 25 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 15:52:18,716][0m Trial 26 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 15:57:06,527][0m Trial 27 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 16:01:30,897][0m Trial 28 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 16:06:46,488][0m Trial 29 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 16:12:19,984][0m Trial 30 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 16:18:38,120][0m Trial 31 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 16:24:47,079][0m Trial 32 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 16:37:48,703][0m Trial 33 finished with value: 0.0029648509807884693 and parameters: {'sequence_length': 6, 'hidden_neurons': 867, 'hidden_layers': 20, 'dropout_rate': 0.3775773939100119, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 139}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 16:42:37,421][0m Trial 34 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 16:48:29,955][0m Trial 35 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 16:55:01,725][0m Trial 36 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 17:00:18,353][0m Trial 37 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 17:04:50,749][0m Trial 38 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 17:10:13,666][0m Trial 39 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 17:15:12,471][0m Trial 40 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 17:28:50,153][0m Trial 41 finished with value: 0.002964008366689086 and parameters: {'sequence_length': 6, 'hidden_neurons': 924, 'hidden_layers': 20, 'dropout_rate': 0.49937729806574715, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 142}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 17:39:01,919][0m Trial 42 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 17:46:38,368][0m Trial 43 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 17:51:29,382][0m Trial 44 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 17:57:43,985][0m Trial 45 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 18:02:52,770][0m Trial 46 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 18:08:15,886][0m Trial 47 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 18:13:23,078][0m Trial 48 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 18:17:44,668][0m Trial 49 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 18:23:04,509][0m Trial 50 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 18:36:54,583][0m Trial 51 finished with value: 0.0029638553969562054 and parameters: {'sequence_length': 6, 'hidden_neurons': 869, 'hidden_layers': 20, 'dropout_rate': 0.3824066863092847, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 134}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 18:46:05,102][0m Trial 52 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 18:53:00,320][0m Trial 53 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 19:00:31,914][0m Trial 54 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 19:15:48,360][0m Trial 55 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 19:20:57,069][0m Trial 56 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 19:27:21,676][0m Trial 57 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 19:31:40,782][0m Trial 58 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 19:37:17,039][0m Trial 59 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 19:43:01,871][0m Trial 60 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 19:48:38,550][0m Trial 61 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 19:56:37,234][0m Trial 62 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 20:02:33,139][0m Trial 63 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 20:09:43,531][0m Trial 64 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 20:21:18,237][0m Trial 65 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 20:27:04,048][0m Trial 66 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 20:32:06,459][0m Trial 67 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 22:01:45,155][0m Trial 68 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 22:07:37,508][0m Trial 69 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 22:13:35,708][0m Trial 70 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 22:21:13,712][0m Trial 71 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 22:30:54,226][0m Trial 72 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 22:38:08,784][0m Trial 73 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 22:53:54,909][0m Trial 74 finished with value: 0.002960433019325137 and parameters: {'sequence_length': 6, 'hidden_neurons': 745, 'hidden_layers': 17, 'dropout_rate': 0.48129553542605963, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 64}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:00:32,436][0m Trial 75 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:08:47,027][0m Trial 76 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:13:41,528][0m Trial 77 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:20:14,410][0m Trial 78 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:24:40,458][0m Trial 79 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:30:42,426][0m Trial 80 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-07 23:42:54,042][0m Trial 81 finished with value: 0.0029638863634318113 and parameters: {'sequence_length': 6, 'hidden_neurons': 838, 'hidden_layers': 20, 'dropout_rate': 0.492425244121771, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 154}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-07 23:49:31,502][0m Trial 82 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-07 23:54:49,500][0m Trial 83 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-08 00:09:28,084][0m Trial 84 finished with value: 0.00294690509326756 and parameters: {'sequence_length': 6, 'hidden_neurons': 921, 'hidden_layers': 10, 'dropout_rate': 0.43984414113567194, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 68}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 00:17:51,260][0m Trial 85 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-08 00:25:45,369][0m Trial 86 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 00:30:09,797][0m Trial 87 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-08 00:34:58,956][0m Trial 88 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 00:39:43,022][0m Trial 89 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 00:44:23,671][0m Trial 90 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-08 00:52:41,893][0m Trial 91 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 00:57:55,860][0m Trial 92 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 01:03:20,064][0m Trial 93 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[32m[I 2022-06-08 01:17:06,898][0m Trial 94 finished with value: 0.0029619780834764242 and parameters: {'sequence_length': 6, 'hidden_neurons': 974, 'hidden_layers': 17, 'dropout_rate': 0.420273441396165, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 108}. Best is trial 9 with value: 0.0028108484111726284.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 01:27:11,264][0m Trial 95 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 01:33:19,407][0m Trial 96 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-08 01:54:07,524][0m Trial 97 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 01:59:58,803][0m Trial 98 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 02:04:45,122][0m Trial 99 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-08 02:11:20,079][0m Trial 100 pruned. Trial was pruned at epoch 1.[0m


Study statistics:
   Number of finished Trials:  100
   Number of pruned Trials:  60
   Number of complete Trials:  40
Best Trial: 
  MSE:  0.0028108484111726284
   Params: 
   sequence_length: 5
   hidden_neurons: 154
   hidden_layers: 17
   dropout_rate: 0.2295818303474188
   activation: tanh
   optimizer: adam
   batch_size: 382


## RNN

In [31]:
def create_rnn_dataset(trial):
    N = trial.suggest_int('sequence_length', 5, 30)

    train_samples, train_labels, test_samples, test_labels = get_preprocessed_data(N)

    highest_index_train = int(train_samples.shape[0] * OPTIMIZATION_SPLIT)
    highest_index_test = int(test_samples.shape[0] * OPTIMIZATION_SPLIT)

    return train_samples[:highest_index_train], train_labels[:highest_index_train], test_samples[:highest_index_test], test_labels[:highest_index_test]

In [36]:
def create_rnn_model(trial, train_X):
    number_of_inputs = train_X.shape[1]

    number_of_hidden_neurons = trial.suggest_int('hidden_neurons', 5, 300)
    number_of_hidden_layers = trial.suggest_int('hidden_layers', 1, 5)
    dropout_rate = trial.suggest_float('dropout_rate', 0., 0.5)
    activation_function = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])
    normalize = trial.suggest_categorical('normalize', [True, False])

    model = keras.Sequential()
    if number_of_hidden_layers > 1:
        model.add(layers.SimpleRNN(number_of_hidden_neurons, activation=activation_function, input_shape=(number_of_inputs, 9), return_sequences=True))
    else:
        model.add(layers.SimpleRNN(number_of_hidden_neurons, activation=activation_function, input_shape=(number_of_inputs, 9), return_sequences=False))
        if normalize:
            model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))
        model.add(layers.Dense(2, activation='linear'))
        return model


    if normalize:
        model.add(layers.BatchNormalization())

    for _ in range(number_of_hidden_layers-2):
        model.add(layers.SimpleRNN(number_of_hidden_neurons, activation=activation_function, return_sequences=True))
        if normalize:
            model.add(layers.BatchNormalization())

    model.add(layers.SimpleRNN(number_of_hidden_neurons, activation=activation_function, return_sequences=False))
    if normalize:
        model.add(layers.BatchNormalization())

    model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(2, activation='linear'))

    return model

In [37]:
def rnn_objective(trial):
    train_X, train_Y, test_X, test_Y = create_rnn_dataset(trial)

    model = create_rnn_model(trial, train_X)

    monitor = 'val_loss'

    callbacks = [
        # keras.callbacks.EarlyStopping(patience=10),
        optuna.integration.TFKerasPruningCallback(trial, monitor),
    ]

    # lr = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    optimizer = trial.suggest_categorical('optimizer', ['sgd', 'rmsprop', 'adagrad', 'adam'])
    batch_size = trial.suggest_int('batch_size', 32, 200)

    model.compile(optimizer=optimizer, loss=keras.losses.MeanSquaredError(), metrics=['mean_absolute_percentage_error'])

    history = model.fit(train_X, train_Y, batch_size=batch_size, validation_split=0.1, verbose=1, epochs=EPOCHS, callbacks=callbacks)

    eval_results = model.evaluate(test_X, test_Y, batch_size=batch_size)
    print(f'Result: {float(eval_results[0])}')

    return float(eval_results[0])

In [40]:
def make_rnn_study():
    study = optuna.create_study(direction='minimize', pruner=optuna.pruners.SuccessiveHalvingPruner(), sampler=optuna.samplers.TPESampler())

    study.optimize(rnn_objective, n_trials=100)

    # show results
    pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
    complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

    print('Study statistics:')
    print('   Number of finished Trials: ', len(study.trials))
    print('   Number of pruned Trials: ', len(pruned_trials))
    print('   Number of complete Trials: ', len(complete_trials))

    print('Best Trial: ')
    trial = study.best_trial

    print('  MSE: ', trial.value)

    print('   Params: ')
    for key, value in trial.params.items():
        print('   {}: {}'.format(key, value))

In [None]:
make_rnn_study()

[32m[I 2022-06-10 11:09:36,915][0m A new study created in memory with name: no-name-bdbabcf1-8e2b-483f-8f3c-0e0a052ecda2[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028397408314049244


[32m[I 2022-06-10 14:39:29,666][0m Trial 0 finished with value: 0.0028397408314049244 and parameters: {'sequence_length': 28, 'hidden_neurons': 58, 'hidden_layers': 3, 'dropout_rate': 0.440584636751394, 'activation': 'tanh', 'normalize': True, 'optimizer': 'rmsprop', 'batch_size': 78}. Best is trial 0 with value: 0.0028397408314049244.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.06824205815792084


[32m[I 2022-06-10 15:54:35,682][0m Trial 1 finished with value: 0.06824205815792084 and parameters: {'sequence_length': 14, 'hidden_neurons': 175, 'hidden_layers': 4, 'dropout_rate': 0.46607273679449474, 'activation': 'sigmoid', 'normalize': True, 'optimizer': 'rmsprop', 'batch_size': 162}. Best is trial 0 with value: 0.0028397408314049244.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028490223921835423


[32m[I 2022-06-10 16:50:47,408][0m Trial 2 finished with value: 0.0028490223921835423 and parameters: {'sequence_length': 14, 'hidden_neurons': 216, 'hidden_layers': 2, 'dropout_rate': 0.26479057567655245, 'activation': 'sigmoid', 'normalize': False, 'optimizer': 'rmsprop', 'batch_size': 101}. Best is trial 0 with value: 0.0028397408314049244.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 18:08:08,706][0m Trial 3 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 18:29:52,192][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 18:39:35,699][0m Trial 5 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 19:42:20,315][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 20:02:39,027][0m Trial 7 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 20:37:17,198][0m Trial 8 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-10 21:15:50,695][0m Trial 9 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 21:35:39,178][0m Trial 10 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 22:00:55,063][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 22:11:44,616][0m Trial 12 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 22:54:10,582][0m Trial 13 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 23:07:09,379][0m Trial 14 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 23:41:54,494][0m Trial 15 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-10 23:50:30,654][0m Trial 16 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 00:07:40,744][0m Trial 17 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 00:25:39,376][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 00:57:27,757][0m Trial 19 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 02:13:31,534][0m Trial 20 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 21 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 22 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 23 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 24 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 25 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 26 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 27 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 28 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 29 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 30 pruned. Trial was pruned at epoch 1.[0m


[32m[I 2022-06-11 05:48:35,203][0m Trial 31 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 32 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 33 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 34 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 35 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 36 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 37 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 38 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 39 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 40 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 41 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 42 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 43 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 44 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 45 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 46 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 47 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 48 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 49 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 50 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 51 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 52 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 53 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 54 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 55 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 56 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 57 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 58 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 59 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 60 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 61 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 62 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 63 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 64 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 65 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 66 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 67 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 68 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 69 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 70 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 71 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 72 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 73 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 74 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 75 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 76 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 77 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 78 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 79 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 80 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 81 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 82 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 83 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 84 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 85 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 86 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 87 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 88 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 89 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 10:06:33,464][0m Trial 90 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:36:00,463][0m Trial 91 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 02:49:16,777][0m Trial 92 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:07:26,150][0m Trial 93 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:27:23,303][0m Trial 94 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 03:36:55,701][0m Trial 95 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 04:54:06,938][0m Trial 96 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 05:48:35,203][0m Trial 97 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


[32m[I 2022-06-11 07:18:42,354][0m Trial 98 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:03:50,007][0m Trial 99 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10


[32m[I 2022-06-11 09:26:15,338][0m Trial 100 pruned. Trial was pruned at epoch 1.[0m


Study statistics:
   Number of finished Trials:  100
   Number of pruned Trials:  76
   Number of complete Trials:  24
Best Trial: 
  MSE:  0.09282874974024
   Params: 
   sequence_length: 7
   hidden_layers: 1
   number_of_neurons: 91
   dropout_rate: 0.080122423144556634
   activation: sigmoid
   normalize: False
   dropout_rate: 0.013421302851038347
   batch_size: 138
   optimizer: SGD


## Transformer

### Transformer Model implementation

In [24]:
import math

In [25]:
class LearnablePositionEncoding(layers.Layer):
    def __init__(self, d_model, dropout=0.1, max_len=10):
        super(LearnablePositionEncoding, self).__init__()
        self.dropout= layers.Dropout(dropout)
        # self.encoding = tf.Variable(tf.random.uniform((max_len, 1, d_model), -0.2, 0.2), trainable=True)
        self.encoding = tf.Variable(tf.random.uniform((max_len, d_model), -0.2, 0.2), trainable=True)

    def call(self, inputs):
        # print(f"Pos_Encoding Matrix Shape: {self.encoding[:inputs.shape[0], :].shape}")
        x = inputs + self.encoding[:inputs.shape[0], :]
        return self.dropout(x)

In [26]:
class TransformerBatchNormEncoderLayer(layers.Layer):

    def __init__(self, num_heads, d_model, dropout, dim_ff, activation):
        super(TransformerBatchNormEncoderLayer, self).__init__()
        self.self_attn = layers.MultiHeadAttention(num_heads, d_model, dropout=dropout)

        self.linear1 = layers.Dense(dim_ff, activation=activation)
        self.dropout = layers.Dropout(dropout)
        self.linear2 = layers.Dense(d_model, activation='linear')

        self.norm1 = layers.BatchNormalization(epsilon=1e-5)
        self.norm2 = layers.BatchNormalization(epsilon=1e-5)
        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, value):
        mha_out = self.self_attn(value, value)
        drop1_out = self.dropout(mha_out)
        norm1_out = self.norm1(drop1_out)
        combined_1 = norm1_out + drop1_out

        linear1_out = self.linear1(combined_1)
        drop2_out = self.dropout1(linear1_out)
        linear2_out = self.linear2(drop2_out)
        drop3_out = self.dropout2(linear2_out)

        norm2_out = self.norm2(drop3_out)

        out = norm2_out + combined_1

        return out

In [27]:
class TransformerBatchNormEncoderBlock(layers.Layer):

    def __init__(self, num_layers, num_heads, d_model, dropout, dim_ff, activation):
        super(TransformerBatchNormEncoderBlock, self).__init__()
        self.model = keras.Sequential()

        for _ in range(num_layers):
            self.model.add(TransformerBatchNormEncoderLayer(num_heads, d_model, dropout, dim_ff, activation))

    def call(self, inputs):
        return self.model(inputs)

In [28]:
class TransformerEncoderRegressor(layers.Layer):

    def __init__(self, max_len, d_model, n_heads, num_layers, dim_ff, num_classes, dropout=0.1, activation='relu', num_output_layers=1):
        super(TransformerEncoderRegressor, self).__init__()

        self.max_len = max_len
        self.d_model = d_model
        self.n_heads = n_heads
        self.num_classes = num_classes

        self.input_embedding = layers.Dense(d_model, activation='linear')
        self.pos_encoding = LearnablePositionEncoding(d_model, dropout, max_len)

        self.encoder = TransformerBatchNormEncoderBlock(num_layers, n_heads, d_model, dropout, dim_ff, activation)

        self.dropout = layers.Dropout(dropout)

        self.flatten_layer = layers.Flatten()
        self.output_layers = self.build_output_module(d_model, num_output_layers, num_classes, dropout)


    def build_output_module(self, d_model, num_output_layers, num_classes, dropout=0.1):
        output = keras.Sequential()

        i = 1
        while i < num_output_layers:
            output.add(layers.Dense(d_model, activation='relu'))
            output.add(layers.Dropout(dropout))
            i += 1

        output.add(layers.Dense(num_classes, activation='linear'))

        return output

    def call(self, input):
        x = self.input_embedding(input) * math.sqrt(self.d_model)

        # print(f"shape before pos_encoding: {x.shape}") # okay
        x = self.pos_encoding(x) # problem!

        # print(f"shape before transformer: {x.shape}") # not okay
        x = self.encoder(x)
        x = self.dropout(x)

        # x = x.reshape(x.shape[0], -1)
        # print(f"shape before reshape: {x.shape}")
        # x = tf.reshape(x, (x.shape[0], -1))
        # x = tf.reshape(x, (x.shape[0], -1))
        x = self.flatten_layer(x)


        output = self.output_layers(x)

        return output

In [29]:
def get_model(input_shape, max_len, d_model, n_heads, num_layers, dim_ff, num_classes, dropout=0.1, activation='relu', num_output_layers=1):
    input = layers.Input(shape=input_shape)

    # print(f"shape before regressor: {input.shape}")
    regressor = TransformerEncoderRegressor(max_len, d_model, n_heads, num_layers, dim_ff, num_classes, dropout, activation, num_output_layers)

    x = regressor(input)

    return keras.Model(input, x)

### Transformer Study

In [30]:
def create_transformer_dataset(trial):
    N = trial.suggest_int('sequence_length', 8, 25)

    train_samples, train_labels, test_samples, test_labels = get_preprocessed_data(N)

    highest_index_train = int(train_samples.shape[0] * OPTIMIZATION_SPLIT)
    highest_index_test = int(test_samples.shape[0] * OPTIMIZATION_SPLIT)

    return train_samples[:highest_index_train], train_labels[:highest_index_train], test_samples[:highest_index_test], test_labels[:highest_index_test]

In [31]:
def create_transformer_model(trial, train_X):
    number_of_inputs = train_X.shape[1]

    internal_dimension = trial.suggest_int('model_dimension', 20, 256)
    number_of_heads = trial.suggest_int('number_of_attention_heads', 2, 10)
    number_of_encoder_layers = trial.suggest_int('encoder_layers', 1, 10)
    number_of_ff_neurons = trial.suggest_int('ff_neurons', 5, 256)
    number_of_ff_layers = trial.suggest_int('hidden_layers', 1, 10)
    dropout_rate = trial.suggest_float('dropout_rate', 0., 0.5)
    activation_function = trial.suggest_categorical('activation', ['relu', 'tanh', 'sigmoid'])

    model = get_model([number_of_inputs, 9], number_of_inputs, internal_dimension, number_of_heads, number_of_encoder_layers, number_of_ff_neurons, 2, dropout_rate, activation_function, number_of_ff_layers)

    return model

In [32]:
def transformer_objective(trial):
    train_X, train_Y, test_X, test_Y = create_transformer_dataset(trial)

    model = create_transformer_model(trial, train_X)

    monitor = 'val_loss'

    callbacks = [
        # keras.callbacks.EarlyStopping(patience=10),
        optuna.integration.TFKerasPruningCallback(trial, monitor),
    ]

    # lr = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    optimizer = trial.suggest_categorical('optimizer', ['sgd', 'rmsprop', 'adagrad', 'adam'])
    batch_size = trial.suggest_int('batch_size', 8, 200)

    model.compile(optimizer=optimizer, loss=keras.losses.MeanSquaredError(), metrics=['mean_absolute_percentage_error'])

    history = model.fit(train_X, train_Y, batch_size=batch_size, validation_split=0.1, verbose=1, epochs=EPOCHS, callbacks=callbacks)

    eval_results = model.evaluate(test_X, test_Y, batch_size=batch_size)
    print(f'Result: {float(eval_results[0])}')

    return float(eval_results[0])

In [33]:
def make_transformer_study():
    study = optuna.create_study(direction='minimize', pruner=optuna.pruners.SuccessiveHalvingPruner(), sampler=optuna.samplers.TPESampler())

    study.optimize(transformer_objective, n_trials=NUMBER_OF_TRIALS)

    # show results
    pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
    complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

    print('Study statistics:')
    print('   Number of finished Trials: ', len(study.trials))
    print('   Number of pruned Trials: ', len(pruned_trials))
    print('   Number of complete Trials: ', len(complete_trials))

    print('Best Trial: ')
    trial = study.best_trial

    print('  MSE: ', trial.value)

    print('   Params: ')
    for key, value in trial.params.items():
        print('   {}: {}'.format(key, value))

In [None]:
make_transformer_study()

In [36]:
study = optuna.create_study(direction='minimize', pruner=optuna.pruners.SuccessiveHalvingPruner(), sampler=optuna.samplers.TPESampler())

study.optimize(transformer_objective, n_trials=NUMBER_OF_TRIALS)

# show results
pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

print('Study statistics:')
print('   Number of finished Trials: ', len(study.trials))
print('   Number of pruned Trials: ', len(pruned_trials))
print('   Number of complete Trials: ', len(complete_trials))

print('Best Trial: ')
trial = study.best_trial

print('  MSE: ', trial.value)

print('   Params: ')
for key, value in trial.params.items():
    print('   {}: {}'.format(key, value))

[32m[I 2022-06-12 20:45:36,249][0m A new study created in memory with name: no-name-e95e3cda-dbb1-4b48-9686-bc3193ac48f1[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002883722772821784


[32m[I 2022-06-12 21:19:34,452][0m Trial 0 finished with value: 0.002883722772821784 and parameters: {'sequence_length': 9, 'model_dimension': 65, 'number_of_attention_heads': 3, 'encoder_layers': 7, 'ff_neurons': 61, 'hidden_layers': 6, 'dropout_rate': 0.1403716054129206, 'activation': 'relu', 'optimizer': 'adagrad', 'batch_size': 133}. Best is trial 0 with value: 0.002883722772821784.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-12 22:06:45,116][0m Trial 1 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028894548304378986


[32m[I 2022-06-12 22:22:34,511][0m Trial 2 finished with value: 0.0028894548304378986 and parameters: {'sequence_length': 8, 'model_dimension': 105, 'number_of_attention_heads': 7, 'encoder_layers': 1, 'ff_neurons': 78, 'hidden_layers': 4, 'dropout_rate': 0.23690550476257832, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 106}. Best is trial 0 with value: 0.002883722772821784.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-12 22:44:16,415][0m Trial 3 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-12 22:49:39,975][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-12 23:10:24,252][0m Trial 5 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-12 23:16:50,792][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002843492431566119


[32m[I 2022-06-12 23:56:44,626][0m Trial 7 finished with value: 0.002843492431566119 and parameters: {'sequence_length': 16, 'model_dimension': 32, 'number_of_attention_heads': 4, 'encoder_layers': 8, 'ff_neurons': 129, 'hidden_layers': 10, 'dropout_rate': 0.15351902893202574, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 141}. Best is trial 7 with value: 0.002843492431566119.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 00:02:55,239][0m Trial 8 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 00:21:18,705][0m Trial 9 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 00:35:29,002][0m Trial 10 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 00:44:26,385][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 00:56:23,795][0m Trial 12 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 01:07:33,270][0m Trial 13 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002932238392531872


[32m[I 2022-06-13 05:14:07,241][0m Trial 14 finished with value: 0.002932238392531872 and parameters: {'sequence_length': 22, 'model_dimension': 256, 'number_of_attention_heads': 7, 'encoder_layers': 10, 'ff_neurons': 80, 'hidden_layers': 7, 'dropout_rate': 0.2667696458929326, 'activation': 'tanh', 'optimizer': 'adagrad', 'batch_size': 163}. Best is trial 7 with value: 0.002843492431566119.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 05:23:57,299][0m Trial 15 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 05:48:44,978][0m Trial 16 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 05:59:43,307][0m Trial 17 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 06:15:21,085][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 06:48:27,428][0m Trial 19 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 06:53:29,007][0m Trial 20 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 07:00:37,024][0m Trial 21 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 07:32:57,884][0m Trial 22 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 07:40:07,660][0m Trial 23 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.00289224274456501


[32m[I 2022-06-13 09:02:30,180][0m Trial 24 finished with value: 0.00289224274456501 and parameters: {'sequence_length': 12, 'model_dimension': 148, 'number_of_attention_heads': 4, 'encoder_layers': 9, 'ff_neurons': 113, 'hidden_layers': 4, 'dropout_rate': 0.1198870010676013, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 109}. Best is trial 7 with value: 0.002843492431566119.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 09:15:35,295][0m Trial 25 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 09:23:06,208][0m Trial 26 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 09:30:52,723][0m Trial 27 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 09:47:47,944][0m Trial 28 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 09:58:56,915][0m Trial 29 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 10:16:01,607][0m Trial 30 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028925463557243347


[32m[I 2022-06-13 11:44:17,726][0m Trial 31 finished with value: 0.0028925463557243347 and parameters: {'sequence_length': 12, 'model_dimension': 151, 'number_of_attention_heads': 4, 'encoder_layers': 9, 'ff_neurons': 108, 'hidden_layers': 4, 'dropout_rate': 0.10958857572598663, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 98}. Best is trial 7 with value: 0.002843492431566119.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 12:35:29,144][0m Trial 32 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 12:49:13,334][0m Trial 33 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 13:09:01,483][0m Trial 34 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 13:31:06,462][0m Trial 35 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 13:41:31,912][0m Trial 36 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 13:45:24,793][0m Trial 37 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 14:09:04,379][0m Trial 38 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 14:19:08,626][0m Trial 39 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002893649972975254


[32m[I 2022-06-13 14:47:51,283][0m Trial 40 finished with value: 0.002893649972975254 and parameters: {'sequence_length': 22, 'model_dimension': 132, 'number_of_attention_heads': 5, 'encoder_layers': 2, 'ff_neurons': 151, 'hidden_layers': 6, 'dropout_rate': 0.1278642558293794, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 139}. Best is trial 7 with value: 0.002843492431566119.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028200154192745686


[32m[I 2022-06-13 16:23:55,517][0m Trial 41 finished with value: 0.0028200154192745686 and parameters: {'sequence_length': 13, 'model_dimension': 157, 'number_of_attention_heads': 4, 'encoder_layers': 9, 'ff_neurons': 108, 'hidden_layers': 4, 'dropout_rate': 0.10995636208524415, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 97}. Best is trial 41 with value: 0.0028200154192745686.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002822780515998602


[32m[I 2022-06-13 18:04:38,273][0m Trial 42 finished with value: 0.002822780515998602 and parameters: {'sequence_length': 13, 'model_dimension': 175, 'number_of_attention_heads': 4, 'encoder_layers': 9, 'ff_neurons': 135, 'hidden_layers': 4, 'dropout_rate': 0.03149177430608134, 'activation': 'tanh', 'optimizer': 'rmsprop', 'batch_size': 96}. Best is trial 41 with value: 0.0028200154192745686.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 18:27:48,786][0m Trial 43 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 18:50:57,222][0m Trial 44 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028167979326099157


[32m[I 2022-06-13 20:15:21,836][0m Trial 45 finished with value: 0.0028167979326099157 and parameters: {'sequence_length': 13, 'model_dimension': 180, 'number_of_attention_heads': 4, 'encoder_layers': 7, 'ff_neurons': 192, 'hidden_layers': 10, 'dropout_rate': 0.03316042307281653, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 88}. Best is trial 45 with value: 0.0028167979326099157.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-13 20:44:00,869][0m Trial 46 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002840370638296008


[32m[I 2022-06-13 22:30:17,016][0m Trial 47 finished with value: 0.002840370638296008 and parameters: {'sequence_length': 13, 'model_dimension': 196, 'number_of_attention_heads': 4, 'encoder_layers': 8, 'ff_neurons': 196, 'hidden_layers': 10, 'dropout_rate': 0.08084032463175427, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 72}. Best is trial 45 with value: 0.0028167979326099157.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-13 23:25:39,777][0m Trial 48 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 00:00:30,367][0m Trial 49 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028232280164957047


[32m[I 2022-06-14 02:45:22,160][0m Trial 50 finished with value: 0.0028232280164957047 and parameters: {'sequence_length': 13, 'model_dimension': 191, 'number_of_attention_heads': 5, 'encoder_layers': 10, 'ff_neurons': 228, 'hidden_layers': 10, 'dropout_rate': 0.05946628803211375, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 52}. Best is trial 45 with value: 0.0028167979326099157.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028252145275473595


[32m[I 2022-06-14 05:25:41,273][0m Trial 51 finished with value: 0.0028252145275473595 and parameters: {'sequence_length': 13, 'model_dimension': 190, 'number_of_attention_heads': 5, 'encoder_layers': 10, 'ff_neurons': 237, 'hidden_layers': 10, 'dropout_rate': 0.05751156372657435, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 46}. Best is trial 45 with value: 0.0028167979326099157.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 05:59:54,517][0m Trial 52 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028134321328252554


[32m[I 2022-06-14 08:42:52,051][0m Trial 53 finished with value: 0.0028134321328252554 and parameters: {'sequence_length': 13, 'model_dimension': 176, 'number_of_attention_heads': 4, 'encoder_layers': 10, 'ff_neurons': 229, 'hidden_layers': 9, 'dropout_rate': 0.013421302851038347, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 35}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 10:21:04,757][0m Trial 54 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 11:28:51,211][0m Trial 55 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002854615217074752


[32m[I 2022-06-14 14:34:21,677][0m Trial 56 finished with value: 0.002854615217074752 and parameters: {'sequence_length': 13, 'model_dimension': 175, 'number_of_attention_heads': 4, 'encoder_layers': 10, 'ff_neurons': 221, 'hidden_layers': 10, 'dropout_rate': 0.00014676746538830332, 'activation': 'sigmoid', 'optimizer': 'adagrad', 'batch_size': 36}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 15:02:37,109][0m Trial 57 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 15:37:00,531][0m Trial 58 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-14 17:06:47,186][0m Trial 59 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 17:51:30,291][0m Trial 60 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-14 18:58:25,888][0m Trial 61 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 19:27:00,417][0m Trial 62 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 20:07:21,295][0m Trial 63 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-14 21:01:23,076][0m Trial 64 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 21:24:53,806][0m Trial 65 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 21:47:44,372][0m Trial 66 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 22:12:32,273][0m Trial 67 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-14 22:33:45,194][0m Trial 68 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002821354428306222


[32m[I 2022-06-15 00:35:40,110][0m Trial 69 finished with value: 0.002821354428306222 and parameters: {'sequence_length': 13, 'model_dimension': 140, 'number_of_attention_heads': 4, 'encoder_layers': 10, 'ff_neurons': 159, 'hidden_layers': 9, 'dropout_rate': 0.03742761615600195, 'activation': 'sigmoid', 'optimizer': 'rmsprop', 'batch_size': 87}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 01:09:07,469][0m Trial 70 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028226226568222046


[32m[I 2022-06-15 05:37:48,541][0m Trial 71 finished with value: 0.0028226226568222046 and parameters: {'sequence_length': 13, 'model_dimension': 179, 'number_of_attention_heads': 4, 'encoder_layers': 10, 'ff_neurons': 164, 'hidden_layers': 10, 'dropout_rate': 0.0632280781772483, 'activation': 'sigmoid', 'optimizer': 'rmsprop', 'batch_size': 32}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 06:53:18,500][0m Trial 72 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 07:33:41,047][0m Trial 73 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 08:26:52,354][0m Trial 74 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 09:38:41,040][0m Trial 75 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 10:03:46,590][0m Trial 76 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.00282072345726192


[32m[I 2022-06-15 13:25:39,055][0m Trial 77 finished with value: 0.00282072345726192 and parameters: {'sequence_length': 13, 'model_dimension': 145, 'number_of_attention_heads': 3, 'encoder_layers': 9, 'ff_neurons': 154, 'hidden_layers': 4, 'dropout_rate': 0.1346462285775975, 'activation': 'sigmoid', 'optimizer': 'rmsprop', 'batch_size': 29}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 14:04:09,685][0m Trial 78 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 15:15:35,466][0m Trial 79 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.002821611240506172


[32m[I 2022-06-15 16:39:01,366][0m Trial 80 finished with value: 0.002821611240506172 and parameters: {'sequence_length': 13, 'model_dimension': 129, 'number_of_attention_heads': 3, 'encoder_layers': 9, 'ff_neurons': 174, 'hidden_layers': 5, 'dropout_rate': 0.4351686376280519, 'activation': 'sigmoid', 'optimizer': 'rmsprop', 'batch_size': 91}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Result: 0.0028298539109528065


[32m[I 2022-06-15 17:53:29,650][0m Trial 81 finished with value: 0.0028298539109528065 and parameters: {'sequence_length': 13, 'model_dimension': 130, 'number_of_attention_heads': 2, 'encoder_layers': 9, 'ff_neurons': 176, 'hidden_layers': 5, 'dropout_rate': 0.40889312507508424, 'activation': 'sigmoid', 'optimizer': 'rmsprop', 'batch_size': 92}. Best is trial 53 with value: 0.0028134321328252554.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 18:08:57,886][0m Trial 82 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 18:29:03,685][0m Trial 83 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 18:49:40,969][0m Trial 84 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-15 19:36:17,780][0m Trial 85 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-15 20:16:20,491][0m Trial 86 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 20:49:13,605][0m Trial 87 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 21:17:19,659][0m Trial 88 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 23:22:50,700][0m Trial 89 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-15 23:46:56,131][0m Trial 90 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-16 00:55:59,130][0m Trial 91 pruned. Trial was pruned at epoch 4.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 01:30:40,784][0m Trial 92 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 01:59:27,798][0m Trial 93 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 02:35:55,888][0m Trial 94 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 03:25:56,726][0m Trial 95 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 03:46:49,717][0m Trial 96 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 04:06:05,543][0m Trial 97 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 04:37:12,125][0m Trial 98 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10

[32m[I 2022-06-16 05:09:01,383][0m Trial 99 pruned. Trial was pruned at epoch 1.[0m


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

[32m[I 2022-06-16 06:03:06,544][0m Trial 100 pruned. Trial was pruned at epoch 4.[0m


In [37]:
# show results
pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

print('Study statistics:')
print('   Number of finished Trials: ', len(study.trials))
print('   Number of pruned Trials: ', len(pruned_trials))
print('   Number of complete Trials: ', len(complete_trials))

print('Best Trial: ')
trial = study.best_trial

print('  MSE: ', trial.value)

print('   Params: ')
for key, value in trial.params.items():
    print('   {}: {}'.format(key, value))

Study statistics:
   Number of finished Trials:  100
   Number of pruned Trials:  78
   Number of complete Trials:  22
Best Trial: 
  MSE:  0.0028134321328252554
   Params: 
   sequence_length: 13
   model_dimension: 176
   number_of_attention_heads: 4
   encoder_layers: 10
   ff_neurons: 229
   hidden_layers: 9
   dropout_rate: 0.013421302851038347
   activation: sigmoid
   optimizer: adagrad
   batch_size: 35
