In [13]:
import numpy as np
import pandas as pd

from tensorflow import keras
from hyperopt import fmin, hp, tpe, STATUS_OK
from typing import Callable

In [14]:
train = pd.read_csv('data/train.csv').drop(['id', 'target'], axis=1).to_numpy()
test = pd.read_csv('data/test.csv').drop('id', axis=1).to_numpy()

In [15]:
split = int(train.shape[0] * .8)
X_train = train[:split, :]
X_valid = train[split:, :]

In [21]:
def reshape(data: np.array, n_in: int, n_out: int) -> pd.DataFrame:
    n_vars = 1
    df = pd.DataFrame(data)
    cols, names = [], []

    for i in range(n_in, 0, -1):
        shifted = df.shift(i).fillna(0)

        cols.append(shifted)

        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]

    for i in range(0, n_out):
        shifted = df.shift(-i).fillna(0)

        cols.append(shifted)

        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names

    return agg

In [27]:
search_space = {
    'activation': hp.choice('activation', ('relu', 'elu')),
    'units': hp.choice('units', (3, 4)),
    'distorting_layer': hp.choice(
        'distorting_layer',
        (
            keras.layers.Dropout(.2),
            keras.layers.Dropout(.3),
            keras.layers.GaussianNoise(.01),
            keras.layers.GaussianNoise(.02),
            keras.layers.GaussianNoise(.03)
        ))
}
STEPS = 7


def create_objective(X_train, X_valid, X_test) -> Callable:
    def objective(space: dict) -> dict:
        encoder = keras.models.Sequential([
            keras.layers.Dense(STEPS + 1, activation=space['activation']),
            space['distorting_layer'],
            keras.layers.Dense(space['units'], activation=space['activation'])
        ])
        decoder = keras.models.Sequential([
            keras.layers.Dense(space['units'], activation=space['activation']),
            keras.layers.Dense(STEPS + 1, activation=space['activation'])
        ])
        denoising_ae = keras.models.Sequential([encoder, decoder])
        early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4,
                                                       min_delta=1e-4)

        denoising_ae.compile(optimizer='adam', loss='mse')
        denoising_ae.fit(X_train, X_train, epochs=20, batch_size=64,
                         verbose=0, callbacks=[early_stopping],
                         validation_data=(X_valid, X_valid))

        acc = denoising_ae.evaluate(X_test, X_test, verbose=0)

        return {'loss': acc, 'status': STATUS_OK}

    return objective

In [None]:
params_by_col = {}


for idx in range(15):
    train_values = reshape(train[:, idx], 1, STEPS).values
    test_values = reshape(test[:, idx], 1, STEPS).values
    train_reshaped = train_values[:split, :]
    valid_reshaped = train_values[split:, :]
    best_params = fmin(
        fn=create_objective(train_reshaped, valid_reshaped, test_values),
        space=search_space,
        algo=tpe.suggest,
        max_evals=40)
    params_by_col[f'col_{idx}'] = best_params

    print(f'Column {idx} processed')

100%|██████████| 40/40 [46:58<00:00, 70.46s/trial, best loss: 0.02087397873401642] 
Column 0 processed
100%|██████████| 40/40 [40:24<00:00, 60.61s/trial, best loss: 0.02599860355257988] 
Column 1 processed
100%|██████████| 40/40 [40:38<00:00, 60.97s/trial, best loss: 0.020129483193159103]
Column 2 processed
100%|██████████| 40/40 [56:30<00:00, 84.77s/trial, best loss: 0.0270722396671772]   
Column 3 processed
100%|██████████| 40/40 [1:10:55<00:00, 106.38s/trial, best loss: 0.024401657283306122]
Column 4 processed
 75%|███████▌  | 30/40 [34:30<11:11, 67.17s/trial, best loss: 0.023840466514229774]