# Load Data

In [None]:
import pandas as pd
import numpy as np

import gc

class Config:
    """Notebook-wide settings, kept in one place so cells can share them."""

    # Base directory for data files; not referenced by the cells visible here.
    data_dir: str = ''
    # Master random seed.
    seed: int = 42


# Short alias for the master seed.
rs = Config.seed

def rmspe(y_true, y_pred):
    """Return the *negated* root mean squared percentage error.

    Computes ``-sqrt(mean(((y_true - y_pred) / y_true) ** 2))``. The result is
    therefore <= 0, and values closer to zero mean a smaller error.

    Args:
        y_true: ground-truth values; used as the divisor, so zeros produce
            inf/nan entries.
        y_pred: predicted values, broadcastable against ``y_true``.

    Returns:
        The negated RMSPE as a NumPy scalar.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_squared = np.mean(np.square(relative_error))
    return -np.sqrt(mean_squared)

In [None]:
# Read the training table from an HDF5 file.
# NOTE(review): the path is hard-coded; Config.data_dir is not used to build it.
train_data_set = pd.read_hdf('../input/optiver-transformed-data/new_train.hdf5')

In [None]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
train_data_set

In [None]:
# Force a garbage-collection pass. Binding the return value to a name keeps the
# count returned by gc.collect() from being echoed as the cell output.
x = gc.collect()

In [None]:
# Feature frame: everything except the 'time_id', 'target' and 'weights' columns.
X_display = train_data_set.drop(columns=['time_id', 'target', 'weights'])

# Dense float64 copies of the features and of the regression target.
X = X_display.to_numpy().astype(np.float64)
y = train_data_set['target'].to_numpy().astype(np.float64)

# Displayed as the cell output: (rows, features) and (rows,).
X.shape, y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows as a test set.
# Stratifying on the first feature column keeps the distribution of its values the same in
# both parts (presumably a categorical id such as the stock id -- TODO confirm).
# random_state pins the shuffle so the split is identical on every Restart & Run All;
# without it each run tunes and evaluates on a different partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, stratify=X[:, 0], shuffle=True, random_state=rs)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures, RobustScaler

# Individual preprocessing steps.
sc_rs = RobustScaler(with_centering=True)
sc_mm = MinMaxScaler()
poly = PolynomialFeatures(degree=2)  # constructed, but currently left out of the pipeline

# Robust scaling first, then min-max scaling of the result.
sc = Pipeline(steps=[
    # ('poly', poly),
    ('RobustScaler', sc_rs),
    ('MinMax', sc_mm),
])

# Fit the scalers on the training split only, then apply the fitted transform to the test split.
# Note that this overwrites X_train / X_test with their scaled versions.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Optional: persist the fitted pipeline.
# from joblib import dump
# dump(sc, 'Temp/full_pipe.joblib')

In [None]:
# Shape of the full feature matrix X (the scaling above was applied to X_train / X_test, not X).
X.shape

In [None]:
# The train/test splits exist now, so drop the full-size arrays and the source frame,
# then run a garbage-collection pass.
# NOTE(review): X_display is not deleted here, so whatever it holds stays in memory.
del X, y, train_data_set
_ = gc.collect()

# Optuna Tuning

In [None]:
# Install TensorFlow Addons, imported below as `tfa`.
# NOTE(review): the version is not pinned; `%pip install tensorflow-addons==<version>` would
# target the kernel's environment and make the run repeatable.
!pip install tensorflow-addons

In [None]:
import tensorflow_addons as tfa
import tensorflow as tf
from sklearn.model_selection import KFold
import tensorflow.keras.backend as K

def rmspe_k(y_true, y_pred):
    """Root mean squared percentage error written with Keras backend ops.

    Computes ``sqrt(mean(((y_true - y_pred) / y_true) ** 2))`` over all elements
    of the tensors it is given, so it can be passed to ``model.compile`` as a metric.

    Args:
        y_true: tensor of ground-truth values (also the divisor).
        y_pred: tensor of predictions.

    Returns:
        A scalar tensor holding the RMSPE.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_squared = K.mean(K.square(relative_error))
    return K.sqrt(mean_squared)

def make_layer(x, units, dropout_rate):
    """Append one hidden block: weight-normalised swish Dense -> BatchNorm -> Dropout.

    Args:
        x: Keras tensor the block is attached to.
        units: number of units of the Dense layer.
        dropout_rate: rate passed to the trailing Dropout layer.

    Returns:
        The Keras tensor produced by the Dropout layer.
    """
    # The swish Dense layer is wrapped in WeightNormalization so that the wrapper is really
    # part of the graph. Previously a weight-normalised Dense was applied to `x` and its output
    # was immediately overwritten by a second, plain Dense that also read `x`, so the
    # weight-normalised layer never reached the model.
    dense = tf.keras.layers.Dense(units, tf.keras.activations.swish)
    t = tfa.layers.WeightNormalization(dense)(x)
    t = tf.keras.layers.BatchNormalization()(t)
    t = tf.keras.layers.Dropout(dropout_rate)(t)
    return t

def make_model(data, units, dropout_rates):
    """Build and compile the feed-forward regression network.

    Layout: Input -> BatchNormalization -> one ``make_layer`` block per entry of
    ``units`` -> Dense(1, linear) named ``'dense_output'``.

    Args:
        data: array-like whose ``shape[1]`` gives the number of input features;
            only the shape is read.
        units: sequence with the width of each hidden block.
        dropout_rates: sequence with the dropout rate of each hidden block,
            indexed in parallel with ``units``.

    Returns:
        A ``tf.keras.Model`` compiled with MSE loss, the Adam optimizer and
        ``rmspe_k`` as metric.
    """
    n_features = data.shape[1]
    inputs = tf.keras.layers.Input(shape=(n_features,))

    hidden = tf.keras.layers.BatchNormalization()(inputs)
    for idx, width in enumerate(units):
        hidden = make_layer(hidden, width, dropout_rates[idx])

    output = tf.keras.layers.Dense(1, 'linear', name='dense_output')(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=output)
    model.compile(loss='mse', optimizer='adam', metrics=[rmspe_k])
    return model

def fit_predict(n_splits, x_train, y_train, units, dropout_rates, epochs, x_test, y_test, verbose, random_state):
    """Cross-validate the network and optionally average its test-set predictions.

    For every fold of a shuffled K-fold split a fresh model is built, trained with early
    stopping and learning-rate reduction on plateau, scored on the fold's validation part
    and, if ``x_test`` is given, used to predict ``x_test``.

    Args:
        n_splits: number of K-fold splits.
        x_train: feature array, indexed by integer row arrays.
        y_train: target array aligned with ``x_train``.
        units: hidden-layer widths forwarded to ``make_model``.
        dropout_rates: per-layer dropout rates forwarded to ``make_model``.
        epochs: maximum number of training epochs per fold.
        x_test: features to predict after each fold, or ``None`` to skip prediction.
        y_test: not used; kept so existing callers keep working.
        verbose: verbosity passed to the Keras callbacks and to ``fit``.
        random_state: seed for the K-fold shuffle.

    Returns:
        Tuple ``(y_pred, histories, score)``: the fold-averaged predictions for ``x_test``
        (``None`` when ``x_test`` is ``None``), the list of objects returned by ``fit``,
        and the mean validation ``rmspe_k`` over the folds.
    """
    batch_size = 2048

    histories = []
    scores = []
    y_preds = []

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    for train_idx, valid_idx in cv.split(x_train, y_train):

        x_train_train = x_train[train_idx]
        y_train_train = y_train[train_idx]
        x_train_valid = x_train[valid_idx]
        y_train_valid = y_train[valid_idx]

        # Start every fold from a clean Keras state and a freshly initialised model.
        K.clear_session()

        estimator = make_model(x_train, units, dropout_rates)

        es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=2e-5, patience=5,
                                              verbose=verbose, mode='min', restore_best_weights=True)

        rl = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=3,
                                                  mode='min', verbose=verbose)

        history = estimator.fit(x_train_train, y_train_train,
                                batch_size=batch_size, epochs=epochs, callbacks=[es, rl],
                                validation_data=(x_train_valid, y_train_valid), shuffle=False,
                                verbose=verbose)

        # Score the weights the model actually ends up with. restore_best_weights=True may roll
        # the weights back to an earlier epoch, so the last entry of
        # history.history['val_rmspe_k'] can describe weights the model no longer has.
        # The batch size matches the one used during fit so the metric is aggregated the same way.
        fold_metrics = estimator.evaluate(x_train_valid, y_train_valid,
                                          batch_size=batch_size, verbose=0, return_dict=True)
        scores.append(fold_metrics['rmspe_k'])

        if x_test is not None:
            y_preds.append(estimator.predict(x_test))

        histories.append(history)

    # Average the per-fold predictions element-wise.
    y_pred = np.mean(y_preds, axis=0) if x_test is not None else None

    score = np.mean(scores)

    return y_pred, histories, score


In [None]:
import optuna
def objective(trial):
    """Optuna objective: cross-validated RMSPE of one sampled network architecture.

    Samples the number of hidden layers (1-4) and, per layer, a width from
    {1024, 512, 256, 128} and a log-uniformly distributed dropout rate in [0.1, 0.5],
    then runs 10-fold cross-validation on the module-level ``X_train`` / ``y_train``.

    Args:
        trial: the ``optuna.trial.Trial`` used to sample hyper-parameters.

    Returns:
        The mean validation score reported by ``fit_predict`` (lower is better).
    """
    n_layers = trial.suggest_int('n_layers', 1, 4)

    units = []
    dropout_rates = []

    for layer_no in range(1, n_layers + 1):
        units.append(
            trial.suggest_categorical('units_{}'.format(layer_no), [1024, 512, 256, 128]))
        # suggest_float(..., log=True) is the non-deprecated spelling of suggest_loguniform.
        dropout_rates.append(
            trial.suggest_float('dropout_rate_{}'.format(layer_no), 0.1, 0.5, log=True))

    print('Units:', units, "Dropout rates:", dropout_rates, "Layers:", n_layers)

    # Only the CV score is used here, so no test data is passed: with x_test=None fit_predict
    # skips the per-fold test predictions, which were previously computed and thrown away.
    # The K-fold seed comes from the notebook-wide seed instead of a second hard-coded 42.
    _, _, score = fit_predict(10, X_train, y_train, units, dropout_rates, 50, None, None, 0, rs)
    return score

In [None]:
# Study that minimises the objective with the TPE sampler.
# The sampler is seeded with the notebook-wide seed so the sequence of suggested
# hyper-parameters does not depend on an unseeded RNG. Objective values can still vary
# between runs because network training itself is stochastic.
study = optuna.create_study(direction='minimize',
                            sampler=optuna.samplers.TPESampler(seed=rs),
                            #sampler=optuna.samplers.RandomSampler(seed=rs),
                            study_name='Optuna_NN')

In [None]:
# Garbage-collection pass before the long tuning run; the return value is bound to a
# name only so it is not echoed as the cell output.
x = gc.collect()

In [None]:
%%time
# Run trials with a timeout of 3600*7.5 s (7.5 hours). No n_trials is passed, so the timeout
# is the only stopping condition given here. gc_after_trial=True asks Optuna to run garbage
# collection after each trial.
study.optimize(objective,
               timeout=3600*7.5,
               gc_after_trial=True)

In [None]:
# Summary of the tuning run. len(study.trials) counts every trial stored in the study.
print('Number of finished trials:', len(study.trials))
# Hyper-parameters of the trial with the best (lowest) objective value.
print('Best trial:', study.best_trial.params)

In [None]:
# Plot the optimization history of the study; the returned figure is the cell output.
optuna.visualization.plot_optimization_history(study)