# Strategy

Trying out different models then aggregating the results:
- XGBoost
- LightGBM
- Neural Network

In [None]:
# Getting the directories
import glob 

BASE_DIR = '/kaggle/input/optiver-realized-volatility-prediction/'

# Paths to book and trade data.
# glob.glob() returns matches in arbitrary (filesystem) order. Each list is
# sorted so that the order is reproducible and so that code pairing the book
# and trade lists by position sees the same partition name at the same index.
TRAIN_BOOK_PATHS  = sorted(glob.glob(f'{BASE_DIR}book_train.parquet/*'))
TEST_BOOK_PATHS   = sorted(glob.glob(f'{BASE_DIR}book_test.parquet/*'))
TRAIN_TRADE_PATHS = sorted(glob.glob(f'{BASE_DIR}trade_train.parquet/*'))
TEST_TRADE_PATHS  = sorted(glob.glob(f'{BASE_DIR}trade_test.parquet/*'))

# Plotting
import matplotlib.pyplot as plt 

# Basic Data Wrangling utilites
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae, r2_score as r2
from sklearn.model_selection import KFold

# Xgboost
import xgboost as xgb

# LightGBM
from lightgbm import LGBMRegressor, plot_tree, plot_importance, plot_metric, plot_split_value_histogram
import lightgbm as lgb

# Working with dataframes and sequences
import numpy as np
import pandas as pd 

# Competition tables, loaded once at module level.
# `train` is read by DataManager to look up the target of each (stock_id, time_id);
# `sub` is the sample submission frame that submit() empties and refills.
train = pd.read_csv(f'{BASE_DIR}train.csv')
sub   = pd.read_csv(f'{BASE_DIR}sample_submission.csv')

# Some helper functions

def submit(prediction):
    """Write `prediction` to the competition submission file.

    The module-level sample-submission frame `sub` is emptied in place, then
    refilled with the `row_id` column of the module-level `test_data` and the
    given predictions, and finally saved as CSV without the index.
    """
    output_path = '/kaggle/working/submission.csv'

    # Drop every sample row; `sub` is mutated in place, not rebound.
    sub.drop(index=sub.index, inplace=True)

    # One row per test row: its id and the predicted target.
    sub['row_id'] = test_data['row_id']
    sub['target'] = prediction

    sub.to_csv(output_path, index=False)
    
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error, ignoring NaN terms."""
    relative_error = (y_true - y_pred) / y_true
    mean_squared = np.nanmean(np.square(relative_error))
    return np.sqrt(mean_squared)

def validate(model, Return=False):
    """Print validation metrics for a fitted model and optionally return them.

    An XGBoost ``Booster`` is evaluated on the module-level ``dval`` DMatrix;
    every other model is evaluated on the module-level ``X_val``. Predictions
    are compared against the module-level ``y_val``.

    Args:
        model: Fitted model exposing ``predict``.
        Return: When true, return ``(r2, rmspe)`` in addition to printing.

    Returns:
        ``(r2, rmspe)`` if ``Return`` is true, otherwise ``None``.
    """
    val_data = dval if isinstance(model, xgb.core.Booster) else X_val
    y_pred = model.predict(val_data)

    # scikit-learn metrics take (y_true, y_pred). MAE is symmetric, but R2 is
    # not, so the ground truth has to be passed first.
    mae_score = mae(y_val, y_pred)
    r2_value = r2(y_val, y_pred)
    rmspe_score = rmspe(y_val, y_pred)

    print(f' MAE: {mae_score}, R2: {r2_value}, RMSPE: {rmspe_score}')
    if Return:
        return r2_value, rmspe_score
    return None
    
def log_return(stock_prices):
    """Return the first difference of the natural log of `stock_prices`.

    The first element of the result is NaN because it has no predecessor.
    """
    log_prices = np.log(stock_prices)
    return log_prices.diff()

def xgb_importance_plot(xgb_model):
    """Draw one feature-importance chart per importance type (gain, cover, weight).

    Each chart shows at most 18 features and omits the numeric value labels.
    """
    for importance_type in ('gain', 'cover', 'weight'):
        xgb.plot_importance(
            xgb_model,
            max_num_features=18,
            importance_type=importance_type,
            show_values=False,
        )

## Questions
- Is it a good idea to augment features and then run a Gradient Boosting model to see which features tend to be more important?
- Should I trim the features in each iteration? Then add new ones and repeat?

## Data Generation

This part is only run when a new data version is being worked on. The output file is then uploaded [here](https://www.kaggle.com/damoonshahhosseini/processedbooktrade).

In [None]:
class DataManager:
    """Turn the raw book and trade parquet files into one feature row per (stock, time_id).

    With ``train=True`` the training partitions are read and each row gets its
    ``target`` from the module-level ``train`` frame. With ``train=False`` the
    test partitions are read and no ``target`` column is produced.

    The feature names, their order and the way each value is computed are
    deliberately kept as they were: the persisted training CSV was generated
    with this exact schema, and test features must line up with it. Changing
    any feature definition requires regenerating that CSV.

    NOTE(review): the following look unintended but are preserved for that
    reason - confirm and fix together with a new data version:
      * ``h_spread_l1_std`` holds the *max* of ``h_spread_l1`` and there is no
        ``h_spread_l1_max`` column (the original dict literal repeated the key,
        so the max overwrote the std).
      * the ``h_spread_*`` / ``v_spread_*`` statistics and all trade statistics
        are computed over the whole stock file, not over the ``time_id``
        bucket, so every row of a stock shares the same values.
      * ``v_spread_a`` subtracts ``bid_price2`` from ``ask_price1``; presumably
        ``ask_price2`` was intended.
    """

    def __init__(self, train=True):
        self._train = train
        self._book_file_list = TRAIN_BOOK_PATHS if train else TEST_BOOK_PATHS
        self._trade_file_list = TRAIN_TRADE_PATHS if train else TEST_TRADE_PATHS
        self.measures_list = []

    @staticmethod
    def _stock_id_from_path(path):
        """Return the text after the first ``=`` of a ``...stock_id=<n>`` partition path."""
        return path.split("=")[1]

    @staticmethod
    def _summarise(series, prefix, stats=('mean', 'std', 'max')):
        """Return ``{f'{prefix}_{stat}': series.<stat>()}`` for each requested statistic, in order."""
        return {f'{prefix}_{stat}': getattr(series, stat)() for stat in stats}

    @staticmethod
    def _prepare_book(book):
        """Add WAP, log-return, spread and bid-ask-spread columns to one stock's book data."""
        # sort_values() returns a new frame; the result must be kept, otherwise
        # the per-time_id differences below depend on the file's row order.
        book = book.sort_values(by=['time_id', 'seconds_in_bucket'])

        for level in (1, 2):
            bid_price, ask_price = book[f'bid_price{level}'], book[f'ask_price{level}']
            bid_size, ask_size = book[f'bid_size{level}'], book[f'ask_size{level}']
            book[f'wap{level}'] = (bid_price * ask_size + ask_price * bid_size) / (bid_size + ask_size)

            # transform() keeps the frame's own index, so the assignment stays
            # aligned regardless of how groupby.apply labels its result.
            book[f'log_return{level}'] = book.groupby('time_id')[f'wap{level}'].transform(log_return)

            # Rows without a log return are dropped before the next level is
            # processed, exactly as before; .copy() avoids chained-assignment
            # warnings on the filtered frame.
            book = book[book[f'log_return{level}'].notnull()].copy()

        # Different spreads
        book['h_spread_l1'] = book['ask_price1'] - book['bid_price1']
        book['h_spread_l2'] = book['ask_price2'] - book['bid_price2']
        book['v_spread_b'] = book['bid_price1'] - book['bid_price2']
        # NOTE(review): presumably meant ask_price1 - ask_price2; kept as is.
        book['v_spread_a'] = book['ask_price1'] - book['bid_price2']

        lowest_ask = book[['ask_price1', 'ask_price2']].min(axis=1)
        highest_bid = book[['bid_price1', 'bid_price2']].max(axis=1)
        book['bas'] = lowest_ask / highest_bid - 1

        return book

    @staticmethod
    def _targets_for_stock(stock_id):
        """Return a ``{time_id: target}`` lookup for one stock from the module-level ``train`` frame."""
        stock_rows = train[train['stock_id'] == int(stock_id)]
        # Keep the first row per time_id, matching the former `.values[0]` lookup.
        stock_rows = stock_rows.drop_duplicates('time_id')
        return dict(zip(stock_rows['time_id'], stock_rows['target']))

    def _traverse_book(self):
        """Process every book file and append one feature dict per time_id to ``measures_list``.

        Raises:
            FileNotFoundError: If a book partition has no trade partition with the same stock id.
            KeyError: In training mode, if a time_id of the book has no row in ``train``.
        """
        # Match trade files to book files by stock id instead of by list
        # position: the two glob results are not guaranteed to share an order.
        trade_path_by_stock = {
            self._stock_id_from_path(path): path for path in self._trade_file_list
        }

        for book_file_path in self._book_file_list:
            stock_id = self._stock_id_from_path(book_file_path)  # Getting the stock_id
            trade_file_path = trade_path_by_stock.get(stock_id)
            if trade_file_path is None:
                raise FileNotFoundError(f'No trade file found for stock_id={stock_id}')

            book = self._prepare_book(pd.read_parquet(book_file_path))
            trade = pd.read_parquet(trade_file_path)

            # Whole-stock aggregates do not depend on time_id, so they are
            # computed once per stock rather than once per time_id.
            stock_features = {
                'h_spread_l1_mean': book['h_spread_l1'].mean(),
                # Holds the max, not the std - see the class docstring.
                'h_spread_l1_std': book['h_spread_l1'].max(),
            }
            for column in ('h_spread_l2', 'v_spread_b', 'v_spread_a'):
                stock_features.update(self._summarise(book[column], column))

            trade_features = {
                **self._summarise(trade['price'], 'actual_price'),
                **self._summarise(trade['size'], 'size', ('mean', 'std')),
                **self._summarise(trade['order_count'], 'order_count', ('mean', 'std')),
            }

            # Note: When getting the test data ready, there is no target column.
            targets = self._targets_for_stock(stock_id) if self._train else None

            # sort=False yields the groups in order of first appearance, the
            # same order `book['time_id'].unique()` produced.
            for time_id, book_slice in book.groupby('time_id', sort=False):
                row = {'row_id': f"{stock_id}-{time_id}"}  # Fixing row-id from here

                for column in ('wap1', 'wap2'):
                    row.update(self._summarise(book_slice[column], column))

                row.update(stock_features)

                for column in ('log_return1', 'log_return2', 'bas'):
                    row.update(self._summarise(book_slice[column], column))

                for column, prefix in (
                    ('ask_size1', 'ask_size'),
                    ('ask_price1', 'ask_price'),
                    ('bid_size1', 'bid_size'),
                    ('bid_price1', 'bid_price'),
                ):
                    row.update(self._summarise(book_slice[column], prefix, ('mean', 'std')))

                row.update(trade_features)

                if self._train:
                    row['target'] = targets[time_id]

                self.measures_list.append(row)

    def get_processed(self):
        """Process all files and return the features as a DataFrame (one row per stock/time_id)."""
        # Start from an empty list so that calling this twice on the same
        # instance does not return every row twice.
        self.measures_list = []
        self._traverse_book()

        return pd.DataFrame(self.measures_list)

In [None]:
# book = DataManager().get_processed()
# book.to_csv('/kaggle/working/train_v3.csv', index=False)

# Importing Data
- Training data is imported from the preprocessed dataset.
- Test data is generated here using the DataManager class.
- Both training and test datasets are being normalized so the metrics in different columns are close to one another.

In [None]:
# Import the training data from the input folder and generate test data with the same schema
data = pd.read_csv('/kaggle/input/processedbooktrade/train_v3.csv')
test_data = DataManager(train=False).get_processed()

# Min-max scale every feature column (everything between row_id and target).
# The minimum and range come from the unscaled training column and are computed
# once, then applied to both the test and the training data.
for col_name in data.columns[1:-1]:
    col_min = data[col_name].min()
    col_range = data[col_name].max() - col_min
    test_data[col_name] = (test_data[col_name] - col_min) / col_range
    data[col_name] = (data[col_name] - col_min) / col_range

# Training data
X, y = data.iloc[:, 1:-1], data['target']

# Test data: select the training feature columns by name, so the test matrix
# has exactly the same columns in the same order as the training matrix.
X_test = test_data[X.columns]

# Training and validation splits to check for overfitting
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# train_preds, test_preds = pd.DataFrame(), pd.DataFrame()

# XGBoost

In [None]:
# dtrain = xgb.DMatrix(X_train, label=y_train)
# dval   = xgb.DMatrix(X_val, label=y_val)
# dtest  = xgb.DMatrix(X_test)

# X_xgb  = xgb.DMatrix(X)

In [None]:
# def run_xgb(params, n):
#     xgb_model = xgb.train(
#         params, 
#         dtrain, 
#         num_boost_round=1500, 
#         early_stopping_rounds=20, 
#         evals=[(dtrain, 'train'), (dval, 'eval')],
#     )
    
#     r2, rp = validate(xgb_model, True)
    
#     xgb_model.save_model(f'xgb_v3_{n}_1.model')
    
#     train_preds[f'xgb_{n}'] = xgb_model.predict(X_xgb).tolist()
#     test_preds[f'xgb_{n}'] =  xgb_model.predict(dtest).tolist()
    
# #     return pred, r2, rp

In [None]:
# XGBoost configuration 1: deep gradient-boosted trees trained on the GPU.
xgb_param_1 = dict(
    eta=0.1,
    max_depth=12,
    objective='reg:squarederror',
    booster='gbtree',
    colsample_bytree=0.9,
    sampling_method='gradient_based',
    subsample=0.6,  # Avoiding overfitting
    tree_method='gpu_hist',
)

# xgb1, xgb1_r1, xgb1_rp = 
# run_xgb(xgb_param_1, 1)

In [None]:
# XGBoost configuration 2: shallower trees, L2 regularisation, MAPE as the eval metric.
xgb_param_2 = {
    "eta": 0.1,
    "max_depth": 5,
    "eval_metric": "mape",
    "objective": "reg:squarederror",
    "booster": "gbtree",
    "lambda": 0.9,
    "colsample_bytree": 0.5,
    "sampling_method": "gradient_based",
    "subsample": 0.9,  # Avoiding overfitting
    "tree_method": "gpu_hist",
}

# xgb2, xgb2_r2, xgb2_rp = 
# run_xgb(xgb_param_2, 2)

In [None]:
# XGBoost configuration 3: DART booster with weighted tree dropout.
# ('min_child_weight': 1e-3 was tried earlier and is currently not set.)
xgb_param_3 = dict(
    eta=0.8,
    max_depth=15,
    verbosity=0,
    eval_metric='mape',
    objective='reg:squarederror',
    booster='dart',
    tree_method='gpu_hist',
    sample_type='weighted',
    rate_drop=0.4,
    max_leaves=30,
    alpha=0.0009,
    seed=10,
)

# xgb3, xgb2_r3, xgb3_rp =
# run_xgb(xgb_param_3, 3)

In [None]:
# f, axs = plt.subplots(1,1, figsize=(100,100))

# xgb.plot_tree(xgb_model, num_trees=75, ax=axs)

In [None]:
# import gc

# gc.get_count()

# LightGBM

In [None]:
# Reference https://www.kaggle.com/yus002/realized-volatility-prediction-lgbm-train
def my_metrics(y_true, y_pred):
    """Return the root mean squared percentage error (NaN terms are not skipped)."""
    pct_error = (y_true - y_pred) / y_true
    return np.sqrt(np.mean(np.square(pct_error)))

def lgbm_rmspe(y_true, y_pred):
    """Custom LightGBM eval metric: returns ``(name, value, is_higher_better)`` for RMSPE."""
    # False: a lower RMSPE is better.
    return 'rmspe', my_metrics(y_true, y_pred), False

def run_lgbm(params, n):
    """Fit a LightGBM regressor, print its validation metrics and store its predictions.

    The model is trained on the module-level ``X_train`` / ``y_train`` with
    early stopping evaluated on the training and validation sets using the
    custom RMSPE metric. Predictions for the full training matrix ``X`` and
    for ``X_test`` are written to the column ``lgbm_<n>`` of the module-level
    ``train_preds`` and ``test_preds`` containers, which must already exist.

    Args:
        params: Keyword arguments for ``LGBMRegressor``.
        n: Identifier used in the prediction column name.

    Returns:
        The fitted ``LGBMRegressor``.
    """
    lgbm_model = LGBMRegressor(**params)

    # NOTE(review): `verbose` and `early_stopping_rounds` as fit() keywords
    # belong to the LightGBM 3.x API; newer releases expect callbacks instead -
    # confirm against the installed version.
    lgbm_model.fit(
        X_train, y_train,
        eval_set=[(X_train, y_train), (X_val, y_val)],
        eval_metric=lgbm_rmspe,
        verbose=300,
        early_stopping_rounds=100,
    )

    # Prints MAE / R2 / RMSPE. The returned values were never used, and binding
    # them to `r2` shadowed the metric alias of the same name.
    validate(lgbm_model)

#     lgbm_model.save_model(f'lgbm_v3_{n}_1')
    train_preds[f'lgbm_{n}'] = lgbm_model.predict(X).tolist()
    test_preds[f'lgbm_{n}'] = lgbm_model.predict(X_test).tolist()

    return lgbm_model

In [None]:
# LightGBM configuration 1: slow learning rate with L1/L2 regularisation.
# NOTE(review): run_lgbm() also passes early_stopping_rounds=100 to fit();
# confirm which of the two values LightGBM actually honours.
# NOTE(review): bagging_fraction is set without a bagging frequency; check the
# LightGBM parameter docs on whether bagging is active in that case.
lgbm_param_1 = {
    "objective": "rmse",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "device_type": "gpu",
    "num_iterations": 5000,
    "early_stopping_rounds": 30,
    "learning_rate": 0.01,
    "lambda_l1": 1,
    "lambda_l2": 1,
    "feature_fraction": 0.8,
    "bagging_fraction": 0.8,
    "num_leaves": 50,
    "max_depth": 5,
    "seed": 11,
}

# lgbm_pred_1 = 
# run_lgbm(lgbm_param_1, 1)

In [None]:
# LightGBM configuration 2: larger trees, faster learning rate, feature-parallel learner.
lgbm_param_2 = dict(
    objective='mean_squared_error',
    metric='rmse',
    device_type='gpu',
    num_iterations=5000,
    num_leaves=100,
    learning_rate=0.1,
    max_depth=8,
    colsample_bytree=0.85,
    subsample=0.8,
    seed=11,
    tree_learner='feature',
)

# lgbm_pred_2 = run_lgbm(lgbm_param_2, 2)

In [None]:
# gc.collect()

# CatBoost

# Neural Network

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import mean_absolute_error as MAE, mean_squared_error as MSE
from tensorflow.keras.callbacks import ReduceLROnPlateau as RLP, EarlyStopping as ES

import random
import copy

In [None]:
def nn_rmspe(y_true, y_pred):
    """TensorFlow RMSPE loss: root of the NaN-ignoring mean of squared relative errors."""
    relative_error = (y_true - y_pred) / y_true
    mean_squared = tf.experimental.numpy.nanmean(tf.square(relative_error))
    return tf.sqrt(mean_squared)

def nn_seq_model(layers, n):
    """Train a Keras Sequential network on the RMSPE loss, save it and store its predictions.

    The layer list is deep-copied first, so the layer objects passed in are not
    reused by the model built here. Training uses the module-level
    ``X_train`` / ``y_train`` with ``X_val`` / ``y_val`` as validation data,
    learning-rate reduction on plateau and early stopping with the best
    weights restored. The model is saved to ``./nn_v3_<n>_1.h5`` and its
    predictions for ``X`` and ``X_test`` are written to the column ``nn_<n>``
    of the module-level ``train_preds`` and ``test_preds`` containers, which
    must already exist.

    Args:
        layers: List of Keras layers describing the architecture.
        n: Identifier used in the file name and the prediction column name.

    Returns:
        The trained ``Sequential`` model.
    """
    model = Sequential(copy.deepcopy(layers))

    model.compile(
        optimizer=Adam(2e-3),
        loss=nn_rmspe,
    )

    model.fit(
        x=X_train, y=y_train,
        batch_size=256,
        epochs=500,
        verbose=False,
        callbacks=[
            RLP(monitor='val_loss', factor=0.98, patience=15, verbose=1),
            ES(monitor='val_loss', patience=50, verbose=1, restore_best_weights=True)
        ],
        validation_data=(X_val, y_val),
        shuffle=True,
    )

    path = f'./nn_v3_{n}_1.h5'

    model.save(path)

    # Keras returns predictions with shape (n_samples, 1); flatten them so each
    # one is stored as a plain 1-D column, like the LightGBM predictions.
    train_preds[f'nn_{n}'] = model.predict(X).ravel().tolist()
    test_preds[f'nn_{n}'] = model.predict(X_test).ravel().tolist()

    return model

In [None]:
# Architecture 1: seven groups of three Dense layers, each group followed by
# BatchNormalization, and a single-unit Dense output.
# No `activation` argument is passed to any Dense layer in this list.
# The positional arguments of TruncatedNormal are (mean, stddev, seed).
layers_1 = [
    Dense(32, kernel_initializer=TruncatedNormal(0, 1, 11), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2, 11), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 1, 161), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
     Dense(32, kernel_initializer=TruncatedNormal(0, 1e-1, 11), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2e-1, 11), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 1e-1, 161), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
     Dense(32, kernel_initializer=TruncatedNormal(0, 1e-2, 11), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2e-2, 11), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 1e-2, 161), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
     Dense(32, kernel_initializer=TruncatedNormal(0, 1e-2, 51), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2e-2, 1), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 1e-2, 11), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
     Dense(32, kernel_initializer=TruncatedNormal(0, 1e-3, 11), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2e-3, 11), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 1e-3, 161), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
    Dense(64, kernel_initializer=TruncatedNormal(0, 1e-2, 71), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(128, kernel_initializer=TruncatedNormal(0, 2e-2, 51), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(256, kernel_initializer=TruncatedNormal(0, 1e-2, 61), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
    Dense(256, kernel_initializer=TruncatedNormal(0, 1e-3, 11), bias_initializer=TruncatedNormal(1e-1, 1e-7, 11)),
    Dense(64, kernel_initializer=TruncatedNormal(0, 2e-3, 11), bias_initializer=TruncatedNormal(0, 5e-7, 11)),
    Dense(32, kernel_initializer=TruncatedNormal(0, 1e-3, 161), bias_initializer=TruncatedNormal(0, 1e-7, 51)),
    BatchNormalization(),
    Dense(1, kernel_initializer=TruncatedNormal(0, 1, 11), bias_initializer=TruncatedNormal(0, 1e-7, 32)),
]

# nn_seq_model(layers_1, 1)

In [None]:
 # Train architecture 1 directly in this cell. The layer objects of layers_1
 # are used as they are (no deep copy, unlike nn_seq_model).
 model = Sequential(layers_1)

# RMSPE is used as the training loss.
model.compile(
        optimizer=Adam(1e-3),
        loss=nn_rmspe,
    )
    
# `hist` keeps the object returned by fit(). The learning rate is multiplied
# by 0.98 after 15 epochs without val_loss improvement; training stops after
# 200 such epochs and the best weights are restored.
hist = model.fit(
        x=X_train, y=y_train, 
        batch_size=512, 
        epochs=1000,
        verbose=True,
        callbacks=[
            RLP(monitor='val_loss', factor=0.98, patience=15, verbose=1), 
            ES(monitor='val_loss', patience=200, verbose=1, restore_best_weights=True)
        ], 
        validation_data=(X_val, y_val),
        shuffle=True,
    )

In [None]:
# Save the current `model` to the Kaggle working directory.
model.save('/kaggle/working/nnv3_03')

In [None]:
# Architecture 2: two blocks of two 16-unit ReLU Dense layers, each block
# followed by BatchNormalization, and a single sigmoid output unit.
# The positional arguments of TruncatedNormal are (mean, stddev, seed).
layers_2 = [
    Dense(16, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 11), bias_initializer=TruncatedNormal(1e-1, 1e-3, 11)),
    Dense(16, activation='relu', kernel_initializer=TruncatedNormal(0, 2e-3, 11), bias_initializer=TruncatedNormal(0, 5e-3, 11)),
    BatchNormalization(),
    Dense(16, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 161), bias_initializer=TruncatedNormal(0, 1e-2, 151)),
    Dense(16, activation='relu', kernel_initializer=TruncatedNormal(0, 1e-1, 61), bias_initializer=TruncatedNormal(0, 1e-2, 151)),
    BatchNormalization(),
    Dense(1, activation='sigmoid', kernel_initializer=TruncatedNormal(0, 1e-1, 11), bias_initializer=TruncatedNormal(0, 1e-4, 32)),
]

# nn_seq_model(layers_2, 2)

In [None]:
# Architecture 3: eight 32-unit ReLU Dense layers, one BatchNormalization and
# a single sigmoid output unit.
# The positional arguments of TruncatedNormal are (mean, stddev, seed).
layers_3 = [
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 11), bias_initializer=TruncatedNormal(1e-1, 1e-3, 11)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 2, 11), bias_initializer=TruncatedNormal(0, 5e-2, 11)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 161), bias_initializer=TruncatedNormal(0, 1e-3, 151)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1e-1, 61), bias_initializer=TruncatedNormal(0, 1e-4, 11)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 161), bias_initializer=TruncatedNormal(0, 1e-1, 101)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1e-1, 61), bias_initializer=TruncatedNormal(0, 1e-2, 51)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1, 161), bias_initializer=TruncatedNormal(0, 3e-1, 11)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1e-1, 61), bias_initializer=TruncatedNormal(0, 1, 151)),
    BatchNormalization(),
    Dense(1, activation='sigmoid', kernel_initializer=TruncatedNormal(0, 1e-1, 11), bias_initializer=TruncatedNormal(0, 1e-1, 32)),
]

# nn_seq_model(layers_3, 3)

In [None]:
# Architecture 4: BatchNormalization is applied first, then Dense layers that
# alternate between activation=None and ReLU while widening from 32 to 256
# units, and a single output unit with activation=None.
# The positional arguments of TruncatedNormal are (mean, stddev, seed).
layers_4 = [
    BatchNormalization(),
    Dense(32, activation=None, kernel_initializer=TruncatedNormal(0, 1e-1, 161), bias_initializer=TruncatedNormal(0, 1, 151)),
    Dense(32, activation='relu', kernel_initializer=TruncatedNormal(0, 1e-1, 61), bias_initializer=TruncatedNormal(0, 1, 151)),
    BatchNormalization(),
    Dense(64, activation=None, kernel_initializer=TruncatedNormal(0, 5e-2, 11), bias_initializer=TruncatedNormal(0, 1, 151)),
    Dense(128, activation='relu', kernel_initializer=TruncatedNormal(0, 5e-1, 161), bias_initializer=TruncatedNormal(0, 1, 151)),
    Dense(256, activation=None, kernel_initializer=TruncatedNormal(0, 8e-1, 161), bias_initializer=TruncatedNormal(0, 1, 151)),
    Dense(1, activation=None, kernel_initializer=TruncatedNormal(0, 1e-1, 11), bias_initializer=TruncatedNormal(0, 1e-1, 32)),
]

# nn_seq_model(layers_4, 4)

In [None]:
# train_preds = pd.DataFrame(train_preds)
# test_preds = pd.DataFrame(test_preds)

# Ensembling



In [None]:
# X_train, X_val, y_train, y_val = train_test_split(train_preds, y, test_size=0.2, random_state=23)

In [None]:
# ens_layers = [
#     Dense(8, activation=None, kernel_initializer=TruncatedNormal(0, 2, 11), bias_initializer=TruncatedNormal(0, 1, 151)),
#     Dense(8, activation='relu', kernel_initializer=TruncatedNormal(0, 5e-1, 11), bias_initializer=TruncatedNormal(0, 1, 11)),
#     Dense(8, activation='relu', kernel_initializer=TruncatedNormal(0, 8e-1, 61),bias_initializer=TruncatedNormal(0, 1, 51)),
#     BatchNormalization(),
#     Dense(1, activation='sigmoid', kernel_initializer=TruncatedNormal(0, 1e-1, 11),
#         bias_initializer=TruncatedNormal(0, 1e-1, 32), kernel_regularizer=None,
#         bias_regularizer=None, activity_regularizer=None),
# ]

# model = Sequential(ens_layers)

# model.compile(
#     optimizer=Adam(5e-3),
#     loss=nn_rmspe,
# )

# model.fit(
#         x=X_train, y=y_train, 
#         batch_size=256, 
#         epochs=1000,
#         verbose=False,
#         callbacks=[
#             RLP(monitor='val_loss', factor=0.98, patience=15, verbose=1), 
#             ES(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
#         ], 
#         validation_data=(X_val, y_val),
#         shuffle=True,
#     )

In [None]:
# Predict on the test features with the current `model` and write the
# submission file through submit().
submit(model.predict(X_test))

In [None]:
# Read the written submission back to inspect its contents.
pd.read_csv('/kaggle/working/submission.csv')

## Reference (s)
https://www.kaggle.com/konradb/we-need-to-go-deeper-and-validate#Model