In [None]:
from IPython.core.display import display, HTML

import pandas as pd
import numpy as np 
import glob
import os
import gc

from joblib import Parallel, delayed
from sklearn.neighbors import NearestNeighbors

from sklearn import preprocessing, model_selection
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt 
import seaborn as sns
import numpy.matlib
from sklearn.cluster import KMeans

from fastai.tabular.all import *
import pickle
from sklearn.model_selection import KFold, GroupKFold
import lightgbm as lgb
from catboost import CatBoostRegressor, Pool
from scipy import special
from scipy.stats import skew

from numpy.random import seed

import tensorflow as tf

from tensorflow import keras
from keras import backend as K
from keras.backend import sigmoid
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation

import warnings
# Silence every warning category for the rest of the notebook.
warnings.filterwarnings('ignore')

# Fix the NumPy (numpy.random.seed) and TensorFlow global seeds to 42.
seed(42)
tf.random.set_seed(42)

In [None]:
# Install the pytorch-tabnet 3.1.1 wheel from a local input path (quiet mode); it must run before the pytorch_tabnet imports below.
!pip -q install ../input/pytorchtabnet/pytorch_tabnet-3.1.1-py3-none-any.whl

In [None]:
from scipy import stats
from scipy.stats import norm
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor

import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler

In [None]:
# Root directory of the competition data; `preprocessor` builds the per-stock
# book/trade parquet paths by appending to this string, so keep the trailing '/'.
data_dir = '../input/optiver-realized-volatility-prediction/'

# First WAP: level-1 prices, each weighted by the size on the opposite side
def calc_wap1(df):
    """Return the first WAP of every row of `df`.

    Computed as
    (bid_price1 * ask_size1 + ask_price1 * bid_size1) / (bid_size1 + ask_size1).
    """
    bid_part = df['bid_price1'] * df['ask_size1']
    ask_part = df['ask_price1'] * df['bid_size1']
    depth = df['bid_size1'] + df['ask_size1']
    return (bid_part + ask_part) / depth

# Second WAP: level-2 prices, each weighted by the size on the opposite side
def calc_wap2(df):
    """Return the second WAP of every row of `df`.

    Computed as
    (bid_price2 * ask_size2 + ask_price2 * bid_size2) / (bid_size2 + ask_size2).
    """
    bid_part = df['bid_price2'] * df['ask_size2']
    ask_part = df['ask_price2'] * df['bid_size2']
    depth = df['bid_size2'] + df['ask_size2']
    return (bid_part + ask_part) / depth

def calc_wap3(df):
    """Return the level-1 price average where each price is weighted by its own side's size.

    Computed as
    (bid_price1 * bid_size1 + ask_price1 * ask_size1) / (bid_size1 + ask_size1).
    """
    bid_part = df['bid_price1'] * df['bid_size1']
    ask_part = df['ask_price1'] * df['ask_size1']
    depth = df['bid_size1'] + df['ask_size1']
    return (bid_part + ask_part) / depth

def calc_wap4(df):
    """Return the level-2 price average where each price is weighted by its own side's size.

    Computed as
    (bid_price2 * bid_size2 + ask_price2 * ask_size2) / (bid_size2 + ask_size2).
    """
    bid_part = df['bid_price2'] * df['bid_size2']
    ask_part = df['ask_price2'] * df['ask_size2']
    depth = df['bid_size2'] + df['ask_size2']
    return (bid_part + ask_part) / depth

# Log return: log(x_t / x_{t-1}) = log(x_t) - log(x_{t-1})
def log_return(series):
    """Return the first difference of the natural log of `series`.

    `series` must offer `.diff()` after `np.log` (e.g. a pandas Series); the
    first element of the result is NaN because it has no predecessor.
    """
    logged = np.log(series)
    return logged.diff()

# Realized volatility: square root of the sum of squared values
def realized_volatility(series):
    """Return sqrt(sum(series ** 2)) for a sequence of (log) returns."""
    squared = series ** 2
    total = np.sum(squared)
    return np.sqrt(total)

# Largest absolute value in a sequence of log returns
def max_log_return(series):
    """Return max(|series|)."""
    magnitudes = np.abs(series)
    return np.max(magnitudes)

# Number of distinct values in a series
def count_unique(series):
    """Return how many distinct values `series` contains (via np.unique)."""
    distinct = np.unique(series)
    return distinct.size


# Function to read our base train and test set
def read_train_test():
    """Load train.csv and test.csv and add the 'row_id' merge key.

    The files are read from the module-level `data_dir` (the same root that
    `preprocessor` uses for the parquet files) instead of a second hard-coded
    copy of that path.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The train and test frames, each with an extra 'row_id' column of the
        form '<stock_id>-<time_id>'.
    """
    train = pd.read_csv(data_dir + 'train.csv')
    test = pd.read_csv(data_dir + 'test.csv')
    # Create a key to merge with book and trade data
    for frame in (train, test):
        frame['row_id'] = frame['stock_id'].astype(str) + '-' + frame['time_id'].astype(str)
    print(f'Our training set has {train.shape[0]} rows')
    return train, test


# Function to preprocess book data (for each stock id)
def book_preprocessor(file_path):
    """Build one row of order-book features per time_id for a single stock.

    Parameters
    ----------
    file_path : str
        Path of the stock's book parquet partition. The stock id is taken
        from the text that follows the first '=' in the path.

    Returns
    -------
    pandas.DataFrame
        Whole-bucket aggregates, plus log-return aggregates restricted to
        seconds_in_bucket >= 500/400/300/200/100 (column names suffixed with
        the threshold), plus a 'row_id' column of the form
        '<stock_id>-<time_id>'.
    """
    df = pd.read_parquet(file_path)
    # Calculate Wap
    df['wap1'] = calc_wap1(df)
    df['wap2'] = calc_wap2(df)
    df['wap3'] = calc_wap3(df)
    df['wap4'] = calc_wap4(df)
    # Calculate log returns inside each time_id.
    # transform() returns a result aligned to df's own index on every pandas
    # version; assigning groupby(...).apply(...) fails on pandas >= 2.0, where
    # the group keys are prepended to the result index.
    for level in (1, 2, 3, 4):
        df[f'log_return{level}'] = df.groupby(['time_id'])[f'wap{level}'].transform(log_return)
    # Calculate wap balance
    df['wap_balance1'] = abs(df['wap1'] - df['wap2'])
    df['wap_balance2'] = abs(df['wap3'] - df['wap4'])
    # Calculate spread
    df['price_spread'] = (df['ask_price1'] - df['bid_price1']) / ((df['ask_price1'] + df['bid_price1']) / 2)
    df['price_spread2'] = (df['ask_price2'] - df['bid_price2']) / ((df['ask_price2'] + df['bid_price2']) / 2)
    df['bid_spread'] = abs(df['bid_price1'] - df['bid_price2'])
    df['ask_spread'] = abs(df['ask_price1'] - df['ask_price2'])
    df["bid_ask_spread"] = abs(df['bid_spread'] - df['ask_spread'])
    df['total_volume'] = (df['ask_size1'] + df['ask_size2']) + (df['bid_size1'] + df['bid_size2'])
    df['volume_imbalance'] = abs((df['ask_size1'] + df['ask_size2']) - (df['bid_size1'] + df['bid_size2']))
    # NOTE(review): these two use the max/min over the WHOLE file, so the value
    # is one constant per stock rather than per time_id. Left unchanged because
    # later cells load pre-trained models - confirm before altering the feature.
    df['bid_ask_max_spread1'] = df['ask_price1'].max() - df['bid_price1'].min()
    df['bid_ask_max_spread2'] = df['ask_price2'].max() - df['bid_price2'].min()

    # Dict for aggregations (the numpy callables are kept as-is: the generated
    # column names derive from them and are referenced by later cells)
    create_feature_dict = {
        'wap1': [np.mean, np.max, np.min],
        'wap2': [np.mean, np.max, np.min],
        'wap3': [np.mean, np.max, np.min],
        'wap4': [np.mean, np.max, np.min],
        'log_return1': [np.sum, max_log_return, realized_volatility],
        'log_return2': [np.sum, max_log_return, realized_volatility],
        'log_return3': [np.sum, max_log_return, realized_volatility],
        'log_return4': [np.sum, max_log_return, realized_volatility],
        'wap_balance1': [np.mean, np.max],
        'wap_balance2': [np.mean, np.max],
        'price_spread':[np.mean, np.max],
        'price_spread2':[np.mean, np.max],
        'bid_spread':[np.max],
        'ask_spread':[np.max],
        'total_volume':[np.sum, np.max],
        'volume_imbalance':[np.sum, np.max],
        "bid_ask_spread":[np.sum,  np.max],
        "bid_ask_max_spread1":[np.mean],
        "bid_ask_max_spread2":[np.mean]
    }
    create_feature_dict_time = {
        'log_return1': [max_log_return, realized_volatility, np.sum],
        'log_return2': [max_log_return, realized_volatility, np.sum],
        'log_return3': [max_log_return, realized_volatility, np.sum],
        'log_return4': [max_log_return, realized_volatility, np.sum],
    }

    # Function to get group stats for different windows (seconds in bucket)
    def get_stats_window(fe_dict,seconds_in_bucket, add_suffix = False):
        # Group by the window
        df_feature = df[df['seconds_in_bucket'] >= seconds_in_bucket].groupby(['time_id']).agg(fe_dict).reset_index()
        # Flatten the two-level column index into '<column>_<aggregation>'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]
        # Add a suffix to differentiate windows
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
        return df_feature

    # Whole-bucket stats first, then left-merge each late-window variant in
    # the order 500, 400, 300, 200, 100 (this fixes the output column order).
    df_feature = get_stats_window(create_feature_dict,seconds_in_bucket = 0, add_suffix = False)
    windows = [500, 400, 300, 200, 100]
    for window in windows:
        df_window = get_stats_window(create_feature_dict_time,seconds_in_bucket = window, add_suffix = True)
        df_feature = df_feature.merge(df_window, how = 'left', left_on = 'time_id_', right_on = f'time_id__{window}')
    # Drop unnecessary time_ids
    df_feature.drop([f'time_id__{window}' for window in windows], axis = 1, inplace = True)

    # Create row_id so we can merge
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['time_id_'].apply(lambda x: f'{stock_id}-{x}')
    df_feature.drop(['time_id_'], axis = 1, inplace = True)
    return df_feature


# Function to preprocess trade data (for each stock id)
def trade_preprocessor(file_path):
    """Build one row of trade features per time_id for a single stock.

    Parameters
    ----------
    file_path : str
        Path of the stock's trade parquet partition. The stock id is taken
        from the text that follows the first '=' in the path.

    Returns
    -------
    pandas.DataFrame
        Whole-bucket aggregates, hand-computed price/size statistics, and
        aggregates restricted to seconds_in_bucket >= 500/400/300/200/100
        (suffixed with the threshold). Every feature column is prefixed with
        'trade_'; 'row_id' has the form '<stock_id>-<time_id>'.
    """
    df = pd.read_parquet(file_path)
    # transform() keeps the result aligned to df's index on every pandas
    # version; assigning groupby(...).apply(...) fails on pandas >= 2.0, where
    # the group keys are prepended to the result index.
    df['log_return'] = df.groupby('time_id')['price'].transform(log_return)
    df['amount']=df['price']*df['size']
    df['average_size'] = df['size']/df['order_count']
    # Dict for aggregations (the numpy callables are kept as-is: the generated
    # column names derive from them and are referenced by later cells)
    create_feature_dict = {
        'log_return':[np.sum, realized_volatility, max_log_return],
        'seconds_in_bucket':[count_unique],
        'average_size': [np.mean, np.std, np.max],
        'size':[np.sum, np.max],
        'order_count':[np.sum,np.max],
        'amount':[np.sum,np.max],
    }
    create_feature_dict_time = {
        'log_return':[np.sum, realized_volatility, max_log_return],
        'seconds_in_bucket':[count_unique],
        'size':[np.sum, np.max],
        'order_count':[np.sum, np.max],
    }
    # Function to get group stats for different windows (seconds in bucket)
    def get_stats_window(fe_dict,seconds_in_bucket, add_suffix = False):
        # Group by the window
        df_feature = df[df['seconds_in_bucket'] >= seconds_in_bucket].groupby(['time_id']).agg(fe_dict).reset_index()
        # Flatten the two-level column index into '<column>_<aggregation>'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]
        # Add a suffix to differentiate windows
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
        return df_feature

    # Whole-bucket stats
    df_feature = get_stats_window(create_feature_dict,seconds_in_bucket = 0, add_suffix = False)

    def tendency(price, vol):
        # Sum of percentage price changes, each weighted by that trade's size
        df_diff = np.diff(price)
        val = (df_diff/price[1:])*100
        power = np.sum(val*vol[1:])
        return(power)

    lis = []
    # Walk the groups once (in order of first appearance) instead of
    # re-filtering the whole frame for every time_id.
    for n_time_id, df_id in df.groupby('time_id', sort = False):
        price = df_id['price'].values
        size = df_id['size'].values
        price_mean = np.mean(price)
        price_diff = np.diff(price)

        tendencyV = tendency(price, size)
        # Counts of prices above / below the bucket mean, and of up / down ticks
        f_max = np.sum(price > price_mean)
        f_min = np.sum(price < price_mean)
        df_max = np.sum(price_diff > 0)
        df_min = np.sum(price_diff < 0)
        # Median absolute deviation from the mean, mean square, inter-quartile range
        abs_diff = np.median(np.abs(price - price_mean))
        energy = np.mean(price**2)
        iqr_p = np.percentile(price,75) - np.percentile(price,25)

        # Same statistics for trade size (note: energy_v is a SUM of squares,
        # whereas the price 'energy' above is a MEAN of squares)
        abs_diff_v = np.median(np.abs(size - np.mean(size)))
        energy_v = np.sum(size**2)
        iqr_p_v = np.percentile(size,75) - np.percentile(size,25)

        lis.append({'time_id':n_time_id,'tendency':tendencyV,'f_max':f_max,'f_min':f_min,'df_max':df_max,'df_min':df_min,
                   'abs_diff':abs_diff,'energy':energy,'iqr_p':iqr_p,'abs_diff_v':abs_diff_v,'energy_v':energy_v,'iqr_p_v':iqr_p_v})

    df_lr = pd.DataFrame(lis)

    df_feature = df_feature.merge(df_lr, how = 'left', left_on = 'time_id_', right_on = 'time_id')

    # Left-merge each late-window variant in the order 500, 400, 300, 200, 100
    # (this fixes the output column order).
    windows = [500, 400, 300, 200, 100]
    for window in windows:
        df_window = get_stats_window(create_feature_dict_time,seconds_in_bucket = window, add_suffix = True)
        df_feature = df_feature.merge(df_window, how = 'left', left_on = 'time_id_', right_on = f'time_id__{window}')
    # Drop unnecessary time_ids
    df_feature.drop([f'time_id__{window}' for window in windows] + ['time_id'], axis = 1, inplace = True)

    df_feature = df_feature.add_prefix('trade_')
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['trade_time_id_'].apply(lambda x:f'{stock_id}-{x}')
    df_feature.drop(['trade_time_id_'], axis = 1, inplace = True)
    return df_feature


# Per-stock and per-time_id summary statistics of the volatility features
def get_time_stock(df):
    """Append mean/std/skew of selected volatility columns, grouped two ways.

    The statistics are computed once per stock_id (columns suffixed '_stock')
    and once per time_id (columns suffixed '_time'), then left-merged back
    onto `df`. A new frame is returned.
    """
    vol_cols = ['log_return1_realized_volatility', 'log_return2_realized_volatility', 'log_return1_realized_volatility_400', 'log_return2_realized_volatility_400',
                'log_return1_realized_volatility_300', 'log_return2_realized_volatility_300', 'log_return1_realized_volatility_200', 'log_return2_realized_volatility_200',
                'trade_log_return_realized_volatility', 'trade_log_return_realized_volatility_400', 'trade_log_return_realized_volatility_300', 'trade_log_return_realized_volatility_200',
                'log_return1_max_log_return', 'log_return2_max_log_return', 'log_return1_max_log_return_400', 'log_return2_max_log_return_400',
                'log_return1_max_log_return_300', 'log_return2_max_log_return_300', 'log_return1_max_log_return_200', 'log_return2_max_log_return_200',
                'trade_log_return_max_log_return', 'trade_log_return_max_log_return_400', 'trade_log_return_max_log_return_300', 'trade_log_return_max_log_return_200']

    def _group_stats(key, tag):
        # mean / std / skew of every volatility column within each `key` group,
        # with flattened column names and a trailing '_<tag>'
        stats = df.groupby([key])[vol_cols].agg(['mean', 'std', skew]).reset_index()
        stats.columns = ['_'.join(col) for col in stats.columns]
        return stats.add_suffix('_' + tag)

    per_stock = _group_stats('stock_id', 'stock')
    per_time = _group_stats('time_id', 'time')

    # Attach both sets of statistics, then remove the duplicated key columns
    merged = df.merge(per_stock, how = 'left', left_on = ['stock_id'], right_on = ['stock_id__stock'])
    merged = merged.merge(per_time, how = 'left', left_on = ['time_id'], right_on = ['time_id__time'])
    merged.drop(['stock_id__stock', 'time_id__time'], axis = 1, inplace = True)
    return merged
# Function to run the per-stock preprocessing in parallel (one job per stock id)
def preprocessor(list_stock_ids, is_train = True):
    """Build book + trade features for every stock id and stack them.

    Parameters
    ----------
    list_stock_ids : iterable
        Stock ids to process; each id selects one parquet partition under
        `data_dir`.
    is_train : bool
        True reads the *_train parquet directories, False the *_test ones.

    Returns
    -------
    pandas.DataFrame
        Concatenation (fresh integer index) of the per-stock frames, each being
        the book features left-joined with the trade features on 'row_id'.
    """
    def _features_for_stock(stock_id):
        # Pick the train or test partition paths for this stock
        if is_train:
            book_path = data_dir + "book_train.parquet/stock_id=" + str(stock_id)
            trade_path = data_dir + "trade_train.parquet/stock_id=" + str(stock_id)
        else:
            book_path = data_dir + "book_test.parquet/stock_id=" + str(stock_id)
            trade_path = data_dir + "trade_test.parquet/stock_id=" + str(stock_id)

        # Book features are the left side, so rows without trades are kept
        book_features = book_preprocessor(book_path)
        trade_features = trade_preprocessor(trade_path)
        return pd.merge(book_features, trade_features, on = 'row_id', how = 'left')

    # n_jobs = -1 lets joblib use every available core
    jobs = (delayed(_features_for_stock)(stock_id) for stock_id in list_stock_ids)
    per_stock_frames = Parallel(n_jobs = -1, verbose = 1)(jobs)
    return pd.concat(per_stock_frames, ignore_index = True)

In [None]:
# Read train and test
train_df, test_df = read_train_test()

# Train features: reuse the pre-computed pickle from the input dataset when it
# exists; otherwise build them from the raw parquet files and cache the result.
# Note the cache is written to the working directory ("train_df.pkl"), not to
# the input path that is checked above.
if os.path.isfile('../input/optiver-final-dataset/train_df.pkl'):
    train_df =  pd.read_pickle('../input/optiver-final-dataset/train_df.pkl')
else:
    train_stock_ids = train_df['stock_id'].unique()
    train_ = preprocessor(train_stock_ids, is_train = True)
    train_df = train_df.merge(train_, on = ['row_id'], how = 'left')
    train_df = get_time_stock(train_df)
    
    del train_stock_ids, train_
    gc.collect()
    
    train_df.to_pickle("train_df.pkl")

# Test features are always recomputed (no cache is consulted for them).
test_stock_ids = test_df['stock_id'].unique()
test_ = preprocessor(test_stock_ids, is_train = False)
test_df = test_df.merge(test_, on = ['row_id'], how = 'left')
test_df = get_time_stock(test_df)

del test_stock_ids, test_
gc.collect()

In [None]:
# "tau" features, added identically (and in the same column order) to the
# train and test frames.
# size_tau*  : sqrt(1 / number of distinct seconds that had a trade), for the
#              whole bucket and for the 400/300/200 windows.
_tau_count_sources = {
    'size_tau': 'trade_seconds_in_bucket_count_unique',
    'size_tau_400': 'trade_seconds_in_bucket_count_unique_400',
    'size_tau_300': 'trade_seconds_in_bucket_count_unique_300',
    'size_tau_200': 'trade_seconds_in_bucket_count_unique_200',
}
# size_tau2* : sqrt(scale / whole-bucket order count); the windowed variants
#              reuse the whole-bucket count with a fixed scale factor.
_tau_order_scales = {
    'size_tau2': 1,
    'size_tau2_400': 0.33,
    'size_tau2_300': 0.5,
    'size_tau2_200': 0.66,
}

for _frame in (train_df, test_df):
    for _new_col, _count_col in _tau_count_sources.items():
        _frame[_new_col] = np.sqrt(1 / _frame[_count_col])
    for _new_col, _scale in _tau_order_scales.items():
        _frame[_new_col] = np.sqrt(_scale / _frame['trade_order_count_sum'])
    # difference between the 400-window and whole-bucket order-count taus
    _frame['size_tau2_d'] = _frame['size_tau2_400'] - _frame['size_tau2']

# Clustered Features

In [None]:
def create_agg_features(train, test):
    """Add per-time_id averages over hard-coded stock groups as extra columns.

    For each of the 7 stock-id groups in `l`, the rows of those stocks are
    averaged per time_id (np.nanmean) and labelled '<n>c1'. The result is
    pivoted to one row per time_id with columns '<feature>_<n>c1', a subset of
    those columns is selected, and the subset is left-merged onto `train` and
    `test` by time_id.

    Returns the pair (train_m, test_m); the inputs are not modified.
    """

    # Making agg features

    # Hard-coded groups of stock ids (presumably the output of an offline
    # clustering step - not derivable from this notebook).
    l = [[1, 11, 22, 50, 55, 56, 62, 73, 76, 78, 84, 87, 96, 101, 112, 116, 122, 124, 126], 
         [0, 4, 5, 10, 15, 16, 17, 23, 26, 28, 29, 36, 42, 44, 48, 53, 66, 69, 72, 85, 94, 95, 100, 102, 109, 111, 113, 115, 118, 120], [3, 6, 9, 18, 61, 63, 86, 97], 
         [27, 31, 33, 37, 38, 40, 58, 59, 60, 74, 75, 77, 82, 83, 88, 89, 90, 98, 99, 110], 
         [2, 7, 13, 14, 19, 20, 21, 30, 32, 34, 35, 39, 41, 43, 46, 47, 51, 52, 64, 67, 68, 70, 93, 103, 104, 105, 107, 108, 114, 119, 123, 125], [81], [8, 80]]
    
    mat = []
    matTest = []
    n = 0
    for ind in l:
        # Per-time_id mean over the stocks of this group; the numeric stock_id
        # is then overwritten with the group label '<n>c1'.
        newDf = train.loc[train['stock_id'].isin(ind) ]
        newDf = newDf.groupby(['time_id']).agg(np.nanmean)
        newDf.loc[:,'stock_id'] = str(n)+'c1'
        mat.append ( newDf )
        newDf = test.loc[test['stock_id'].isin(ind) ]    
        newDf = newDf.groupby(['time_id']).agg(np.nanmean)
        newDf.loc[:,'stock_id'] = str(n)+'c1'
        matTest.append ( newDf )
        n+=1

    mat1 = pd.concat(mat).reset_index()
    mat1.drop(columns=['target'],inplace=True)
    mat2 = pd.concat(matTest).reset_index()
    
    # Append the train aggregates for time_id 5 to the test aggregates
    # (presumably so the test pivot contains every group column even when the
    # test set holds few stocks - TODO confirm).
    mat2 = pd.concat([mat2,mat1.loc[mat1.time_id==5]])
    
    # Long -> wide: one row per time_id, one column per (feature, group label)
    mat1 = mat1.pivot(index='time_id', columns='stock_id')
    mat1.columns = ["_".join(x) for x in mat1.columns.ravel()]
    mat1.reset_index(inplace=True)
    
    mat2 = mat2.pivot(index='time_id', columns='stock_id')
    mat2.columns = ["_".join(x) for x in mat2.columns.ravel()]
    mat2.reset_index(inplace=True)
    
    # NOTE(review): 'trade_log_max_log_return' does not match the column name
    # 'trade_log_return_max_log_return' that trade_preprocessor produces, so it
    # appears to select nothing. Left as-is: changing it would alter the
    # feature set seen by the pre-trained models - confirm before fixing.
    prefix = ['log_return1_realized_volatility', 'log_return2_realized_volatility', 'total_volume_sum', 'trade_size_sum', 'trade_order_count_sum',
              'price_spread_mean', 'volume_imbalance_sum','size_tau2', 'trade_amount_sum', 'trade_log_return_realized_volatility', 
             'log_return1_max_log_return', 'log_return2_max_log_return', 'trade_log_max_log_return']
    
    # Keep only the listed features for groups 0, 1, 3, 4 and 6; the '.' in the
    # pattern matches the single '_' separator (or any other one character).
    selected_cols=mat1.filter(regex='|'.join(f'^{x}.(0|1|3|4|6)c1' for x in prefix)).columns.tolist()
    selected_cols.append('time_id')
    
    train_m = pd.merge(train,mat1[selected_cols],how='left',on='time_id')
    test_m = pd.merge(test,mat2[selected_cols],how='left',on='time_id')
    
    # filling missing values with train means

#     features = [col for col in train_m.columns.tolist() if col not in ['time_id','target','row_id']]
#     train_m[features] = train_m[features].fillna(train_m[features].mean())
#     test_m[features] = test_m[features].fillna(train_m[features].mean())

    return train_m, test_m

In [None]:
# Add the stock-group aggregate columns; both frames are rebound to the merged
# results, so re-running this cell on its own would merge the columns again.
train_df, test_df = create_agg_features(train_df,test_df)

# Nearest Neighbor based Features

In [None]:
# Feature columns that get averaged over each stock's neighbour stocks in the
# loops further down this cell.
vol_cols = ['log_return1_max_log_return', 'log_return1_realized_volatility', 'log_return2_max_log_return', 
'log_return2_realized_volatility', 'price_spread_mean', 'total_volume_sum', 'volume_imbalance_sum', 
'log_return1_max_log_return_500', 'log_return1_realized_volatility_500', 'log_return2_max_log_return_500', 
'log_return2_realized_volatility_500', 'log_return1_max_log_return_400', 'log_return1_realized_volatility_400',
 'log_return2_max_log_return_400', 'log_return2_realized_volatility_400', 'log_return1_max_log_return_300', 
 'log_return1_realized_volatility_300', 'log_return2_max_log_return_300', 'log_return2_realized_volatility_300', 
 'log_return1_max_log_return_200', 'log_return1_realized_volatility_200', 'log_return2_max_log_return_200', 
 'log_return2_realized_volatility_200', 'log_return1_max_log_return_100', 'log_return1_realized_volatility_100', 
 'log_return2_max_log_return_100', 'log_return2_realized_volatility_100', 'trade_log_return_realized_volatility', 
 'trade_size_sum', 'trade_order_count_sum', 'trade_amount_sum', 'trade_log_return_realized_volatility_500', 
 'trade_size_sum_500', 'trade_order_count_sum_500', 'trade_log_return_realized_volatility_400', 'trade_size_sum_400',
 'trade_order_count_sum_400', 'trade_log_return_realized_volatility_300', 'trade_size_sum_300', 'trade_order_count_sum_300',
 'trade_log_return_realized_volatility_200', 'trade_size_sum_200', 'trade_order_count_sum_200',
 'trade_log_return_realized_volatility_100', 'trade_size_sum_100', 'trade_order_count_sum_100']

# Nearest-neighbour based feature generation.
# Step 1: obtain, for every stock, the list of its closest (pos_idx) and
# farthest (neg_idx) stocks by cosine distance between target vectors.
print('Before nearest neighbor:', train_df.shape)

# Only the existence of neg_idx.pkl is checked; pos_idx.pkl is assumed to sit
# next to it.
# SECURITY: pickle.load executes arbitrary code from the file - only load
# pickles from a dataset you trust.
if os.path.isfile('../input/optiver-utils-files/neg_idx.pkl'):
    with open('../input/optiver-utils-files/neg_idx.pkl', 'rb') as fin:
        neg_idx = pickle.load(fin)
    with open('../input/optiver-utils-files/pos_idx.pkl', 'rb') as fin:
        pos_idx = pickle.load(fin)
else:
    # One row per stock, one column per time_id, values = target (missing -> 0)
    train_p = pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')
    train_p = train_p.pivot(index='stock_id', columns='time_id', values='target')
    train_p = train_p.fillna(0)

    n=15
    # 112 neighbours are requested (presumably the number of stocks in train,
    # so that every stock is ranked - TODO confirm).
    model = NearestNeighbors(n_neighbors = 112, metric='cosine')
    model.fit(train_p)
    distances, indices = model.kneighbors(train_p)

    # Replace positional row numbers by the actual stock ids
    for i, col in enumerate(train_p.index):
        indices[i, :] = (train_p.index[indices[i, :]].values)

    pos_idx = {}
    neg_idx = {}

    for i, col in enumerate(train_p.index):
        # [1:n] skips the first entry (presumably the stock itself) and keeps
        # the next n-1 = 14 nearest; [-n:] keeps the n = 15 farthest returned.
        pos_idx[col] = list(indices[i][1:n])
        neg_idx[col] = list(indices[i][-n:])

    # Cache both mappings in the working directory
    with open('pos_idx.pkl', 'wb') as handle:
        pickle.dump(pos_idx, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('neg_idx.pkl', 'wb') as handle:
        pickle.dump(neg_idx, handle, protocol=pickle.HIGHEST_PROTOCOL)

    del train_p, indices, distances, model
    gc.collect()

# Step 2a: for every stock, average `vol_cols` per time_id over its nearest
# stocks (pos_idx) and attach the result as '<feature>_pos' columns.
mat = []
mat_test = []

for stock in train_df['stock_id'].unique():
    ind = pos_idx[stock]
    newDf = train_df.loc[train_df['stock_id'].isin(ind)]
    newDf = newDf[vol_cols+['time_id']].groupby(['time_id']).agg(np.nanmean)
    newDf.columns = [x + "_pos" for x in newDf.columns]
    # Tag the averaged rows with the stock they will be merged onto
    newDf.loc[:, 'stock_id'] = stock
    mat.append(newDf)
    
    newDf = test_df.loc[test_df['stock_id'].isin(ind)]
    newDf = newDf[vol_cols+['time_id']].groupby(['time_id']).agg(np.nanmean)
    newDf.columns = [x + "_pos" for x in newDf.columns]
    newDf['stock_id'] = stock
    mat_test.append(newDf)


mat = pd.concat(mat).reset_index()
mat_test = pd.concat(mat_test).reset_index()

train_df = train_df.merge(mat, how='left', on=['time_id', "stock_id"])
test_df = test_df.merge(mat_test, how='left', on=['time_id', "stock_id"])

# Step 2b: same procedure with the farthest stocks (neg_idx), producing
# '<feature>_neg' columns. Only `vol_cols` are read, so the '_pos' columns
# added above do not feed into these averages.
mat = []
mat_test = []

for stock in train_df['stock_id'].unique():
    ind = neg_idx[stock]
    newDf = train_df.loc[train_df['stock_id'].isin(ind)]
    newDf = newDf[vol_cols+['time_id']].groupby(['time_id']).agg(np.nanmean)
    newDf.columns = [x + "_neg" for x in newDf.columns]
    newDf.loc[:, 'stock_id'] = stock
    mat.append(newDf)
    
    newDf = test_df.loc[test_df['stock_id'].isin(ind)]
    newDf = newDf[vol_cols+['time_id']].groupby(['time_id']).agg(np.nanmean)
    newDf.columns = [x + "_neg" for x in newDf.columns]
    newDf['stock_id'] = stock
    mat_test.append(newDf)

mat = pd.concat(mat).reset_index()
mat_test = pd.concat(mat_test).reset_index()

train_df = train_df.merge(mat, how='left', on=['time_id', "stock_id"])
test_df = test_df.merge(mat_test, how='left', on=['time_id', "stock_id"])

del mat, mat_test
gc.collect()

print('After nearest neighbor:', train_df.shape)

# Predicting with LightGBM and CatBoost

In [None]:
# The only categorical feature.
cat_cols = ['stock_id']

# cont_cols = list(train_df)
# cont_cols.remove('time_id')
# cont_cols.remove('target')
# cont_cols.remove('row_id')

# Continuous features: every feature whose 'Value' in the feature-importance
# file is at least 100.
features = pd.read_csv('../input/optiver-feature-importance-based-lgb/feature_importance.csv')
cont_cols = features.loc[features['Value']>=100, 'Feature'].to_list()

# Make sure a categorical column is never listed twice.
cont_cols = [f for f in cont_cols if f not in cat_cols]

In [None]:
# Root mean squared percentage error
def rmspe(y_true, y_pred):
    """Return sqrt(mean(((y_true - y_pred) / y_true) ** 2)).

    The error is relative to `y_true`, so a zero in `y_true` produces a
    division by zero.
    """
    pct_error = (y_true - y_pred) / y_true
    return np.sqrt(np.mean(np.square(pct_error)))

# Custom evaluation function (RMSPE) for early stopping.
# NOTE: a class with the same name is defined in a later cell and rebinds
# `feval_rmspe`, so this function is no longer reachable once that cell runs.
def feval_rmspe(y_pred, lgb_train):
    """Return ('RMSPE', score, False) for predictions against a dataset's labels.

    `lgb_train` must provide `get_label()`. The trailing False is presumably
    LightGBM's "higher is better" flag - confirm against the training code.
    """
    labels = lgb_train.get_label()
    score = rmspe(labels, y_pred)
    return 'RMSPE', score, False

In [None]:
class feval_rmspe:
    """RMSPE metric object exposing is_max_optimal / evaluate / get_final_error.

    (The method set looks like CatBoost's custom-metric protocol - confirm
    against the training code.) Note that this class reuses the name of the
    `feval_rmspe` function defined in the previous cell and replaces it.
    """

    def is_max_optimal(self):
        # Lower RMSPE is better
        return False

    def evaluate(self, approxes, target, weight):
        """Return (rmspe, 1) for a single prediction vector; `weight` is ignored."""
        assert len(approxes) == 1
        predictions = approxes[0]
        assert len(target) == len(predictions)

        score = rmspe(np.array(target), np.array(predictions))
        # The second element is a constant weight of 1 (weights are not used)
        return score, 1

    def get_final_error(self, error, weight):
        # The accumulated error is already the final value
        return error

In [None]:
import pickle

# Directories holding the pickled per-fold models.
model_path_lgb = '../input/d/lhagiimn/optiver-fastai-and-keras-models'
model_path_cat = '../input/optiver-catboost-models'
# Running fold-averages of the test predictions (two LightGBM variants, one CatBoost).
pred_lgb0 = np.zeros(test_df.shape[0])
pred_lgb1 = np.zeros(test_df.shape[0])
pred_cat0 = np.zeros(test_df.shape[0])
Nfolds = 8
    
# SECURITY: pickle.load executes arbitrary code from the file - only load
# model pickles from a dataset you trust.
for fold in range(Nfolds):
    # Each model is loaded, used and deleted in turn, so only one model is
    # held in memory at a time.
    with open(f'{model_path_lgb}/model_lgb_0_{fold}.pkl', 'rb') as fin:
        model_lgb = pickle.load(fin)
            
    # Dividing by Nfolds while accumulating yields the mean over folds
    pred_lgb0 += model_lgb.predict(test_df[cat_cols+cont_cols])/Nfolds
    
    del model_lgb
    gc.collect()
    
    with open(f'{model_path_lgb}/model_lgb_1_{fold}.pkl', 'rb') as fin:
        model_lgb = pickle.load(fin)
            
    pred_lgb1 += model_lgb.predict(test_df[cat_cols+cont_cols])/Nfolds
    
    del model_lgb
    gc.collect()
    
    with open(f'{model_path_cat}/model_cat_{fold}.pkl', 'rb') as cat:
            model_cat = pickle.load(cat)
    
    pred_cat0 += model_cat.predict(test_df[cat_cols+cont_cols])/Nfolds
    
    del model_cat
    gc.collect()

# Predicting with the fastai neural network

In [None]:
from fastai.tabular.all import *

Nfolds = 8
def pred_tabular_nn(train_df, test_df, model_path, dummy=False):
    """Average the test predictions of the saved per-fold fastai tabular models.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training features plus 'target'; used only to rebuild the
        preprocessing (Categorify / Normalize) and the learner architecture.
    test_df : pandas.DataFrame
        Features to predict on; NaNs are replaced by 0.
    model_path : str
        Learner `path`; the weights 'nn_model_<fold>' are loaded relative to it.
    dummy : bool
        When True, the 'dummy' column is also cast to category.

    Returns
    -------
    numpy.ndarray
        Mean prediction over the `Nfolds` (module-level) saved models.
    """
    train_filled = train_df.fillna(0)
    train_filled.stock_id = train_filled.stock_id.astype('category')
    if dummy == True:
        train_filled.dummy = train_filled.dummy.astype('category')
    cont_nn, cat_nn = cont_cat_split(train_filled, dep_var='target')
    fold_mean = torch.zeros(len(test_df))

    # Rebuild the data pipeline and a learner with the architecture used at training time
    dls = TabularPandas(train_filled, [Categorify, Normalize], cat_nn, cont_nn, y_names='target').dataloaders(1024)
    learn = tabular_learner(dls, y_range=(0,.1), layers=[1024, 512, 256], n_out=1, path=model_path)

    for fold in range(Nfolds):
        print(f'Fold-{fold}')
        learn.load(f'nn_model_{fold}')

        test_dl = dls.test_dl(test_df.fillna(0))
        fold_preds, _targets = learn.get_preds(dl=test_dl)
        # Dividing by Nfolds while accumulating yields the mean over folds
        fold_mean += fold_preds.squeeze() / Nfolds

        del test_dl, fold_preds, _targets
        gc.collect()

    del dls, learn
    gc.collect()

    return fold_mean.numpy()

In [None]:
# For the neural network, use every column of train_df as a feature except
# the identifiers and the target. This REBINDS `cont_cols`, replacing the
# importance-filtered list used for the LightGBM / CatBoost predictions.
cont_cols = list(train_df)
cont_cols.remove('time_id')
cont_cols.remove('target')
cont_cols.remove('row_id')

cont_cols = [f for f in cont_cols if f not in cat_cols]
# Fold-averaged fastai predictions for the test set.
pred_nn0 = pred_tabular_nn(train_df[cat_cols + cont_cols +['target']], test_df[cat_cols + cont_cols], 
                                    model_path='../input/d/lhagiimn/optiver-fastai-and-tabnet-models/', dummy=False)

In [None]:
# Force a garbage-collection pass before the TabNet section.
gc.collect()

# Tabnet Model Training

In [None]:
from pytorch_tabnet.metrics import Metric
import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts

def rmspe(y_true, y_pred):
    """Root mean squared percentage error: sqrt(mean(((y_true - y_pred) / y_true) ** 2)).

    This re-defines the `rmspe` helper from an earlier cell with the same formula.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_square = np.mean(np.square(relative_error))
    return np.sqrt(mean_square)

class RMSPE(Metric):
    """pytorch-tabnet metric computing the root mean squared percentage error."""

    def __init__(self):
        # Lower values are better, hence _maximize is False
        self._maximize = False
        self._name = "rmspe"

    def __call__(self, y_true, y_score):
        """Return sqrt(mean(((y_true - y_score) / y_true) ** 2))."""
        relative_error = (y_true - y_score) / y_true
        return np.sqrt(np.mean(np.square(relative_error)))
    


def RMSPELoss(y_pred, y_true):
    """Torch RMSPE loss: sqrt(mean(((y_true - y_pred) / y_true) ** 2)).

    Note the argument order (prediction first). A clone of the scalar result
    is returned.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_square = torch.mean(relative_error ** 2)
    return torch.sqrt(mean_square).clone()

In [None]:
def Tabnet_pred(train_df, test_df, n_splits=5):
    """Train one TabNet regressor per CV fold and return fold-averaged test predictions.

    Reads the notebook-level ``cont_cols`` and ``cat_cols`` feature lists.
    ``stock_id`` is label-encoded and passed to TabNet as a categorical
    feature; every other feature is standardised with statistics fitted on
    ``train_df``. Each fitted model is saved as ``./fold{k}`` in the working
    directory and the out-of-fold RMSPE is printed at the end.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training rows containing the feature columns and ``target``.
    test_df : pandas.DataFrame
        Rows to predict; must contain the same feature columns.
    n_splits : int, default 5
        Number of contiguous (unshuffled) cross-validation folds.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(test_df)``: the mean prediction over folds.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    features = cont_cols + cat_cols

    # fillna returns new frames, so the column assignments below do not touch the caller's data.
    train_df = train_df.fillna(0)
    test_df = test_df.fillna(0)

    categorical_columns = []
    categorical_dims = {}

    for col in features:
        if col == 'stock_id':
            l_enc = LabelEncoder()
            train_df[col] = l_enc.fit_transform(train_df[col].values)
            test_df[col] = l_enc.transform(test_df[col].values)
            categorical_columns.append(col)
            categorical_dims[col] = len(l_enc.classes_)
        else:
            # Fit on train only and reuse the same statistics for test.
            scaler = StandardScaler()
            train_df[col] = scaler.fit_transform(train_df[col].values.reshape(-1, 1)).ravel()
            test_df[col] = scaler.transform(test_df[col].values.reshape(-1, 1)).ravel()

    # Positions and cardinalities of the categorical features inside the feature matrix.
    cat_idxs = [i for i, f in enumerate(features) if f in categorical_columns]
    cat_dims = [categorical_dims[f] for f in features if f in categorical_columns]

    tabnet_params = dict(
                    cat_idxs=cat_idxs,
                    cat_dims=cat_dims,
                    cat_emb_dim=1,
                    n_d = 16,
                    n_a = 16,
                    n_steps = 2,
                    gamma = 2,
                    n_independent = 2,
                    n_shared = 2,
                    lambda_sparse = 0,
                    optimizer_fn = Adam,
                    optimizer_params = dict(lr = (2e-2)),
                    mask_type = "entmax",
                    scheduler_params = dict(T_0=100, T_mult=1, eta_min=1e-4, last_epoch=-1, verbose=False),
                    scheduler_fn = CosineAnnealingWarmRestarts,
                    seed = 42,
                    verbose = 5,
                    device_name = DEVICE)

    # random_state is meaningless without shuffling and recent scikit-learn
    # versions raise ValueError for that combination, so it is not passed.
    kfold = KFold(n_splits=n_splits, shuffle=False)

    # Materialise the matrices once and slice them by position; the fold
    # indices from KFold are positional, so this is independent of the
    # DataFrame's index labels.
    X_all = train_df[features].values
    y_all = train_df['target'].values.reshape(-1, 1)
    X_test = test_df[features].values

    # Out-of-fold predictions (one column) and the running test average.
    oof_predictions = np.zeros((train_df.shape[0], 1))
    test_predictions = np.zeros(test_df.shape[0])

    for fold, (trn_ind, val_ind) in enumerate(kfold.split(X_all)):
        print(f'Training fold {fold + 1}')
        X_train, X_val = X_all[trn_ind], X_all[val_ind]
        y_train, y_val = y_all[trn_ind], y_all[val_ind]

        clf = TabNetRegressor(**tabnet_params)
        clf.fit(
          X_train, y_train,
          eval_set=[(X_val, y_val)],
          max_epochs = 100,
          patience = 20,
          batch_size = 1024*5,
          virtual_batch_size = 128*5,
          num_workers = 4,
          drop_last = False,
          eval_metric=[RMSPE],
          loss_fn=RMSPELoss
          )

        clf.save_model(f"./fold{fold}")

        oof_predictions[val_ind] = clf.predict(X_val)
        test_predictions += clf.predict(X_test).flatten() / n_splits

    # Both arrays are flattened so the metric compares element-wise instead of broadcasting (n,) against (n, 1).
    oof_score = rmspe(y_all.ravel(), oof_predictions.ravel())
    print(f"OOF score across folds: {oof_score}")

    return test_predictions

In [None]:
# Train TabNet over the CV folds; the fold-averaged test predictions feed the final blend.
tab_pred = Tabnet_pred(train_df, test_df)
gc.collect()

# Keras Neural Network: Training and Prediction

In [None]:
from tensorflow import keras
from keras import backend as K

def root_mean_squared_per_error(y_true, y_pred):
    """Keras loss: root mean squared percentage error of ``y_pred`` relative to ``y_true``."""
    pct_error = (y_true - y_pred) / y_true
    return K.sqrt(K.mean(K.square(pct_error)))
    
# Stop after 50 epochs without a val_loss improvement and restore the best weights.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=50,
    restore_best_weights=True,
    verbose=0,
)

# Scale the learning rate by 0.2 after 10 epochs without a val_loss improvement.
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.2,
    patience=10,
    verbose=0,
)

# time_id x stock_id matrix of targets; missing cells take the column (per-stock) mean.
out_train = pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv').pivot(
    index='time_id', columns='stock_id', values='target'
)
out_train = out_train.fillna(out_train.mean())
out_train.head()


# Data separation based on "knn++": split the time_ids (rows of out_train) into
# `nfolds` groups. Each group starts from one random row and then repeatedly
# draws another row with probability proportional to its accumulated squared
# distance from the rows the group already picked. Uses the global NumPy RNG.
nfolds = 5
index = []  # never used below
totDist = []  # per fold: accumulated squared distance of every remaining row
values = []  # per fold: original row positions assigned to the fold
# Matrix of targets: one row per time_id, one column per stock_id.
mat = out_train.values

# Rescale each column to [-1, 1] before computing distances.
scaler = MinMaxScaler(feature_range=(-1, 1))
mat = scaler.fit_transform(mat)

nind = int(mat.shape[0]/nfolds) # number of rows each fold receives

# Append the original row position as the last column so it survives the deletions below.
mat = np.c_[mat,np.arange(mat.shape[0])]


# One distinct random starting row per fold.
lineNumber = np.random.choice(np.array(mat.shape[0]), size=nfolds, replace=False)

# Descending order, so deleting the rows one by one does not shift the positions still to be deleted.
lineNumber = np.sort(lineNumber)[::-1]

# One distance accumulator per fold, sized to the rows left after the seeds are removed.
for n in range(nfolds):
    totDist.append(np.zeros(mat.shape[0]-nfolds))

# Record the seed row of every fold.
for n in range(nfolds):
    values.append([lineNumber[n]])    


# s[k] holds the row most recently added to fold k; the seeds are removed from the pool.
s=[]
for n in range(nfolds):
    s.append(mat[lineNumber[n],:])
    
    mat = np.delete(mat, obj=lineNumber[n], axis=0)

# nind-1 rounds; in every round each fold draws one more row from the shrinking pool.
# NOTE(review): rows beyond nfolds*nind (when the row count is not divisible by nfolds)
# are never assigned to any fold -- confirm this is acceptable.
for n in range(nind-1):    

    # One uniform draw per fold for this round.
    luck = np.random.uniform(0,1,nfolds)
    
    for cycle in range(nfolds):
        # Tile the fold's latest row so it can be subtracted from every remaining row.

        s[cycle] = np.matlib.repmat(s[cycle], mat.shape[0], 1)

        # Squared Euclidean distance to the latest row (the index column is excluded).
        sumDist = np.sum( (mat[:,:-1] - s[cycle][:,:-1])**2 , axis=1)   
        totDist[cycle] += sumDist        
                
        # Turn the accumulated distances into selection probabilities.
        f = totDist[cycle]/np.sum(totDist[cycle]) # normalizing the totdist
        # Walk the cumulative probabilities until they exceed the random draw.
        # NOTE(review): if the cumulative sum never exceeds the draw (rounding),
        # kn ends at len(f) and the indexing below would be out of range.
        j = 0
        kn = 0
        for val in f:
            j += val        
            if (j > luck[cycle]): # this row was selected
                break
            kn +=1
        lineNumber[cycle] = kn
        
        # Remove the selected row from every fold's accumulator.
        for n_iter in range(nfolds):
            
            totDist[n_iter] = np.delete(totDist[n_iter],obj=lineNumber[cycle], axis=0)
            j= 0
        
        # Remember the row, store its original position, and drop it from the pool.
        s[cycle] = mat[lineNumber[cycle],:]
        values[cycle].append(int(mat[lineNumber[cycle],-1]))
        mat = np.delete(mat, obj=lineNumber[cycle], axis=0)


# Convert stored row positions into the corresponding time_id labels.
for n_mod in range(nfolds):
    values[n_mod] = out_train.index[values[n_mod]]

In [None]:
# Feature-name list shipped with the pre-trained models (presumably the columns they were fitted on).
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open('../input/d/lhagiimn/optiver-fastai-and-keras-models/colNames.pkl', 'rb') as fin:
    colNames = pickle.load(fin)

# Keep only the names that are actually present in train_df.
available_columns = list(train_df)
colNames = [name for name in colNames if name in available_columns]

In [None]:
# Replace +/-inf with NaN in both frames before the quantile transform.
for frame in (train_df, test_df):
    frame.replace([np.inf, -np.inf], np.nan, inplace=True)

# Keep only identifiers, the target (train only) and the selected features.
id_cols = ['stock_id', 'time_id', 'row_id']
train_df = train_df[id_cols + ['target'] + colNames]
test_df = test_df[id_cols + colNames]
gc.collect()

# Map every feature to a normal distribution; the quantiles are fitted on train only.
for col in colNames:
    qt = QuantileTransformer(n_quantiles=2000, output_distribution='normal', random_state=21)
    train_df[col] = qt.fit_transform(train_df[[col]])
    test_df[col] = qt.transform(test_df[[col]])

gc.collect()

In [None]:
# Aggregating some features: create_agg_features (defined outside this section)
# returns the augmented (train, test) pair, which replaces both frames.
train_df,test_df = create_agg_features(train_df, test_df)

In [None]:
# Features that are averaged over each stock's neighbour stocks (per time_id) further down
# to build the "_pos" / "_neg" nearest-neighbour features.
vol_cols = ['log_return1_max_log_return', 'log_return1_realized_volatility', 'log_return2_max_log_return', 
'log_return2_realized_volatility', 'price_spread_mean', 'total_volume_sum', 'volume_imbalance_sum', 
'log_return1_max_log_return_500', 'log_return1_realized_volatility_500', 'log_return2_max_log_return_500', 
'log_return2_realized_volatility_500', 'log_return1_max_log_return_400', 'log_return1_realized_volatility_400',
 'log_return2_max_log_return_400', 'log_return2_realized_volatility_400', 'log_return1_max_log_return_300', 
 'log_return1_realized_volatility_300', 'log_return2_max_log_return_300', 'log_return2_realized_volatility_300', 
 'log_return1_max_log_return_200', 'log_return1_realized_volatility_200', 'log_return2_max_log_return_200', 
 'log_return2_realized_volatility_200', 'log_return1_max_log_return_100', 'log_return1_realized_volatility_100', 
 'log_return2_max_log_return_100', 'log_return2_realized_volatility_100', 'trade_log_return_realized_volatility', 
 'trade_size_sum', 'trade_order_count_sum', 'trade_amount_sum', 'trade_log_return_realized_volatility_500', 
 'trade_size_sum_500', 'trade_order_count_sum_500', 'trade_log_return_realized_volatility_400', 'trade_size_sum_400',
 'trade_order_count_sum_400', 'trade_log_return_realized_volatility_300', 'trade_size_sum_300', 'trade_order_count_sum_300',
 'trade_log_return_realized_volatility_200', 'trade_size_sum_200', 'trade_order_count_sum_200',
 'trade_log_return_realized_volatility_100', 'trade_size_sum_100', 'trade_order_count_sum_100']

#Nearest Neighbor based Feature Generation
print('Before nearest neighbor:', train_df.shape)

neg_idx_path = '../input/optiver-utils-files/neg_idx.pkl'
pos_idx_path = '../input/optiver-utils-files/pos_idx.pkl'

# Use the cached neighbour maps only when BOTH files are present; otherwise
# rebuild them (previously a missing pos_idx.pkl crashed the load branch).
if os.path.isfile(neg_idx_path) and os.path.isfile(pos_idx_path):
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(neg_idx_path, 'rb') as fin:
        neg_idx = pickle.load(fin)
    with open(pos_idx_path, 'rb') as fin:
        pos_idx = pickle.load(fin)
else:
    # stock_id x time_id matrix of targets; missing cells become 0.
    train_p = pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')
    train_p = train_p.pivot(index='stock_id', columns='time_id', values='target')
    train_p = train_p.fillna(0)

    n = 15
    # Rank ALL stocks by cosine distance to each stock, so the tail of every
    # row really holds the farthest ones. Using the row count instead of a
    # literal keeps this valid for any number of stocks in train.csv.
    model = NearestNeighbors(n_neighbors=len(train_p), metric='cosine')
    model.fit(train_p)
    distances, indices = model.kneighbors(train_p)

    # Translate row positions into stock_id labels in one vectorised lookup.
    indices = train_p.index.values[indices]

    pos_idx = {}
    neg_idx = {}

    for i, col in enumerate(train_p.index):
        # Column 0 is skipped (normally the stock itself): n-1 closest stocks.
        pos_idx[col] = list(indices[i][1:n])
        # The n most distant stocks.
        neg_idx[col] = list(indices[i][-n:])

    # Cache the maps in the working directory.
    with open('pos_idx.pkl', 'wb') as handle:
        pickle.dump(pos_idx, handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open('neg_idx.pkl', 'wb') as handle:
        pickle.dump(neg_idx, handle, protocol=pickle.HIGHEST_PROTOCOL)

    del train_p, indices, distances, model
    gc.collect()

def _neighbor_mean_features(df, stocks, neighbor_map, suffix):
    """Average ``vol_cols`` over each stock's neighbour stocks, per ``time_id``.

    For every stock in ``stocks`` the rows of ``df`` belonging to its
    neighbours (``neighbor_map[stock]``) are grouped by ``time_id`` and averaged
    with NaNs ignored. The per-stock results are stacked into one frame with
    the columns ``time_id``, ``<col><suffix>`` for each col in ``vol_cols``,
    and ``stock_id`` (the stock the averages were computed *for*).
    """
    selected = vol_cols + ['time_id']
    parts = []
    for stock in stocks:
        neighbor_rows = df.loc[df['stock_id'].isin(neighbor_map[stock]), selected]
        averaged = neighbor_rows.groupby(['time_id']).agg(np.nanmean)
        averaged.columns = [x + suffix for x in averaged.columns]
        averaged['stock_id'] = stock
        parts.append(averaged)
    return pd.concat(parts).reset_index()


# The stock list always comes from train, for the test frame too.
stock_ids = train_df['stock_id'].unique()

# Closest neighbours give the "_pos" features, most distant ones the "_neg" features.
for neighbor_map, suffix in ((pos_idx, '_pos'), (neg_idx, '_neg')):
    train_feats = _neighbor_mean_features(train_df, stock_ids, neighbor_map, suffix)
    test_feats = _neighbor_mean_features(test_df, stock_ids, neighbor_map, suffix)

    train_df = train_df.merge(train_feats, how='left', on=['time_id', "stock_id"])
    test_df = test_df.merge(test_feats, how='left', on=['time_id', "stock_id"])

    del train_feats, test_feats
    gc.collect()

print('After nearest neighbor:', train_df.shape)

In [None]:
# Rebuild the continuous-feature list from the current train_df columns.
cont_cols = list(train_df.columns)
for non_feature in ('time_id', 'target', 'row_id'):
    cont_cols.remove(non_feature)  # ValueError if the column is missing
cont_cols = [name for name in cont_cols if name not in cat_cols]

len(cont_cols)

In [None]:
from keras.backend import sigmoid
from keras import backend as K

def swish(x, beta = 1):
    """Swish activation: ``x * sigmoid(beta * x)``."""
    gate = sigmoid(beta * x)
    return x * gate

def mish(x, beta = 1):
    """Mish activation: ``x * tanh(softplus(x))``. ``beta`` is accepted but not used."""
    smoothed = K.softplus(x)
    return x * K.tanh(smoothed)

from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation
# Register both activations so layers can refer to them by name (e.g. activation='swish').
get_custom_objects().update({
    'swish': Activation(swish),
    'mish': Activation(mish),
})

# Network hyper-parameters read by base_model().
stock_embedding_size = 24
hidden_units = (256, 128, 64)

# stock_id values that base_model() uses to size the embedding table.
cat_data = train_df['stock_id']

def base_model():
    """Build the (uncompiled) Keras regression network.

    The model takes two inputs per row: ``stock_id`` (shape ``(1,)``), which is
    passed through an embedding of width ``stock_embedding_size``, and
    ``num_data`` with ``len(cont_cols)`` numeric features. The flattened
    embedding is concatenated with the numeric features, sent through one
    Dense ``swish`` layer per entry of ``hidden_units`` and reduced to a single
    linear output named ``prediction``.

    Reads the notebook-level ``cat_data``, ``cont_cols``, ``hidden_units`` and
    ``stock_embedding_size`` at call time.

    Returns
    -------
    keras.Model
        Model with inputs ``[stock_id, num_data]`` and one scalar output.
    """
    # Two inputs per row: the stock id and the numeric feature vector.
    stock_id_input = keras.Input(shape=(1,), name='stock_id')
    num_input = keras.Input(shape=(len(cont_cols),), name='num_data')

    # The embedding table has to cover ids 0..max(stock_id). np.max is
    # vectorised (builtin max() iterates the Series in Python) and int()
    # gives a plain Python integer for the layer configuration.
    n_stock_ids = int(np.max(cat_data)) + 1

    # Embed, flatten and concatenate with the numeric features.
    stock_embedded = keras.layers.Embedding(n_stock_ids, stock_embedding_size,
                                            input_length=1, name='stock_embedding')(stock_id_input)
    stock_flattened = keras.layers.Flatten()(stock_embedded)
    out = keras.layers.Concatenate()([stock_flattened, num_input])

    # Hidden stack.
    for n_hidden in hidden_units:
        out = keras.layers.Dense(n_hidden, activation='swish')(out)

    # Single linear output: the predicted target.
    out = keras.layers.Dense(1, activation='linear', name='prediction')(out)

    model = keras.Model(
        inputs=[stock_id_input, num_input],
        outputs=out,
    )

    return model

In [None]:
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error of ``y_pred`` relative to ``y_true``."""
    relative_error = (y_true - y_pred) / y_true
    return np.sqrt(np.mean(np.square(relative_error)))

In [None]:

target_name = 'target'
scores_folds = {}
model_name = 'NN'
pred_name = 'pred_{}'.format(model_name)

# The folds are the time_id groups stored in `values`; deriving the count from
# it keeps the loop, the training-fold selection and the averaging in sync.
n_folds = len(values)
scores_folds[model_name] = []

# Numeric inputs and the full input list are kept as two separate lists
# instead of removing/re-appending 'stock_id' on every iteration.
num_features = list(cont_cols)
features_to_consider = num_features + ['stock_id']

# Impute with the train column means; columns without any value fall back to 0.
train_df[features_to_consider] = train_df[features_to_consider].fillna(train_df[features_to_consider].mean()).fillna(0)
test_df[features_to_consider] = test_df[features_to_consider].fillna(train_df[features_to_consider].mean()).fillna(0)

train_df[pred_name] = 0
test_df[target_name] = 0
test_predictions_nn = np.zeros(test_df.shape[0])

for n_count in range(n_folds):
    counter = n_count + 1  # 1-based fold number used in messages and model file names
    print('CV {}/{}'.format(counter, n_folds))

    # Train on the time_ids of every other fold, validate on this fold's time_ids.
    train_time_ids = np.concatenate([values[k] for k in range(n_folds) if k != n_count])
    is_train = train_df.time_id.isin(train_time_ids)
    is_valid = train_df.time_id.isin(values[n_count])

    X_train = train_df.loc[is_train, features_to_consider]
    y_train = train_df.loc[is_train, target_name]
    X_test = train_df.loc[is_valid, features_to_consider]
    y_test = train_df.loc[is_valid, target_name]

    #############################################################################################
    # NN
    #############################################################################################

    # Scale the numeric inputs to [-1, 1] using the training rows of this fold only.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    num_data = scaler.fit_transform(X_train[num_features].values)
    cat_data_train = X_train['stock_id']
    target = y_train

    num_data_test = scaler.transform(X_test[num_features].values)
    cat_data_test = X_test['stock_id']

    pretrained_path = f'../input/optiver-keras-models/model_nn_{counter}.h5'
    if os.path.isfile(pretrained_path):
        # Reuse the stored model for this fold.
        model = keras.models.load_model(pretrained_path,
                                        custom_objects={'swish': swish, 'Activation': Activation,
                                                        'root_mean_squared_per_error': root_mean_squared_per_error})
    else:
        # Build and compile a fresh network only when it is going to be trained.
        model = base_model()
        model.compile(
            keras.optimizers.Adam(learning_rate=0.005),
            loss=root_mean_squared_per_error
        )

        model.fit([cat_data_train.values, num_data],
                  target.values,
                  batch_size=2048,
                  epochs=1000,
                  validation_data=([cat_data_test.values, num_data_test], y_test.values),
                  callbacks=[es, plateau],
                  validation_batch_size=len(y_test),
                  shuffle=True, verbose = 1)

        model.save(f"model_nn_{counter}.h5")

    # Validation score for this fold.
    preds = model.predict([cat_data_test.values, num_data_test]).ravel()

    score = round(rmspe(y_true = y_test, y_pred = preds),5)
    print('Fold {} {}: {}'.format(counter, model_name, score))
    scores_folds[model_name].append(score)

    # Accumulate this fold's share of the averaged test prediction.
    tt = scaler.transform(test_df[num_features].values)
    test_predictions_nn += model.predict([test_df['stock_id'].values, tt]).ravel() / n_folds
    

In [None]:
# Display the fold-averaged Keras test predictions.
test_predictions_nn

# Submission

In [None]:
# Blend weights, one per model, listed in the same order as `preds`.
weights = [0.30, 0.05, 0.05, 0.10, 0.15, 0.35]
preds = [pred_lgb0, pred_lgb1, pred_cat0, pred_nn0, tab_pred, test_predictions_nn]

# Weighted sum of the individual model predictions.
pred = np.zeros(test_df.shape[0])
for weight, model_pred in zip(weights, preds):
    pred += weight * model_pred

test_df['target'] = pred

# Stock groups for the optional per-group rescaling below (currently disabled).
group1 = [1, 2, 10, 13, 14, 15, 17, 20, 22, 23, 26, 29, 32, 34, 35, 36, 39, 41, 43, 44, 46, 47, 48, 50, 51, 52,
          53, 56, 59, 61, 62, 63, 64, 67, 68, 69, 70, 73, 76, 77, 84, 87, 93, 95, 96, 100, 101, 104, 105, 107, 108,
          109, 111, 113, 114, 119, 120, 122, 123, 124, 125]
group2 = [3, 6, 7, 8, 11, 19, 21, 28, 38, 42, 55, 66, 72, 74, 78, 82, 85, 86, 94, 99, 102, 115, 116, 118, 126]
group3 = [0, 4, 5, 9, 16, 30, 40, 58, 75, 83, 89, 90, 97, 98, 103, 112, 18, 27, 33, 37, 60, 80, 81, 88, 110]
group4 = [31]

# test_df.loc[test_df.stock_id.isin(group1), 'target'] = test_df.loc[test_df.stock_id.isin(group1), 'target'].values * 1.00
# test_df.loc[test_df.stock_id.isin(group2), 'target'] = test_df.loc[test_df.stock_id.isin(group2), 'target'].values * 0.995
# test_df.loc[test_df.stock_id.isin(group3), 'target'] = test_df.loc[test_df.stock_id.isin(group3), 'target'].values * 0.99
# test_df.loc[test_df.stock_id.isin(group4), 'target'] = test_df.loc[test_df.stock_id.isin(group4), 'target'].values * 0.775

# Write the submission file: one row_id / target pair per test row.
submission_columns = ['row_id', 'target']
test_df[submission_columns].to_csv('submission.csv',index = False)

In [None]:
# Preview the first rows of what was written to submission.csv.
test_df[['row_id', 'target']].head()