In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root directory of the artefacts this notebook loads: the feature-store CSVs,
# the pickled preprocessor and the model weights are all read from below it.
PATH = '/kaggle/input/submission-03'

In [None]:
import numpy as np
import pandas as pd

import os
from collections import OrderedDict, Counter
import itertools
import pickle

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer, StandardScaler, PowerTransformer, MinMaxScaler, OneHotEncoder, PolynomialFeatures
from sklearn.impute import SimpleImputer, KNNImputer

# Column groups used throughout the notebook, keyed by the role they play.
COLUMNS = {
    # Value to predict.
    'target': ['target'],
    # Time column inside a bucket.
    'time': ['seconds_in_bucket'],
    # Flag column handled separately from the numerical ones.
    'cat': ['imbalance_buy_sell_flag'],
    # Raw numerical columns.
    'num': [
        'imbalance_size',
        'matched_size',
        'reference_price',
        'far_price',
        'near_price',
        'bid_price',
        'bid_size',
        'ask_price',
        'ask_size',
        'wap',
    ],
    # Ratio columns created by feature_augmentation().
    'aug': [
        'ratio_imbalance_size_matched_size',
        'ratio_far_price_reference_price',
        'ratio_near_price_reference_price',
        'ratio_bid_price_ask_price',
        'ratio_bid_size_ask_size',
    ],
}


def feature_augmentation(df):
    """Add the five ratio columns to ``df`` in place and return it.

    Each new column is named ``ratio_<numerator>_<denominator>`` and holds
    ``numerator / (denominator + 1)``.

    Args:
        df: DataFrame containing the numerator and denominator columns
            listed below.

    Returns:
        The same DataFrame object, now carrying the ratio columns.
    """
    ratio_pairs = (
        ('imbalance_size', 'matched_size'),
        ('far_price', 'reference_price'),
        ('near_price', 'reference_price'),
        ('bid_price', 'ask_price'),
        ('bid_size', 'ask_size'),
    )
    for numerator, denominator in ratio_pairs:
        # The denominator is shifted by one before dividing.
        df[f'ratio_{numerator}_{denominator}'] = df[numerator] / (df[denominator] + 1)
    return df


def create_dataframes(df, n_stocks=200):
    """Pivot a long table into one feature frame per stock plus a target frame.

    Rows are grouped by ``(date_id, seconds_in_bucket)``. Every group is
    left-joined onto the full list of stock ids ``0 .. n_stocks - 1``, so a
    stock missing from a group gets NaN features and a NaN target, and stock
    ids outside that range are dropped.

    Args:
        df: Long-format DataFrame with ``date_id``, ``stock_id`` and the
            columns listed in ``COLUMNS``. If it has no ``target`` column, a
            zero-filled one is added on a copy; the caller's frame is left
            untouched.
        n_stocks: Number of stock slots in the output (defaults to 200).

    Returns:
        ``(features_dfs, targets_df)`` where ``features_dfs`` is a list of
        ``n_stocks`` DataFrames (one row per group, float32 values) and
        ``targets_df`` is a DataFrame of shape ``(n_groups, n_stocks)``.
    """
    if 'target' not in df.columns:
        # assign() returns a new frame, so the caller's DataFrame is not modified.
        df = df.assign(target=0)

    featcols = ['stock_id'] + COLUMNS['time'] + COLUMNS['cat'] + COLUMNS['num'] + COLUMNS['aug']

    grouped = df.groupby(['date_id', 'seconds_in_bucket'])
    n_groups = len(grouped)
    stock_id_df = pd.DataFrame({'stock_id': np.arange(n_stocks)})

    features = np.zeros((n_groups, n_stocks, len(featcols)), dtype='float32')
    targets = np.zeros((n_groups, n_stocks), dtype='float32')

    for i, ((_, seconds_in_bucket), group) in enumerate(grouped):
        group_df = stock_id_df.merge(group, on='stock_id', how='left')
        # Stocks absent from the group have NaN here after the left join;
        # restore the group's time so the time feature is always defined.
        group_df['seconds_in_bucket'] = seconds_in_bucket

        features[i, :, :] = group_df[featcols].values
        targets[i, :] = group_df['target'].values

    features_dfs = [pd.DataFrame(features[:, stock, :], columns=featcols) for stock in range(n_stocks)]
    targets_df = pd.DataFrame(targets)

    return features_dfs, targets_df


class NumericalTransformer(BaseEstimator, TransformerMixin):
    """Impute, clip and log-scale numerical columns.

    ``fit`` records the per-column minimum, maximum and mean (ignoring NaN).
    ``transform`` replaces NaN with the recorded mean, clips to the recorded
    range, shifts so the minimum is zero and returns ``log1p(value) + 1``.
    """

    @staticmethod
    def _as_array(X):
        """Return the underlying array of a DataFrame, or ``X`` itself otherwise."""
        if isinstance(X, pd.DataFrame):
            return X.values
        return X

    def fit(self, X, y=None):
        """Store the column-wise NaN-aware min (``lb``), max (``ub``) and mean."""
        data = self._as_array(X)
        self.lb = np.nanmin(data, axis=0, keepdims=True)
        self.ub = np.nanmax(data, axis=0, keepdims=True)
        self.mean = np.nanmean(data, axis=0, keepdims=True)
        return self

    def transform(self, X, y=None):
        """Apply mean imputation, clipping to ``[lb, ub]`` and the log scaling."""
        data = self._as_array(X)
        missing = np.isnan(data)
        # NaN entries become 0 and then receive the column mean via the mask.
        filled = np.nan_to_num(data) + missing * self.mean
        shifted = np.clip(filled, self.lb, self.ub) - self.lb
        return np.log1p(shifted) + 1
    

def cyclic_encoding(vals, max_val=540):
    """Encode values as a (sin, cos) pair on a circle of period ``max_val``.

    Args:
        vals: Array of values; it is flattened before encoding.
        max_val: Value that corresponds to one full turn.

    Returns:
        float32 array of shape ``(vals.size, 2)`` with sine in column 0 and
        cosine in column 1.
    """
    angle = 2 * np.pi * vals.ravel() / max_val
    return np.stack((np.sin(angle), np.cos(angle)), axis=1).astype('float32')


class Enricher:
    """Per-stock aggregate statistics that are joined onto feature frames.

    ``fit`` computes two aggregate tables (per ``stock_id`` and per
    ``stock_id``/``seconds_in_bucket``), ``save``/``load`` persist them as CSV
    files in a ``feature_store`` sub-directory, and ``transform`` left-joins
    them onto each feature frame.
    """

    def __init__(self):
        pass

    @staticmethod
    def _store_file(path, key):
        """Return the CSV path for table ``key``; ``path=None`` means ``PATH``."""
        root = PATH if path is None else path
        return os.path.join(root, 'feature_store', f'{key}.csv')

    def feature_aggregation(self, df, keys):
        """Aggregate ``df`` by ``keys``.

        Categorical columns get their most frequent value (``<col>_mode``);
        numerical, augmented and target columns get ``mean``, ``std``, ``min``
        and ``max`` (``<col>_<func>``).

        Returns:
            DataFrame with one row per key combination.
        """
        cols = COLUMNS['cat']
        funcs = [lambda s: Counter(s).most_common(1)[0][0]]
        df_cat = df.groupby(keys)[cols].agg(funcs)
        df_cat.columns = ['_'.join(names).strip() for names in itertools.product(cols, ['mode'])]

        cols = COLUMNS['num'] + COLUMNS['aug'] + COLUMNS['target']
        funcs = ['mean', 'std', 'min', 'max']
        df_num = df.groupby(keys)[cols].agg(funcs)
        df_num.columns = ['_'.join(names).strip() for names in itertools.product(cols, funcs)]

        df_all = df_cat.merge(df_num, on=keys, how='left')
        return df_all

    def load(self, path=None):
        """Read both aggregate tables from ``<path>/feature_store``.

        Args:
            path: Root directory; defaults to the module-level ``PATH``.
        """
        self.enrichment = {
            key: pd.read_csv(self._store_file(path, key))
            for key in ['agg_stock', 'agg_stock_seconds']
        }
        return self

    def save(self, path=None):
        """Write every aggregate table to ``<path>/feature_store/<key>.csv``.

        The index is written only when it has named levels (the group keys
        after ``fit``). Tables that came from ``load`` already hold the keys
        as columns and have an unnamed index; writing that index would add a
        spurious ``Unnamed: 0`` column on the next ``load``.

        Args:
            path: Root directory; defaults to the module-level ``PATH``.
        """
        for key, df in self.enrichment.items():
            target = self._store_file(path, key)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            has_key_index = any(name is not None for name in df.index.names)
            df.to_csv(target, index=has_key_index)
        return self

    def fit(self, df):
        """Compute both aggregate tables from ``df`` and keep them on the instance."""
        self.enrichment = {
            'agg_stock': self.feature_aggregation(df, keys=['stock_id']).add_suffix('_agg_stock'),
            'agg_stock_seconds': self.feature_aggregation(df, keys=['stock_id', 'seconds_in_bucket']).add_suffix('_agg_stock_seconds'),
        }
        return self

    def transform(self, dfs):
        """Left-join the aggregate tables onto every frame in ``dfs``.

        The list is updated in place (each entry is replaced by its enriched
        frame) and also returned.
        """
        for i, df in enumerate(dfs):
            df = df.merge(self.enrichment['agg_stock'], on=['stock_id'], how='left')
            df = df.merge(self.enrichment['agg_stock_seconds'], on=['stock_id', 'seconds_in_bucket'], how='left')
            dfs[i] = df
        return dfs


class Preprocessor:
    """Fits and applies one column transformer per stock plus a target scaler.

    Fitted objects live in ``self.transformers`` under the keys ``'targets'``
    and ``'stock_<i>'``.
    """

    def __init__(self):
        pass

    def create_transformer(self, enrichcols):
        """Build an unfitted ColumnTransformer for a single stock.

        Output blocks, in order: cyclic time encoding, NaN indicators for the
        raw and augmented numerical columns, the categorical column with NaN
        replaced, and the scaled numerical columns (raw, augmented and
        ``enrichcols``).
        """
        base_cols = COLUMNS['num'] + COLUMNS['aug']
        time_pipe = make_pipeline(FunctionTransformer(np.nan_to_num), FunctionTransformer(cyclic_encoding))
        num_pipe = make_pipeline(NumericalTransformer(), StandardScaler())
        return ColumnTransformer([
            ('time', time_pipe, COLUMNS['time']),
            ('isnan', FunctionTransformer(np.isnan), base_cols),
            ('cat', FunctionTransformer(np.nan_to_num), COLUMNS['cat']),
            ('num', num_pipe, base_cols + enrichcols),
        ])

    def fit(self, features_dfs, targets_df):
        """Fit the target scaler and one transformer per entry of ``features_dfs``."""
        self.transformers = {'targets': StandardScaler().fit(targets_df)}
        first_columns = features_dfs[0].columns
        # Enrichment columns are recognised by suffix, per-stock ones first.
        enrichcols = [
            col
            for suffix in ('_agg_stock', '_agg_stock_seconds')
            for col in first_columns
            if col.endswith(suffix)
        ]
        for idx, stock_df in enumerate(features_dfs):
            self.transformers[f'stock_{idx}'] = self.create_transformer(enrichcols).fit(stock_df)
        return self

    def transform_features(self, features_dfs, targets_df=None):
        """Transform every stock frame and stack the results along axis 1.

        Returns:
            Array of shape ``(n_rows, n_stocks, n_features)``.
        """
        per_stock = [
            self.transformers[f'stock_{idx}'].transform(stock_df)
            for idx, stock_df in enumerate(features_dfs)
        ]
        return np.stack(per_stock, axis=1)

    def transform_targets(self, targets_df):
        """Scale the targets and append a validity mask.

        Returns:
            Array whose last axis holds ``[scaled value with NaN -> 0,
            1 if the value was present else 0]``.
        """
        scaled = self.transformers['targets'].transform(targets_df)
        present = 1 - np.isnan(scaled)
        return np.stack([np.nan_to_num(scaled), present], axis=2)

    def inverse_transform_targets(self, y):
        """Map scaled targets back to the original scale."""
        target_scaler = self.transformers['targets']
        return target_scaler.inverse_transform(y)


# Restore the fitted enrichment tables and the fitted preprocessor from PATH.
enricher = Enricher().load()
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# from a trusted source. The context manager closes the file handle again.
with open(os.path.join(PATH, 'preprocessor.pkl'), 'rb') as preprocessor_file:
    preprocessor = pickle.load(preprocessor_file)

In [None]:
import tensorflow as tf
# Show which TensorFlow version this kernel is running.
print(tf.__version__)
# Fix TensorFlow's global random seed.
tf.random.set_seed(4243) 

class AttentionBlock(tf.keras.layers.Layer):
    """Multi-head attention over axis 1 followed by a small feed-forward net.

    Each head projects the input to a query and a value with bias-free Dense
    layers and applies ``tf.keras.layers.Attention``. Head outputs are
    concatenated, added to the input, layer-normalised and passed through
    ``Dense(512, relu) -> Dense(32)``.

    NOTE(review): weights for this layer are loaded from a checkpoint further
    down in the notebook; attribute names, dict keys and layer creation order
    presumably have to match what was saved - confirm before changing them.
    """
    
    def __init__(self, num_heads, n_inputs):
        """Create ``num_heads`` heads whose output widths sum to ``n_inputs``."""
        super().__init__()        
        self.blocks = []
        # Split n_inputs evenly across the heads; the last head takes the
        # remainder so the concatenated width equals n_inputs exactly.
        dims = [n_inputs // num_heads for _ in range(num_heads)]
        dims[-1] += n_inputs - sum(dims)
        for att_dims in dims:
            block = {
                'query': tf.keras.layers.Dense(att_dims, use_bias=False),
                'value': tf.keras.layers.Dense(att_dims, use_bias=False),
                'attention': tf.keras.layers.Attention(use_scale=True),
            }
            self.blocks.append(block)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(512, activation='relu'), #, kernel_regularizer=tf.keras.regularizers.L2(1e-4)
            tf.keras.layers.Dense(32),            
        ])
        self.layernorm = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply all heads, the residual connection, layer norm and the FFN.

        The residual addition requires the last dimension of ``inputs`` to be
        ``n_inputs``. The output has 32 values on its last axis.
        """
        lst = []
        for block in self.blocks:
            query = block['query'](inputs)
            value = block['value'](inputs)
            # Only query and value are passed, so no separate key projection is used.
            att_out = block['attention']([query, value])
            lst.append(att_out)
        # Concatenating the heads on the last axis restores width n_inputs.
        att = tf.concat(lst, axis=2)
        h = self.layernorm(att + inputs)
        outputs = self.ffn(h)
        return outputs 
    
    
class Model(tf.keras.Model):
    """Attention block -> Conv1D -> flatten -> dense head with 200 outputs.

    NOTE(review): weights are loaded into this model from a checkpoint below;
    attribute names and structure presumably have to match what was saved -
    confirm before renaming anything.
    """

    def __init__(self, n_inputs):
        """Build the layers; ``n_inputs`` is the feature width given to the attention block."""
        super().__init__() 
        
        self.attention = AttentionBlock(num_heads=6, n_inputs=n_inputs)                
        # The kernel spans 200 steps; the notebook feeds 200-step inputs, so
        # presumably the convolution yields a single output step - confirm.
        self.conv = tf.keras.layers.Conv1D(filters=256, kernel_size=200)
        self.flatten = tf.keras.layers.Flatten()

        self.ffn = tf.keras.Sequential([
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(1e-4)),
            tf.keras.layers.Dense(200),
        ])

    def call(self, inputs):        
        """Run the forward pass and return an array with 200 values per sample."""
        att = self.attention(inputs)
        h = self.conv(att)      
        h = self.flatten(h)
        outputs = self.ffn(h)
        return outputs  

# Feature width per stock fed to the model.
# NOTE(review): presumably must equal the last dimension produced by
# preprocessor.transform_features - confirm.
n_inputs = 163
model = Model(n_inputs)
model.load_weights( os.path.join(PATH, 'model', 'weights') )
# Run one all-zero batch of shape (1, 200, n_inputs) through the model.
# Presumably this builds the layers so the loaded weights take effect - confirm.
# The trailing semicolon hides the returned array in the notebook output.
model.predict(np.zeros((1,200,n_inputs), dtype='float32'));

In [None]:
import optiver2023
# Create the competition environment and the iterator over test batches.
# Each item is unpacked below as (test, revealed_targets, sample_prediction).
env = optiver2023.make_env()
iter_test = env.iter_test()


def helper_features(test):
    """Turn one raw test batch into the feature array expected by the model.

    Steps: add the ratio columns, pivot into per-stock frames, join the
    stored aggregates, then apply the fitted per-stock transformers.
    """
    augmented = feature_augmentation(test)
    per_stock_frames, _ = create_dataframes(augmented)
    return preprocessor.transform_features(enricher.transform(per_stock_frames))

def helper_predictions(y_pred):
    """Undo the target scaling and flatten the result to one dimension."""
    return preprocessor.inverse_transform_targets(y_pred).ravel()

# Predict every test batch served by the competition API and submit it.
for (test, revealed_targets, sample_prediction) in iter_test:

    features = helper_features(test)

    # verbose=0 stops Keras from printing a progress bar for every batch.
    y_pred = helper_predictions(model.predict(features, verbose=0))

    # The last "_"-separated part of row_id is used as the index into y_pred.
    # NOTE(review): assumes each batch holds a single (date_id, seconds_in_bucket)
    # group, so y_pred has one entry per stock - confirm.
    stock_ids = sample_prediction['row_id'].str.rsplit('_', n=1).str[-1].astype(int)
    sample_prediction['target'] = y_pred[stock_ids.to_numpy()]

    env.predict(sample_prediction)