In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = (10, 9)
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# dropped the old aliases; prefer the original name when the installed version
# still ships it and fall back to the renamed one otherwise.
plt.style.use(
    'seaborn-darkgrid' if 'seaborn-darkgrid' in plt.style.available
    else 'seaborn-v0_8-darkgrid'
)
import seaborn as sns
sns.set_style('darkgrid')

from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression, Lasso, Ridge

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import IntegerLookup

from warnings import filterwarnings, simplefilter
filterwarnings('ignore')
simplefilter('ignore')

from tqdm.auto import tqdm
from tqdm.keras import TqdmCallback

import gc

In [None]:
# Load the training frame from a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source. The path suggests half-precision columns; confirm dtypes.
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')

In [None]:
def make_dataset(x, y = None, batch_size = 512, shuffle = False) :
    """Build a batched ``tf.data.Dataset`` of ``((investment_id, features), y)``.

    Parameters
    ----------
    x : pandas.DataFrame
        Must contain an ``investment_id`` column; every column whose name
        starts with ``f_`` is used as a feature, in frame order.
    y : array-like, optional
        Targets aligned row-for-row with ``x``. Passed to
        ``from_tensor_slices`` unchanged.
    batch_size : int
        Number of consecutive rows per batch.
    shuffle : bool
        When True the order of the *batches* is shuffled (buffer of 500
        batches, fixed seed 50); rows inside a batch stay contiguous.

    Returns
    -------
    tf.data.Dataset
        Batched, cached and prefetched dataset.
    """
    feature_cols = [col for col in x.columns if col.startswith('f_')]
    investment = x['investment_id'].values
    feature = x[feature_cols].values

    dataset = tf.data.Dataset.from_tensor_slices(((investment, feature), y))
    # Cache after batching so later epochs replay the already-batched tensors.
    dataset = dataset.batch(batch_size = batch_size).cache()
    if shuffle :
        # Shuffling sits after cache() so each epoch draws a new batch order.
        dataset = dataset.shuffle(500, seed = 50)
    # Prefetch is the final stage so it overlaps everything upstream with training.
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# Distinct investment ids present in the training frame; these become the
# vocabulary the lookup layer is adapted on.
investment_ids = train.investment_id.unique()
# One slot more than the number of distinct ids.
# NOTE(review): presumably the extra slot is for out-of-vocabulary ids -- confirm.
investment_size = train.investment_id.nunique() + 1
investment_size

In [None]:
# Lookup layer shared by every model built below; its vocabulary is capped at
# investment_size entries.
InvestmentLayer = IntegerLookup(max_tokens = investment_size)
# Fit the vocabulary on the distinct training ids, wrapped in a one-column frame.
InvestmentLayer.adapt(pd.DataFrame({'investment_ids' : investment_ids.tolist()}))

In [None]:
def correlationMetric(x, y, axis=-2):
    """Pearson correlation coefficient of two tensors along ``axis`` (default -2).

    Parameters
    ----------
    x, y : tensor-like
        Inputs of matching shape. ``y`` is cast to the dtype of ``x``.
    axis : int
        Axis that is reduced when computing means, variances and covariance.

    Returns
    -------
    tf.Tensor
        Correlation with ``axis`` removed from the input shape. No epsilon is
        added to the denominator, so a zero-variance input produces NaN/inf.
    """
    x = tf.convert_to_tensor(x)
    y = tf.cast(y, x.dtype)
    n = tf.cast(tf.shape(x)[axis], x.dtype)
    # keepdims=True lets the means broadcast back against the inputs for any
    # choice of ``axis``, not only for leading axes.
    xmean = tf.reduce_sum(x, axis=axis, keepdims=True) / n
    ymean = tf.reduce_sum(y, axis=axis, keepdims=True) / n
    xdev = x - xmean
    ydev = y - ymean
    xvar = tf.reduce_sum(tf.square(xdev), axis=axis)
    yvar = tf.reduce_sum(tf.square(ydev), axis=axis)
    cov = tf.reduce_sum(xdev * ydev, axis=axis)
    return cov / tf.sqrt(xvar * yvar)


def correlationLoss(x,y, axis=-2):
    """Loss equal to the mean of ``1 - Pearson correlation`` along ``axis``.

    Parameters
    ----------
    x, y : tensor-like
        Inputs of matching shape. ``y`` is cast to the dtype of ``x``.
    axis : int
        Axis over which the correlation is computed.

    Returns
    -------
    tf.Tensor
        Scalar float32 tensor; 0 for perfectly correlated inputs, 2 for
        perfectly anti-correlated ones.
    """
    # Reuse the metric so loss and metric can never drift apart.
    corr = correlationMetric(x, y, axis=axis)
    # tf.cast (rather than convert_to_tensor with a dtype) also accepts
    # non-float32 inputs, e.g. float16, without raising a dtype mismatch.
    return tf.cast(K.mean(1.0 - corr), tf.float32)

In [None]:
def build_model(n_features = 300, learning_rate = 7e-4) :
    """Build and compile the two-input regression network.

    The model has two branches that are concatenated and fed to a densely
    connected head:

    * investment branch: uint16 id -> ``InvestmentLayer`` lookup -> 64-d
      embedding -> Dense 128/128/256 (swish);
    * feature branch: float16 features -> BatchNorm -> Dense 512 -> seven
      Conv1D(kernel 4) / MaxPool1D / BatchNorm stages -> Flatten.

    Parameters
    ----------
    n_features : int
        Width of the feature input (defaults to the original 300).
    learning_rate : float
        Adam learning rate (defaults to the original 7e-4).

    Returns
    -------
    keras.Model
        Compiled with ``correlationLoss`` as loss and ``correlationMetric``
        as metric; inputs are ``[investment, feat]``, output has one unit.
    """
    def regularized_pair(inputs, units) :
        # Two parallel L2-regularised swish Dense layers fed by the same tensor.
        return [
            keras.layers.Dense(units, activation = 'swish', kernel_regularizer = 'l2')(inputs)
            for _ in range(2)
        ]

    # --- investment-id branch -------------------------------------------------
    investment = keras.layers.Input(shape = (1, ), dtype = tf.uint16)
    xinvest = InvestmentLayer(investment)
    xinvest = keras.layers.Embedding(investment_size, 64, input_length = 1)(xinvest)
    xinvest = keras.layers.Reshape((-1,))(xinvest)
    for units in (128, 128, 256) :
        xinvest = keras.layers.Dense(units, activation = 'swish')(xinvest)

    # --- feature branch -------------------------------------------------------
    feat = keras.layers.Input(shape = (n_features, ), dtype = tf.float16)
    xfeat = keras.layers.BatchNormalization()(feat)
    xfeat = keras.layers.Dense(512, activation = 'swish')(xfeat)
    # Treat the 512 dense activations as a length-512, single-channel sequence.
    xfeat = keras.layers.Reshape((-1, 1))(xfeat)
    for filters in (16, 32, 32, 64, 64, 128, 256) :
        xfeat = keras.layers.Conv1D(filters, 4)(xfeat)
        xfeat = keras.layers.MaxPool1D()(xfeat)
        xfeat = keras.layers.BatchNormalization()(xfeat)
    xfeat = keras.layers.Flatten()(xfeat)

    # --- head: dense pairs with skip concatenations ---------------------------
    x = keras.layers.Concatenate(axis = -1)([xinvest, xfeat])
    branches = []
    for units in (512, 256) :
        pair = regularized_pair(x, units)
        branches.extend(pair)
        # Each stage sees its own pair plus everything that fed it.
        x = keras.layers.Concatenate(axis = -1)(pair + [x])
    branches.extend(regularized_pair(x, 128))

    # The final stage uses only the six dense outputs, not the raw branch features.
    x = keras.layers.Concatenate(axis = -1)(branches)
    x = keras.layers.Dense(64, activation = 'swish', kernel_regularizer = 'l2')(x)
    x = keras.layers.Dense(1, activation = 'linear')(x)

    model = keras.models.Model(
        inputs = [investment, feat],
        outputs = x
    )
    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate = learning_rate),
        loss = correlationLoss,
        metrics = [correlationMetric]
    )
    return model

In [None]:
# Build one model instance purely for inspection: print the layer summary and
# render the layer graph with tensor shapes as the cell output.
model = build_model()
model.summary()
keras.utils.plot_model(model, show_shapes = True)

In [None]:
# Time-based folds as (train_positions, validation_positions) pairs.
# Positions are integer row offsets (not index labels) because the training
# loop selects rows with .iloc; this stays correct even if `train` does not
# carry a default 0..n-1 index.
# NOTE(review): the last fold validates on time_ids 801-900, which precede its
# training window (> 900) -- confirm this backwards split is intentional.
cv_index = [
    (np.flatnonzero(train_mask.to_numpy()), np.flatnonzero(val_mask.to_numpy()))
    for train_mask, val_mask in [
        ((train.time_id > 800) & (train.time_id <= 1000), train.time_id > 1000),
        ((train.time_id > 900) & (train.time_id <= 1100), train.time_id > 1100),
        ((train.time_id > 1000) & (train.time_id <= 1200), train.time_id > 1200),
        ((train.time_id > 800) & (train.time_id <= 1200), train.time_id > 1200),
        (train.time_id > 900, (train.time_id > 800) & (train.time_id <= 900)),
    ]
]

In [None]:
# Discard the inspection model and reset the Keras session before training;
# gc.collect() is the last expression, so its return value is the cell output.
del model
K.clear_session()
gc.collect()

In [None]:
# Split the target off the feature frame in place (no copy of `train` is made).
# Guarded so that re-running this cell after the column has already been
# popped keeps the existing `y` instead of raising.
if 'target' in train.columns :
    y = train.pop('target')
elif 'y' not in globals() :
    raise KeyError('target')
y

In [None]:
# Train one model per fold, save its weights, and record the Pearson
# correlation between validation targets and predictions.
scores = []
for i, (t, v) in enumerate(cv_index) :
    K.clear_session()
    xtrain = train.iloc[t, :]
    xval = train.iloc[v, :]
    ytrain = y.iloc[t]
    yval = y.iloc[v]

    train_ds = make_dataset(xtrain, ytrain, shuffle = True, batch_size = 512)
    val_ds = make_dataset(xval, yval, shuffle = False, batch_size = 512)
    # The datasets hold their own tensors, so the frame slices can go.
    del xtrain, xval
    gc.collect()

    model = build_model()
    cb = [
        keras.callbacks.EarlyStopping(patience = 10, min_delta = .001, restore_best_weights = True),
        keras.callbacks.ReduceLROnPlateau(patience = 3, factor = .3, min_lr = 2e-5),
        TqdmCallback(verbose = 1)
    ]
    history = model.fit(
        train_ds, validation_data = val_ds,
        epochs = 250, callbacks = cb, verbose = 0
    )
    yhat = model.predict(val_ds).ravel()
    model.save_weights(f'model_fold{i}')

    # Learning curves from epoch index 3 onward (the first epochs are skipped).
    pd.DataFrame(history.history).loc[3:, ['val_loss', 'loss']].plot.line(figsize = (8, 8))
    plt.show()

    # Score in float64 so the result does not depend on the stored target dtype.
    # NOTE(review): the input path suggests float16 targets -- confirm.
    score, p = pearsonr(np.asarray(yval, dtype = np.float64), yhat.astype(np.float64))
    scores.append(score)
    print(f'Pearson Fold {i} : {score}')
    print(f'p-value Fold {i} : {p}')

    K.clear_session()
    # Release the cached datasets and predictions too, so the previous fold's
    # data is not still alive while the next fold's datasets are being built.
    del train_ds, val_ds, ytrain, yval, yhat, model, history, score
    gc.collect()