In [None]:
import numpy as np
import pandas as pd

In [None]:
# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
TRAIN_PICKLE = '../input/ubiquant-market-prediction-half-precision-pickle/train.pkl'

df = pd.read_pickle(TRAIN_PICKLE)
df = df.dropna()  # discard every row that contains a missing value
# Keep the regression label in single precision.
df['target'] = df['target'].astype(np.float32)
df.info()
df.head()

In [None]:
# Independent copy of the per-row time_id values; used later as the grouping key
# for the train/validation split.
time = df['time_id'].to_numpy(copy=True)

In [None]:
from sklearn.model_selection import GroupKFold

# Move the identifier columns into the index so only feature columns (and the
# target) remain as data columns.
df.set_index(['time_id', 'investment_id'], inplace=True)
x = df.copy()
y = x.pop('target')  # removes 'target' from x and returns it as the label Series

# Rows sharing a time_id are kept in the same fold (groups=time).
gkf = GroupKFold(n_splits=5)

# Only a single hold-out split is used downstream: the last of the five folds.
# Take its indices directly instead of slicing x and y for every fold and
# throwing the first four results away.
idx_train, idx_valid = list(gkf.split(x, y, groups=time))[-1]
x_train, x_valid = x.iloc[idx_train], x.iloc[idx_valid]
y_train, y_valid = y.iloc[idx_train], y.iloc[idx_valid]
print(x_train[0:5], '\n', y_valid[0:5])

In [None]:
# Append a trailing length-1 axis so each row becomes a (features, 1) sequence,
# the 3-D layout the Conv1D layers consume.
n_train_features = x_train.shape[1]
n_valid_features = x_valid.shape[1]
x_train = x_train.values.reshape(-1, n_train_features, 1)
x_valid = x_valid.values.reshape(-1, n_valid_features, 1)
print(x_train.shape)

In [None]:
from tensorflow.python.ops import math_ops
import tensorflow as tf
from tensorflow.keras import backend as k

def pearson_corr(x, y, axis=-2):  # pearson corr
    """Return ``1 - r``, where ``r`` is the Pearson correlation of ``x`` and ``y``.

    Despite the name, the returned value is a correlation *distance*: 0 for
    perfectly correlated inputs, 1 for uncorrelated, 2 for perfectly
    anti-correlated.

    Args:
        x: Tensor-like values; converted with ``tf.convert_to_tensor``.
        y: Tensor-like values; cast to the dtype of ``x``.
        axis: Axis holding the samples to correlate over. The default ``-2``
            presumably targets the batch axis of a ``(batch, 1)`` tensor --
            confirm against the caller.

    Returns:
        A tensor of ``x``'s dtype with ``axis`` reduced away. When either input
        has zero variance along ``axis`` the correlation is treated as 0, so
        the result is 1 rather than NaN.
    """
    x = tf.convert_to_tensor(x)
    y = tf.cast(y, x.dtype)  # public API; replaces the private math_ops.cast
    n = tf.cast(tf.shape(x)[axis], x.dtype)

    # keepdims=True lets the means broadcast back against x / y for any rank,
    # not only when the reduced axis happens to line up by accident.
    xmean = tf.reduce_sum(x, axis=axis, keepdims=True) / n
    ymean = tf.reduce_sum(y, axis=axis, keepdims=True) / n

    xdev = x - xmean
    ydev = y - ymean
    xvar = tf.reduce_sum(tf.square(xdev), axis=axis)
    yvar = tf.reduce_sum(tf.square(ydev), axis=axis)
    cov = tf.reduce_sum(xdev * ydev, axis=axis)

    # divide_no_nan yields 0 where the denominator is 0 (constant input),
    # which keeps a single degenerate batch from turning the metric into NaN.
    corr = tf.math.divide_no_nan(cov, tf.sqrt(xvar * yvar))
    return tf.constant(1.0, dtype=x.dtype) - corr

In [None]:
from tensorflow.keras import layers, models, optimizers, metrics, regularizers, losses

L2_PENALTY = 0.0003  # L2 weight applied to every regularised Conv1D / Dense kernel below


def _conv(filters):
    """Conv1D layer with the shared kernel size, stride, activation and L2 penalty."""
    return layers.Conv1D(
        filters,
        kernel_size=2,
        strides=1,
        activation='leaky_relu',
        kernel_regularizer=regularizers.l2(L2_PENALTY),
    )


def _pool():
    """Max-pooling layer with window 3 and stride 2."""
    return layers.MaxPooling1D(pool_size=3, strides=2)


def _dense(units, activation):
    """Dense layer with the shared L2 penalty."""
    return layers.Dense(units, activation=activation, kernel_regularizer=regularizers.l2(L2_PENALTY))


rmse = metrics.RootMeanSquaredError(name="rmse")

# Convolutional feature extractor: three stages, each closed by max-pooling.
conv_stack = [
    _conv(32),
    _pool(),

    _conv(64),
    layers.LayerNormalization(),
    _pool(),

    _conv(64),
    _conv(128),
    _conv(128),
    _pool(),
]

# Fully connected regression head ending in a single linear output unit.
dense_head = [
    layers.Flatten(),
    _dense(2048, 'swish'),
    layers.LayerNormalization(),
    layers.Dropout(0.5),
    _dense(1024, 'leaky_relu'),
    layers.Dropout(0.5),
    _dense(1024, 'leaky_relu'),
    layers.Dropout(0.5),
    layers.Dense(1),
]

model = models.Sequential(conv_stack + dense_head)
model.compile(
    optimizer=optimizers.Adam(0.0001),
    loss=losses.mean_squared_error,
    metrics=['mae', 'mape', rmse, pearson_corr],
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    batch_size=128,
    epochs=30,
    verbose=1,
)
model.summary()
# Quick sanity check: predictions for the first five validation rows next to their labels.
print(model.predict(x_valid[:5]), '\n', y_valid[:5])

In [None]:
# Persist the trained network. The model was compiled with the custom
# `pearson_corr` metric, so reloading it will presumably need that function
# supplied via `custom_objects` -- verify when loading.
model.save(filepath='ubiquant_tcn_model')

In [None]:
import tensorflow as tf

def preprocess(df):
    """Convert one test batch into the float16 tensor fed to the model.

    The identifier columns (``row_id``, ``investment_id`` and, when present,
    ``time_id``) are removed; every remaining column is treated as a feature.
    The input frame is left unmodified, so the function can safely be called
    more than once on the same frame.

    Args:
        df: DataFrame containing ``row_id`` and ``investment_id`` columns plus
            the feature columns.

    Returns:
        A ``tf.float16`` tensor of shape ``(rows, features, 1)``.

    Raises:
        KeyError: if ``row_id`` or ``investment_id`` is missing from ``df``.
    """
    id_columns = ['row_id', 'investment_id']
    if 'time_id' in df.columns:
        id_columns.append('time_id')

    # drop() returns a new frame, so the caller's DataFrame is not altered.
    features = df.drop(columns=id_columns)

    # Trailing length-1 axis: each row becomes a (features, 1) sequence.
    stacked = features.values.reshape(-1, features.shape[1], 1)
    return tf.convert_to_tensor(stacked, tf.float16)

In [None]:
import ubiquant

env = ubiquant.make_env()
iter_test = env.iter_test()
print(iter_test)
for (test_df, sample_prediction_df) in iter_test:
    # Use a dedicated name so the training DataFrame bound to `df` in the
    # earlier cells is not replaced by a tensor.
    test_features = preprocess(test_df)
    # The network ends in Dense(1), so predict() yields one column per row;
    # flatten it to 1-D before assigning it as the 'target' column.
    sample_prediction_df['target'] = model.predict(test_features).reshape(-1)
    env.predict(sample_prediction_df)
    display(sample_prediction_df)