In [None]:
import numpy as np 
import pandas as pd 
import tensorflow.keras as keras
import tensorflow as tf
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the low-memory training parquet and downcast it in one step.
# The float16 cast is applied to the whole frame, i.e. to every column,
# not only to the feature columns.
train_path = '../input/ubiquant-parquet/train_low_mem.parquet'
df = pd.read_parquet(train_path).astype("float16")

In [None]:
# Feature columns are everything except the identifier columns and the target.
index_col = df.columns.drop(["row_id", "time_id", "investment_id", "target"])

# Model inputs (features only) and the regression target.
X_train = df.loc[:, index_col]
y_train = df.loc[:, "target"]

In [None]:
# Show the `time_id` entries whose value equals 900 (single .loc lookup).
df.loc[df["time_id"] == 900.0, "time_id"]

In [None]:
def _drop_unit_last_dim(values):
    """Return `values` as a tensor without a trailing axis of static size 1.

    A single-output model produces tensors shaped (batch, 1). Reducing such a
    tensor along its last axis would treat every sample as its own length-1
    vector, so that axis is squeezed away and the last axis becomes the
    sample axis. Tensors of rank 1, or whose last axis is not statically 1,
    are returned unchanged.
    """
    values = tf.convert_to_tensor(values)
    rank = values.shape.rank
    if rank is not None and rank > 1 and values.shape[-1] == 1:
        values = tf.squeeze(values, axis=-1)
    return values


def pearson_correlation(y_true, y_pred, axis=-1):
    """Pearson correlation between targets and predictions.

    Both inputs are centred on their overall mean, L2-normalised along
    `axis`, and multiplied; the sum along `axis` is the correlation.

    Args:
        y_true: Target values, shaped (n,) or (n, 1).
        y_pred: Predicted values, shaped like `y_true`.
        axis: Axis to reduce over, interpreted after a trailing unit axis
            has been removed. The default (-1) is then the sample axis.

    Returns:
        Tensor holding the correlation, in [-1, 1]; a scalar for inputs
        shaped (n,) or (n, 1).
    """
    y_pred = _drop_unit_last_dim(y_pred)
    # Match dtypes so mixed-precision inputs (e.g. float16 targets with
    # float32 predictions) can be multiplied.
    y_true = tf.cast(_drop_unit_last_dim(y_true), y_pred.dtype)

    y_true = y_true - tf.reduce_mean(y_true)
    y_pred = y_pred - tf.reduce_mean(y_pred)
    y_true = tf.linalg.l2_normalize(y_true, axis=axis)
    y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)
    return tf.reduce_sum(y_true * y_pred, axis=axis)


def pearson_correlation_loss(y_true, y_pred, axis=-1):
    """Loss that is minimised when the Pearson correlation is maximised.

    Returns the negated `pearson_correlation`, i.e. the cosine-similarity
    loss of the mean-centred inputs (-1 is a perfect positive correlation).

    Args:
        y_true: Target values, shaped (n,) or (n, 1).
        y_pred: Predicted values, shaped like `y_true`.
        axis: Axis to reduce over; see `pearson_correlation`.

    Returns:
        Tensor holding the negative correlation, in [-1, 1].
    """
    return -pearson_correlation(y_true, y_pred, axis=axis)

In [None]:
# Fully connected regressor: four Dense(relu) + BatchNormalization stages of
# decreasing width, followed by a single linear output unit.
# `shape` is passed as a tuple (n_features,), the form the Keras API documents;
# a bare int is not accepted by every Keras version.
Input = keras.layers.Input(shape=(X_train.shape[1],))
x = Input
for units in (128, 64, 32, 16):
    x = keras.layers.Dense(units, activation="relu")(x)
    x = keras.layers.BatchNormalization()(x)
Output = keras.layers.Dense(1, activation="linear")(x)

model = keras.models.Model(inputs=Input, outputs=Output)

In [None]:
# Optimise the correlation-based loss with Adam; MSE and the correlation
# itself are tracked as metrics.
model.compile(
    optimizer="adam",
    loss=pearson_correlation_loss,
    metrics=["mse", pearson_correlation],
)

# Train for 5 epochs with validation_split=0.2; the returned History object
# is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_split=0.2,
)

In [None]:
# model.save("model.h5")

In [None]:
import ubiquant

# model = tf.keras.models.load_model('model.h5')

# Create the competition environment and its test iterator, which yields
# (test_df, sample_prediction_df) pairs.
env = ubiquant.make_env()
iter_test = env.iter_test()

for test_df, sample_prediction_df in iter_test:
    # Select the training feature columns and apply the same float16 cast
    # used on the training frame.
    test_features = test_df[index_col].astype("float16")
    sample_prediction_df["target"] = model.predict(test_features)
    # Hand the filled-in prediction frame back to the environment.
    env.predict(sample_prediction_df)