In [None]:
import numpy as np 
import pandas as pd 
import os

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
from tensorflow import keras
from scipy import stats 
from sklearn.decomposition import PCA
from tqdm import notebook
import warnings
# Suppress every warning for the remainder of the session (applies to all
# categories, so deprecation notices from the libraries are hidden as well).
warnings.filterwarnings('ignore')   

In [None]:
# Names of the feature columns: f_0 ... f_299.
n_features = 300
features = [f'f_{i}' for i in range(n_features)] 
# NOTE: read_pickle deserialises arbitrary Python objects; only load pickle
# files that come from a trusted source.
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
print(train.shape)
# Last expression of the cell: rendered as the cell output.
train.head()

In [None]:
# Move the identifier columns and the label out of `train` (pop removes them
# from the frame in place), leaving only the remaining columns behind.
investment_id, time_id, target = (
    train.pop(column) for column in ("investment_id", "time_id", "target")
)
target.head()

In [None]:
# Line plot of the target series against its index. Title and axis labels are
# set so the figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(target)
ax.set(title='Target values', xlabel='index', ylabel='target');

In [None]:
# Cut the frame into three consecutive 100-column slices
# (columns 0-99, 100-199 and 200-299), one per model branch.
train1, train2, train3 = (
    train.iloc[:, start:start + 100] for start in range(0, 300, 100)
)

In [None]:
# Drop the name of the full frame; from here on only the three column slices
# (train1, train2, train3) are used.
del train

In [None]:
# Sanity check: show the shape of each column slice on one line.
print(*(frame.shape for frame in (train1, train2, train3)))

In [None]:
# Training hyper-parameters.
BATCH = 512   # rows per mini-batch
FOLD = 6      # number of cross-validation splits
SEED = 42     # seed used when shuffling rows into folds
EPOCHS = 3    # passes over the training split within each fold

# random_state is only meaningful together with shuffle=True. Without it,
# recent scikit-learn releases raise a ValueError and older ones silently
# ignore the seed.
skfolds = StratifiedKFold(n_splits=FOLD,
                          shuffle=True,
                          random_state=SEED)

# Per-step loss values collected by train_step / test_step.
LOSS_HISTORY = []
TEST_LOSS_HISTORY = []
# Running means of the loss; reset at the start of every epoch by the training loop.
TRAIN_LOSS = tf.keras.metrics.Mean(name='TRAIN_LOSS', dtype=tf.float32)
TEST_LOSS = tf.keras.metrics.Mean(name='TEST_LOSS', dtype=tf.float32)

LOSS_FN = keras.losses.MeanAbsoluteError()
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.001)

def ret(a):
    """Identity function: hand the argument back unchanged."""
    result = a
    return result

# Last expression of the cell: displays whether TensorFlow is running in eager
# mode. The value is only shown, not stored.
tf.executing_eagerly()

In [None]:
class Model(keras.Model):
    """Three independent dense branches whose scalar outputs are averaged.

    ``call`` expects a 3-tuple of tensors ``(x, b, c)``. Each tensor goes
    through its own stack of Dense / BatchNormalization / Dropout layers that
    ends in ``Dense(1)``; the three results are combined with
    ``keras.layers.Average``.
    """

    def __init__(self):
        super(Model, self).__init__()

        # Pass-through layer (wraps the identity function `ret`) created with
        # dtype float16 and shared by all three branches.
        # NOTE(review): input_shape=[300] does not match the 100-column slices
        # the notebook feeds to each branch -- confirm it is intentionally
        # unused for this subclassed model.
        self.inputs = tf.keras.layers.Lambda(ret, input_shape = [300], dtype=tf.float16)

        # Branch "x": swish activations, L2 penalty on the two 128-unit layers.
        self.x1 = tf.keras.layers.Dense(256, activation = 'swish')
        self.batch_x1 = tf.keras.layers.BatchNormalization()
        self.x2 = tf.keras.layers.Dense(256, activation = 'swish')
        self.batch_x2 = tf.keras.layers.BatchNormalization()
        self.dr1 = tf.keras.layers.Dropout(0.15)
        self.x3 = tf.keras.layers.Dense(256, activation = 'swish')
        self.batch_x3 = tf.keras.layers.BatchNormalization()
        self.x4 = tf.keras.layers.Dense(128, kernel_regularizer="l2", activation = 'swish')
        self.batch_x4 = tf.keras.layers.BatchNormalization()
        self.dr2 = tf.keras.layers.Dropout(0.15)
        self.x5 = tf.keras.layers.Dense(128, kernel_regularizer="l2", activation = 'swish')
        self.x6 = tf.keras.layers.Dense(32, activation = 'swish')
        self.x7 = tf.keras.layers.Dense(32, activation = 'swish')
        self.out_x = tf.keras.layers.Dense(1)

        # Branch "b": relu activations, L1 penalty on the two 128-unit layers.
        self.b1 = tf.keras.layers.Dense(256, activation = 'relu')
        self.batch_b1 = tf.keras.layers.BatchNormalization()
        self.bdr1 = tf.keras.layers.Dropout(0.15)
        self.b2 = tf.keras.layers.Dense(256, activation = 'relu')
        self.batch_b2 = tf.keras.layers.BatchNormalization()
        self.b3 = tf.keras.layers.Dense(256, activation = 'relu')
        self.batch_b3 = tf.keras.layers.BatchNormalization()
        self.b4 = tf.keras.layers.Dense(128, kernel_regularizer="l1", activation = 'relu')
        self.batch_b4 = tf.keras.layers.BatchNormalization()
        self.b5 = tf.keras.layers.Dense(128, kernel_regularizer="l1", activation = 'relu')
        self.b6 = tf.keras.layers.Dense(32, activation = 'relu')
        self.b7 = tf.keras.layers.Dense(32, activation = 'relu')
        self.out_b = tf.keras.layers.Dense(1)

        # Branch "c": shallower stack, relu then elu, L2 penalty on the 128-unit layer.
        self.c1 = tf.keras.layers.Dense(256, activation = 'relu')
        self.batch_c1 = tf.keras.layers.BatchNormalization()
        self.c2 = tf.keras.layers.Dense(256, activation = 'relu')
        self.batch_c2 = tf.keras.layers.BatchNormalization()
        self.cdr1 = tf.keras.layers.Dropout(0.15)
        self.c3 = tf.keras.layers.Dense(128, kernel_regularizer="l2", activation = 'elu')
        self.batch_c3 = tf.keras.layers.BatchNormalization()
        self.c4 = tf.keras.layers.Dense(32, activation = 'elu')
        self.c5 = tf.keras.layers.Dense(32, activation = 'elu')
        self.out_c = tf.keras.layers.Dense(1)

        # Element-wise mean of the three branch outputs.
        self.out = tf.keras.layers.Average()

    def call(self, input, training=None):
        """Run the three branches and return the average of their outputs.

        Args:
            input: sequence of exactly three tensors, one per branch, in the
                order (x, b, c).
            training: optional bool forwarded to every BatchNormalization and
                Dropout layer. ``None`` (the default) leaves the decision to
                Keras, which matches calling the model without the argument.

        Returns:
            The output of the ``Average`` layer applied to the three
            ``Dense(1)`` branch outputs.
        """
        x, b, c = input
        x = self.inputs(x)
        b = self.inputs(b)
        c = self.inputs(c)

        # Branch "x".
        x = self.x1(x)
        x = self.batch_x1(x, training=training)
        x = self.x2(x)
        x = self.batch_x2(x, training=training)
        x = self.dr1(x, training=training)
        x = self.x3(x)
        x = self.batch_x3(x, training=training)
        x = self.x4(x)
        x = self.batch_x4(x, training=training)
        x = self.dr2(x, training=training)
        x = self.x5(x)
        x = self.x6(x)
        x = self.x7(x)
        x = self.out_x(x)

        # Branch "b".
        b = self.b1(b)
        b = self.batch_b1(b, training=training)
        b = self.bdr1(b, training=training)
        b = self.b2(b)
        b = self.batch_b2(b, training=training)
        b = self.b3(b)
        b = self.batch_b3(b, training=training)
        b = self.b4(b)
        b = self.batch_b4(b, training=training)
        b = self.b5(b)
        b = self.b6(b)
        b = self.b7(b)
        b = self.out_b(b)

        # Branch "c".
        c = self.c1(c)
        c = self.batch_c1(c, training=training)
        c = self.c2(c)
        c = self.batch_c2(c, training=training)
        c = self.cdr1(c, training=training)
        c = self.c3(c)
        c = self.batch_c3(c, training=training)
        c = self.c4(c)
        c = self.c5(c)
        c = self.out_c(c)

        return self.out([x,b,c])

In [None]:
# Single module-level model instance; train_step, test_step and the submission
# loop all refer to this global.
model = Model()

In [None]:
@tf.function
def _train_step_graph(investment, labels):
    """Graph-compiled part of a training step: forward, backward, metric update."""
    with tf.GradientTape() as tape:
        # training=True puts Dropout and BatchNormalization into training
        # behaviour; without it a directly-called model runs them in
        # inference mode.
        predictions = model(investment, training=True)
        loss_value = LOSS_FN(labels, predictions)
        # Penalties from the kernel_regularizer arguments are exposed through
        # model.losses and are not part of LOSS_FN, so add them to the
        # quantity that is differentiated.
        reg_losses = model.losses
        total_loss = loss_value + tf.add_n(reg_losses) if reg_losses else loss_value

    grads = tape.gradient(total_loss, model.trainable_variables)
    OPTIMIZER.apply_gradients(zip(grads, model.trainable_variables))

    # Track only the data loss so it is comparable with the validation loss.
    TRAIN_LOSS(loss_value)
    return loss_value


def train_step(investment, labels):
    """Run one optimisation step on a batch and record the running mean loss.

    Args:
        investment: batch of model inputs (the 3-tuple of feature tensors).
        labels: batch of target values.

    The list append is done here, outside the tf.function, because Python side
    effects inside a tf.function run only while it is being traced and would
    store symbolic tensors instead of one value per step.
    """
    _train_step_graph(investment, labels)
    LOSS_HISTORY.append(float(TRAIN_LOSS.result()))

In [None]:
@tf.function
def _test_step_graph(investment, labels):
    """Graph-compiled part of an evaluation step: forward pass and metric update."""
    # training=False keeps Dropout and BatchNormalization in inference behaviour.
    predictions = model(investment, training=False)
    test_loss_value = LOSS_FN(labels, predictions)
    TEST_LOSS(test_loss_value)
    return test_loss_value


def test_step(investment, labels):
    """Evaluate one validation batch and record the running mean loss.

    Args:
        investment: batch of model inputs (the 3-tuple of feature tensors).
        labels: batch of target values.

    The list append is done here, outside the tf.function, because Python side
    effects inside a tf.function run only while it is being traced and would
    store symbolic tensors instead of one value per step.
    """
    _test_step_graph(investment, labels)
    TEST_LOSS_HISTORY.append(float(TEST_LOSS.result()))

In [None]:
# Sanity check before training: the feature slices, the stratification key and
# the target, printed on one line.
print(*(obj.shape for obj in (train1, train2, train3, investment_id, target)))

In [None]:
# Cross-validated training. Folds are stratified on investment_id.
# NOTE(review): `model` and `OPTIMIZER` are created once outside this loop, so
# each fold keeps training the same weights and later folds validate on rows
# the model has already been trained on -- confirm this is intended.
for num_fold, (train_index, valid_index) in enumerate(skfolds.split(train1, investment_id)):
    print('num_fold:', num_fold+1)

    # Release the previous fold's datasets before building the new ones.
    if num_fold > 0:
        del train_dataset
        del test_dataset

    X_train1, X_valid1 = train1.iloc[train_index], train1.iloc[valid_index]
    X_train2, X_valid2 = train2.iloc[train_index], train2.iloc[valid_index]
    X_train3, X_valid3 = train3.iloc[train_index], train3.iloc[valid_index]

    Y_train, Y_valid = target.iloc[train_index], target.iloc[valid_index]

    # Each element is ((slice1, slice2, slice3), label), matching Model.call.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        ((X_train1, X_train2, X_train3), Y_train))
    train_dataset = train_dataset.batch(BATCH)

    test_dataset = tf.data.Dataset.from_tensor_slices(
        ((X_valid1, X_valid2, X_valid3), Y_valid))
    test_dataset = test_dataset.batch(BATCH)

    # The pandas slices are no longer needed once the datasets exist.
    del X_train1, X_valid1
    del X_train2, X_valid2
    del X_train3, X_valid3
    del Y_train, Y_valid
    del train_index, valid_index

    for epoch in notebook.tqdm(range(EPOCHS)):
        # Running means are per-epoch.
        TRAIN_LOSS.reset_states()
        TEST_LOSS.reset_states()

        for investment, labels in train_dataset:
            train_step(investment, labels)

        for investment, labels in test_dataset:
            test_step(investment, labels)

        print(
        f'Epoch {epoch + 1}, '
        f'Loss: {TRAIN_LOSS.result()}, '
        f'Test Loss: {TEST_LOSS.result()}')


In [None]:
# Training is finished: drop the last fold's datasets and the feature slices.
del train_dataset, test_dataset
del train1, train2, train3

In [None]:
def preprocess_test(feature1, feature2, feature3):
    """Bundle the three feature slices into one tuple and pair it with a dummy label of 0."""
    bundled_features = (feature1, feature2, feature3)
    dummy_label = 0
    return bundled_features, dummy_label

def make_test_dataset(feature, batch_size=512):
    """Build a batched tf.data.Dataset for inference from a feature frame.

    Args:
        feature: DataFrame whose columns are split by position into three
            slices (0-99, 100-199, 200-299), one per model branch.
        batch_size: number of rows per batch. This value is now honoured;
            previously the global BATCH was used regardless of the argument.

    Returns:
        A dataset yielding ``((slice1, slice2, slice3), 0)`` batches, where 0
        is the dummy label added by ``preprocess_test``.
    """
    feature1 = feature.iloc[:,0:100]
    feature2 = feature.iloc[:,100:200]
    feature3 = feature.iloc[:,200:300]

    ds = tf.data.Dataset.from_tensor_slices((feature1, feature2, feature3))
    ds = ds.map(preprocess_test)
    ds = ds.batch(batch_size)
    return ds

In [None]:
# Competition inference loop: the environment hands out one test frame at a
# time together with the prediction frame to fill in.
import ubiquant
env = ubiquant.make_env()
iter_test = env.iter_test()

for test_df, sample_prediction_df in iter_test:
    ds = make_test_dataset(test_df[features])
    # The model ends in Dense(1), so predict() yields one column per row;
    # flatten it explicitly instead of relying on pandas to squeeze it.
    sample_prediction_df['target'] = model.predict(ds).reshape(-1)
    env.predict(sample_prediction_df)