In [None]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from scipy import stats 
from sklearn.decomposition import PCA
from tqdm import notebook

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Location of the pickled training frame inside the Kaggle input directory.
TRAIN_PICKLE_PATH = '../input/ubiquantmarketpredictionmovingaverage/1-7scroll_mean_train.pkl'

# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load pickles from a trusted dataset.
train = pd.read_pickle(TRAIN_PICKLE_PATH)
print(train.shape)
train.head()

In [None]:
# Print the column dtypes and memory footprint of the loaded training frame.
train.info()

In [None]:
# Detach the id columns and the label from `train` (pop removes them in place),
# so that `train` is left holding only the model's input columns.
investment_id, time_id, target = [
    train.pop(column_name)
    for column_name in ("investment_id", "time_id", "target")
]
target.head()

In [None]:
# Training configuration shared by the cells below.
BATCH = 512   # rows per batch when building tf.data datasets
FOLD = 6      # number of cross-validation splits
SEED = 42     # seed for the fold shuffling
EPOCHS = 3    # epochs trained on every fold

# `random_state` only has an effect when `shuffle=True`; without it recent
# scikit-learn versions raise a ValueError (older ones silently ignored the
# seed). Shuffling is enabled so the seed is actually honoured.
skfolds = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)
                    
def ret(a):
    """Identity function: return the argument unchanged.

    Used as the callable of the model's first ``Lambda`` layer.
    """
    return  a

In [None]:
# Fully connected regressor: 3 x Dense(256) -> 2 x Dense(128, L2) -> 2 x Dense(32) -> Dense(1).
model = Sequential()

# Identity Lambda layer that fixes the input width to the number of feature
# columns; the layer is declared with dtype float16.
model.add(Lambda(ret, input_shape=[train.shape[1]], dtype=tf.float16))

# Wide blocks, each followed by batch normalisation.
for _repeat in range(3):
    model.add(Dense(256, activation='swish'))
    model.add(BatchNormalization())

# Narrower blocks with L2 kernel regularisation, also batch-normalised.
for _repeat in range(2):
    model.add(Dense(128, kernel_regularizer="l2", activation='swish'))
    model.add(BatchNormalization())

# Small head without normalisation, ending in a single linear output unit.
for _repeat in range(2):
    model.add(Dense(32, activation='swish'))
model.add(Dense(1))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MeanAbsoluteError(),
)

In [None]:
# Fit the model on every fold in turn, stratifying the splits by investment_id.
#
# NOTE(review): the same `model` instance is fitted fold after fold, so its
# weights carry over. Rows used for validation in one fold are training rows in
# the other folds, which means the reported validation loss is not a clean
# out-of-sample estimate.
for num_fold, (train_index, valid_index) in enumerate(skfolds.split(train, investment_id)):
    print('num_fold:', num_fold+1)

    if num_fold > 0:
        # Release the previous fold's datasets before building the next ones.
        del train_dataset, test_dataset

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train.iloc[train_index], target.iloc[train_index])
    ).batch(BATCH)

    test_dataset = tf.data.Dataset.from_tensor_slices(
        (train.iloc[valid_index], target.iloc[valid_index])
    ).batch(BATCH)

    # The index arrays are no longer needed once the datasets exist.
    del train_index, valid_index

    # The datasets are already batched, so `batch_size` must not be passed to
    # fit(); Keras documents it as unsupported for tf.data inputs.
    model.fit(train_dataset, validation_data=test_dataset, epochs=EPOCHS)

In [None]:
# Drop the last fold's datasets and the training frame to free memory.
del train_dataset, test_dataset, train

In [None]:
# Column-name prefixes of the rolling-mean features. get_moving_average walks
# this list in order and uses a window of (position + 2) rows for each entry.
_WINDOW_WORDS = ('two', 'three', 'four', 'five', 'six', 'seven')
scroll_mean = ['scroll_mean_' + word for word in _WINDOW_WORDS]

# Raw feature columns for which rolling-mean features are computed.
_FEATURE_NUMBERS = (67, 73, 148, 226, 204, 140, 228, 25, 274, 101, 205, 193, 146)
attributes_to_add = ['f_{}'.format(number) for number in _FEATURE_NUMBERS]

In [None]:
def preprocess_test(feature):
    """Pair a feature row with a constant placeholder label of 0.

    Returns:
        A ``(feature, 0)`` tuple.
    """
    placeholder_label = 0
    return feature, placeholder_label

def make_test_dataset(feature, batch_size=512):
    """Wrap inference features in a batched ``tf.data.Dataset``.

    Args:
        feature: Feature rows accepted by ``Dataset.from_tensor_slices``.
        batch_size: Number of rows per batch. Previously this argument was
            ignored in favour of the global ``BATCH``; it is now honoured.

    Returns:
        A dataset yielding batches of ``(feature, 0)`` pairs, where 0 is the
        placeholder label added by ``preprocess_test``.
    """
    ds = tf.data.Dataset.from_tensor_slices(feature)
    ds = ds.map(preprocess_test)
    return ds.batch(batch_size)

In [None]:
def get_moving_average(feature, window_names=None):
    """Compute per-investment rolling means for a series of window sizes.

    For the i-th entry of ``window_names`` a window of ``i + 2`` rows is used.
    Rows are grouped by ``investment_id``; a group is averaged only when it has
    strictly more rows than the window, otherwise its values are passed through
    unchanged. In an averaged group the first ``window - 1`` rows, which have
    no complete window, receive the first complete rolling mean.

    Args:
        feature: DataFrame whose FIRST column is ``investment_id``, followed by
            the columns to average. It is not modified.
        window_names: Column-name prefixes, one per window size. Defaults to
            the module-level ``scroll_mean`` list.

    Returns:
        DataFrame indexed like ``feature`` (rows ordered by first appearance of
        each investment id) with one ``'<prefix>_<column>'`` column per window
        and input column, the ``investment_id`` column excluded.
    """
    if window_names is None:
        window_names = scroll_mean

    per_window_frames = []
    for window, prefix in enumerate(window_names, start=2):
        per_id_frames = []

        # sort=False keeps the ids in order of first appearance.
        for _, id_rows in feature.groupby('investment_id', sort=False):
            if id_rows.shape[0] > window:
                id_rows = id_rows.rolling(window=window).mean()

                # Overwrite the leading rows without a full window (NaN) with the
                # first complete rolling mean, column by column.
                first_full = id_rows.iloc[window - 1].to_numpy()
                for col_pos in range(id_rows.shape[1]):
                    id_rows.iloc[:window - 1, col_pos] = first_full[col_pos]

            prefixed = ['{}_{}'.format(prefix, col) for col in id_rows.columns]
            # set_axis returns a new frame; the inplace variant was removed in pandas 2.0.
            per_id_frames.append(id_rows.set_axis(prefixed, axis='columns'))

        window_frame = pd.concat(per_id_frames) if per_id_frames else pd.DataFrame()
        # Drop the first column, i.e. the prefixed investment_id.
        per_window_frames.append(window_frame.iloc[:, 1:])

    if not per_window_frames:
        return pd.DataFrame()
    return pd.concat(per_window_frames, axis=1)

In [None]:
# Competition inference API; this module is only available inside the Kaggle
# competition environment, which is why it is imported here.
import ubiquant
env = ubiquant.make_env()
iter_test = env.iter_test()

for (test_df, sample_prediction_df) in iter_test:
    # Rolling-mean columns for the selected features, aligned to test_df by index.
    rolling_features = get_moving_average(test_df[['investment_id'] + attributes_to_add])
    test_df = pd.concat([test_df, rolling_features], axis=1)

    # Remove the identifier columns before predicting.
    test_df = test_df.drop(columns=["investment_id", "row_id"])

    ds = make_test_dataset(test_df)
    sample_prediction_df['target'] = model.predict(ds)
    env.predict(sample_prediction_df)