# SeriesNet
I'm quite fond of using dilated convolutional neural networks (especially [SeriesNet](https://github.com/kristpapadopoulos/seriesnet)) when it comes to forecasting tasks, so this notebook is dedicated to doing just that. The advantage of dilated convolutions is that they allow for a very large receptive field (i.e. the network can look far back into the past). This is a work in progress, and I expect it'll be heavily modified. The notebook may of course also be abandoned if it turns out this approach is stupid for the task at hand :)

The architecture for SeriesNet looks as follows:

<img src="https://i.postimg.cc/4xmbR8cH/Screenshot-from-2020-11-24-09-20-56.png" width="50%" />

[Taken from official GitHub](https://github.com/kristpapadopoulos/seriesnet/blob/master/seriesnet-Krist-Papadopoulos-v1.pdf)

In [None]:
import gc
import math 
from tqdm import tqdm

import numpy as np
import pandas as pd

# Data Prep
Without having investigated the data too much at this point, I'll just treat it as a simple multivariate time series, i.e. a matrix of shape `[timesteps x features]`, with `resp` as the target.

In [None]:
# Read the training set into memory; every missing value is replaced with 0
df = pd.read_csv(
    '/kaggle/input/jane-street-market-prediction/train.csv'
).fillna(0)

# Name of the target column and the list of model input columns
# (every column whose name contains the substring 'feature')
TARGET   = 'resp'
FEATURES = [column for column in df.columns if 'feature' in column]

In [None]:
# Standardise the features column-wise: subtract the mean and divide by the
# (population, ddof=0) standard deviation. `means` and `stds` are kept as
# globals because the same transform is applied to the test rows later.
means = np.nanmean(df[FEATURES], axis=0)
stds = np.nanstd(df[FEATURES], axis=0)

# A constant column has a standard deviation of 0; dividing by it would turn
# the whole column into NaN/inf. Use 1 instead so such a column simply
# becomes all zeros after centring.
stds = np.where(stds == 0, 1.0, stds)

df[FEATURES] = (df[FEATURES] - means) / stds

# Define Model
SeriesNet is basically just a convolutional neural network with dilations, allowing for a big receptive field. And since it's a convolutional net, we can feed in multiple features together with their previous values as well. The code below is adapted directly from the [SeriesNet GitHub repository](https://github.com/kristpapadopoulos/seriesnet/blob/master/seriesnet.py).

In [None]:
from tensorflow.keras.layers import Conv1D, Input, Add, Activation, Dropout
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.layers import LeakyReLU, ELU
from tensorflow.keras.utils import Sequence
from tensorflow.keras import optimizers


def DC_CNN_Block(nb_filter, filter_length, dilation, l2_layer_reg):
    """Return a function that applies one dilated causal convolution block.

    The returned function takes a Keras tensor and produces two tensors:

    * ``network_out`` - the block input plus a 1x1 convolution of the
      activated dilated convolution (the residual path fed to the next block);
    * ``skip_out`` - a separate 1x1 convolution of the same activation
      (the skip connection that is summed at the end of the network).

    Args:
        nb_filter: number of filters of the dilated convolution.
        filter_length: kernel size of the dilated convolution.
        dilation: dilation rate of the dilated convolution.
        l2_layer_reg: L2 regularisation factor applied to every kernel.
    """
    def _kernel_init():
        # Every convolution in the block uses the same seeded initialiser
        return TruncatedNormal(mean=0.0, stddev=0.05, seed=42)

    def _pointwise(tensor):
        # Bias-free 1x1 convolution down to a single channel
        return Conv1D(1, 1, activation='linear', use_bias=False,
                      kernel_initializer=_kernel_init(),
                      kernel_regularizer=l2(l2_layer_reg))(tensor)

    def f(input_):
        # Dilated causal convolution followed by a SELU non-linearity
        hidden = Conv1D(filters=nb_filter,
                        kernel_size=filter_length,
                        dilation_rate=dilation,
                        activation='linear',
                        padding='causal',
                        use_bias=False,
                        kernel_initializer=_kernel_init(),
                        kernel_regularizer=l2(l2_layer_reg))(input_)
        hidden = Activation('selu')(hidden)

        # Two independent 1x1 projections: skip connection first, then the
        # branch that is added back onto the block input.
        skip_out = _pointwise(hidden)
        network_in = _pointwise(hidden)

        # NOTE(review): network_in has one channel; when input_ has more than
        # one channel this Add presumably relies on Keras broadcasting - confirm.
        network_out = Add()([input_, network_in])
        return network_out, skip_out

    return f


def DC_CNN_Model(length, features):
    """Build and compile the SeriesNet-style dilated causal CNN.

    Seven ``DC_CNN_Block`` stages (kernel size 2, dilation rates 1, 2, 4, ...,
    64, L2 factor 0.001) are chained through their residual outputs. The skip
    outputs of all stages are summed, passed through a ReLU and reduced by a
    final bias-free 1x1 convolution to one value per timestep.

    Args:
        length: number of timesteps in one input window.
        features: number of input channels per timestep.

    Returns:
        A Keras ``Model`` taking inputs of shape ``(length, features)`` per
        sample, compiled with MAE loss, an MSE metric and the Adam optimizer.
    """
    # (number of filters, dilation rate, dropout rate on the skip output)
    block_specs = [
        (32, 1, None),
        (32, 2, None),
        (16, 4, None),
        (16, 8, None),
        (16, 16, None),
        (16, 32, 0.8),  # dropout used to limit influence of earlier data
        (16, 64, 0.8),  # dropout used to limit influence of earlier data
    ]

    # Named `inputs` rather than `input` to avoid shadowing the builtin
    inputs = Input(shape=(length, features))

    residual = inputs
    skip_outputs = []
    for nb_filter, dilation, skip_dropout in block_specs:
        residual, skip = DC_CNN_Block(nb_filter, 2, dilation, 0.001)(residual)
        if skip_dropout is not None:
            skip = Dropout(skip_dropout)(skip)
        skip_outputs.append(skip)

    # Only the skip connections feed the output head; the residual output of
    # the last block is not used.
    summed = Add()(skip_outputs)
    activated = Activation('relu')(summed)
    outputs = Conv1D(1, 1, activation='linear', use_bias=False,
                     kernel_initializer=TruncatedNormal(mean=0.0, stddev=0.05, seed=42),
                     kernel_regularizer=l2(0.001))(activated)

    model = Model(inputs=inputs, outputs=outputs)

    # `learning_rate` replaces the deprecated `lr` alias. `decay=0.0` and
    # `epsilon=None` only restated the defaults and are rejected or mishandled
    # by newer Keras optimizers, so they are dropped.
    adam = optimizers.Adam(learning_rate=0.00075, beta_1=0.9, beta_2=0.999,
                           amsgrad=False)
    model.compile(loss='mae', optimizer=adam, metrics=['mse'])
    return model

class DataIterator(Sequence):
    """Keras ``Sequence`` serving a data frame as consecutive fixed-size windows.

    The ``FEATURES`` columns and the ``TARGET`` column of the frame are each
    stored as a single sample of shape ``(1, rows, channels)``. Item ``idx``
    is the slice of ``batch_size`` consecutive rows starting at
    ``idx * batch_size``. Trailing rows that do not fill a whole window are
    never served.
    """

    def __init__(self, df, batch_size):
        n_rows = len(df)
        n_features = len(FEATURES)
        self.batch_size = batch_size
        # Leading axis of size 1: the whole series is treated as one sample
        self.x = df[FEATURES].values.reshape(1, n_rows, n_features)
        self.y = df[TARGET].values.reshape(1, n_rows, 1)

    def __len__(self):
        # Number of complete windows that fit into the series
        n_rows = len(self.x[0])
        return math.floor(n_rows / self.batch_size)

    def __getitem__(self, idx):
        # Row range covered by window number `idx`
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        features_window = self.x[:, window]
        target_window = self.y[:, window]
        gc.collect()
        return features_window, target_window

In [None]:
# The whole series can't be fed through the network at once, so it is chopped
# into windows of this many timesteps
BATCH_SIZE = 1000

# Build the network for windows of BATCH_SIZE timesteps and print its layers
model = DC_CNN_Model(length=BATCH_SIZE, features=len(FEATURES))
model.summary()

# Train Model

In [None]:
# Data generator: serves the training frame to Keras one window of
# BATCH_SIZE timesteps at a time
generator = DataIterator(df=df, batch_size=BATCH_SIZE)

# Train the model. `Model.fit` accepts a `Sequence` directly; the separate
# `fit_generator` method is deprecated.
model.fit(generator, epochs=2, workers=2, use_multiprocessing=True)

# Creating Submission
Finally, it is time to make the predictions.

In [None]:
def shiftArray(arr, fill_value=np.nan):
    """Return a copy of ``arr`` shifted by one position along the first axis.

    Every entry moves from index ``i`` to index ``i + 1``; the last entry is
    dropped and index 0 is set to ``fill_value``. The input is not modified.

    Args:
        arr: array to shift.
        fill_value: value (or broadcastable array) written to index 0.

    Returns:
        A new array with the same shape and dtype as ``arr``.
    """
    shifted = np.empty_like(arr)
    # Move everything one slot towards the end ...
    shifted[1:] = arr[:-1]
    # ... and put the fill value into the slot freed at the front
    shifted[:1] = fill_value
    return shifted

import janestreet
env = janestreet.make_env() # initialize the environment
iter_test = env.iter_test() # an iterator which loops over the test set

# Seed the rolling window with the last BATCH_SIZE (already scaled) training
# rows. Rows are in file order: oldest at index 0, most recent at index -1,
# which is the layout the model was trained on.
eval_data = df.iloc[-BATCH_SIZE:][FEATURES].values

# The training frame is no longer needed; free its memory
del df
gc.collect()

# Loop through the environment
for (test_df, sample_prediction_df) in tqdm(iter_test):

    # Preprocess the new rows exactly like the training data:
    # missing values -> 0 first, then the same standardisation.
    # Without the fill, a single NaN would make the prediction NaN and stay
    # in the window for the next BATCH_SIZE iterations.
    new_rows = (test_df[FEATURES].fillna(0).values - means) / stds

    # Slide the window forward in time: drop the oldest rows at the front and
    # append the new rows at the end, so the latest observation sits at the
    # last timestep - the position the prediction is read from below.
    eval_data = np.concatenate([eval_data[len(new_rows):], new_rows], axis=0)

    # Prediction for the most recent timestep of the window
    pred = model(eval_data.reshape(1, BATCH_SIZE, len(FEATURES)), training=False)[0, -1, 0]

    # Submit prediction: trade (1) when the predicted response is positive
    sample_prediction_df.action = 1 if pred > 0 else 0
    env.predict(sample_prediction_df)