In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
df=pd.read_csv('../input/g-research-crypto-forecasting/train.csv')

In [None]:
# Remove the timestamp column and turn +/-inf into NaN so that the NaN
# handling in the following cells also catches infinite values.
# errors='ignore' keeps this cell re-runnable: once `timestamp` is gone a
# plain drop would raise KeyError on a second execution.
df=df.drop(columns=['timestamp'],errors='ignore')
df.replace([np.inf, -np.inf], np.nan, inplace=True)

In [None]:
df.isna().value_counts()

In [None]:
# Sorted, de-duplicated index labels of every row whose Target or VWAP is NaN.
missing_mask=df.Target.isna() | df.VWAP.isna()
na_idx=np.unique(df.index[missing_mask])

In [None]:
# Drop the rows collected in na_idx (selected by index label).
df=df.drop(index=list(na_idx))

In [None]:
df.Target.isna().value_counts()

* There are 14 different time series, one per asset

In [None]:
df.Asset_ID.unique().shape[0]

In [None]:
# Plot the first 50 Target values of every asset to eyeball how the series differ.
fig, ax = plt.subplots()
for asset_id in range(14):
    head = df.loc[df.Asset_ID == asset_id, 'Target'].iloc[:50].to_numpy()
    # Use the real slice length so an asset with fewer than 50 rows still plots
    # (a fixed range(50) would raise a shape-mismatch error in that case).
    ax.plot(range(len(head)), head, label=f'Asset:{asset_id}')

ax.set_xlabel('row (per asset, in file order)')
ax.set_ylabel('Target')
ax.set_title('First 50 Target values per asset')
ax.legend()

plt.show()

In [None]:
# Per-asset split: dfs[asset_id] = (feature frame, Target series).
dfs={}

for asset_id in range(14):
    asset_rows=df[df.Asset_ID==asset_id]  # filter once, reuse for both parts
    features=asset_rows.drop(['Target','Asset_ID'],axis=1)
    dfs[asset_id]=(features,asset_rows.Target)

In [None]:
# Number of rows available for each asset, in dfs iteration order.
lengths=[]

for asset_id,(features,_) in dfs.items():
    n_rows=features.shape[0]
    print(f'length of asset {asset_id} : ',n_rows)
    lengths.append(n_rows)

* End to End idea 

   * Since the RNN starts from a zero initial hidden state, pre-pad (pad at the front) every series to the same length
   
   * Use asset_id to represent which asset each series belongs to
   
   * Each asset series size : (1,seq slice length , dim)
   
       * So total size will be : (14,   seq slice length , dim)
       
   * When using an LSTM or GRU, pass the last hidden state of one slice as the initial hidden state of the next slice to retain the time dependency
   

In [None]:
max_len=np.max(lengths)

In [None]:
# Memory heavy: every asset's features and targets are copied into NumPy arrays.
x,y=[],[]

for asset_id,(features,target) in dfs.items():
    print(asset_id)
    x.append(np.array(features))
    y.append(np.array(target))

In [None]:
# Memory heavy: zero-pad every asset's feature matrix at the front ("pre")
# so that all assets share the length of the longest series.
x=pad_sequences(x,padding='pre',value=0.0,dtype='float32')

In [None]:
x.shape #14 subbatch (one per asset) , 1955978 sequence length , each step has 7 features (matches dim=7 used by the model)

In [None]:
# Front-pad the per-asset targets with zeros, mirroring the padding applied to x.
y=pad_sequences(y,padding='pre',value=0.0,dtype='float32')

In [None]:
y.shape #14 subbatch, 1955978 sequence length

In [None]:
x.shape

In [None]:
# Put time on the leading axis: x -> (time, asset, feature), y -> (time, asset).
# NOTE: running this cell twice swaps the axes back again.
x=x.transpose(1,0,2)
y=y.T

In [None]:
x.shape,y.shape

## Modeling

In [None]:
# Minimum / maximum over the leading (time) axis, used later by norm().
# The padded zeros are part of x, so they take part in both statistics.
mmin,mmax=np.min(x,axis=0),np.max(x,axis=0)

In [None]:
def norm(x):
    """Scale ``x`` with the notebook-level ``mmin`` / ``mmax`` statistics.

    Computes ``(x - mmin) / (mmax - mmin + 1)``. The ``+ 1`` in the
    denominator keeps the division defined when ``mmax == mmin``.
    ``x`` must broadcast against ``mmin`` / ``mmax``.
    """
    value_range=mmax-mmin+1
    shifted=x-mmin
    return shifted/value_range

In [None]:
mmin.shape,mmax.shape

In [None]:
def build_ds(X,y,time_slice,batch_size):
    """Build a ``tf.data`` pipeline of consecutive, non-overlapping windows.

    Args:
        X: feature array with time on axis 0; in this notebook it is
            (time, asset, feature).
        y: target array with time on axis 0; in this notebook it is
            (time, asset).
        time_slice: window length; also used as the stride, so windows do
            not overlap and follow each other in time order.
        batch_size: number of consecutive windows grouped into one element.

    Returns:
        A dataset yielding ``(features, targets)`` where features are
        normalised with ``norm`` and both tensors have the window axis and
        the asset axis swapped, i.e. features are
        (batch, asset, time_slice, feature) and targets are
        (batch, asset, time_slice).
    """
    def _windows(array):
        # targets=None makes the dataset yield only the windows themselves,
        # so no dummy target tensor has to be allocated and mapped away.
        return tf.keras.preprocessing.timeseries_dataset_from_array(
            data=array,
            targets=None,
            sequence_length=time_slice,
            sequence_stride=time_slice,
            shuffle=False,  # order matters: hidden state is carried between windows
            batch_size=batch_size,
        )

    def _normalise_and_swap(features,targets):
        # norm() broadcasts its (asset, feature) statistics over batch and time.
        features=tf.transpose(norm(features),perm=[0,2,1,3])
        targets=tf.transpose(targets,perm=[0,2,1])
        return features,targets

    ds=tf.data.Dataset.zip((_windows(X),_windows(y)))
    ds=ds.map(_normalise_and_swap)
    return ds.prefetch(tf.data.experimental.AUTOTUNE)

* Hyperparameter

In [None]:
time_slice=1024  # window length (and stride) passed to build_ds
batch_size=128   # number of consecutive windows per dataset element

epochs=3         # full passes over ds in train()
units=512        # LSTM units passed to Model
lr=0.0001        # Adam learning rate used in train()

In [None]:
ds=build_ds(x,y,time_slice,batch_size)

In [None]:
# Peek at the first dataset element to confirm the layout produced by build_ds.
for seq,label in ds:
    print(seq.shape,label.shape)  #x:(batch, sub batch, seq length, dim) after the transpose in build_ds
    break

In [None]:
#check operation bug

tf.reduce_sum(tf.cast(tf.math.is_nan(seq),'float32'))

* Model

   * Stateful : Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch
   
   * If the LSTM is stateful, we need to specify the batch input shape in the input layer

In [None]:
class Model(tf.keras.Model):
    """Masking -> LSTM -> Dense -> Dense regressor with explicit state passing.

    The LSTM is not stateful; instead ``call`` returns the final hidden and
    cell state so the caller can feed them back as ``hidden`` for the next
    slice of the same sequences.

    Args:
        units: number of LSTM units; the first dense layer uses half of it.
        batch_size: default number of rows for ``initialize_hidden_state``.
        dim: number of input features per time step (stored for reference).
    """
    def __init__(self,units,batch_size=14,dim=7):
        super().__init__()
        self.batch_size=batch_size
        self.units=units
        self.dim=dim
        # Time steps whose features are all 0 (the padding value) are masked.
        # No fixed batch_input_shape is declared: the input shape is taken
        # from the data, so the layer no longer depends on the global
        # `time_slice` and also accepts the length-1 sequences used at
        # prediction time.
        self.mask=layers.Masking(mask_value=0.0)
        self.lstm=layers.LSTM(units=units,
                            return_sequences=True,
                            return_state=True,
                            dropout=0.3)
        self.d1=layers.Dense(units=units//2,activation='relu')
        self.d2=layers.Dense(units=1,activation=None)

    def call(self,x,hidden=None,training=False):
        """Run one slice through the network.

        Args:
            x: input of shape (batch, time, features).
            hidden: ``[h, c]`` state to start from, or ``None`` for zeros.
            training: forwarded to the LSTM (controls its dropout).

        Returns:
            ``(y, h, c)``: per-step predictions of shape (batch, time, 1)
            and the final hidden / cell state of the LSTM.
        """
        x=self.mask(x)
        if hidden is None:
            # Size the zero state from the actual input so batches that do
            # not have exactly `self.batch_size` rows work as well.
            n_rows=x.shape[0]
            if n_rows is None:
                n_rows=tf.shape(x)[0]
            hidden=self.initialize_hidden_state(n_rows)
        x, h, c=self.lstm(x,initial_state=hidden,training=training)
        x=self.d1(x)
        x=self.d2(x)
        return x,h, c


    def  initialize_hidden_state(self,batch_size=None):
        """Return zero ``[h, c]`` states; defaults to ``self.batch_size`` rows."""
        if batch_size is None:
            batch_size=self.batch_size
        return [tf.zeros((batch_size, self.units)), tf.zeros((batch_size, self.units))]
        

In [None]:
@tf.function
def flow(x,y,model,loss_func,opt,init=None):
    """Run one optimisation step and hand back the loss and the LSTM state.

    Args:
        x: input batch passed to ``model``.
        y: targets compared against the model's predictions.
        model: callable returning ``(prediction, h, c)``.
        loss_func: callable ``loss_func(y_true, y_pred)``.
        opt: optimizer providing ``apply_gradients``.
        init: state forwarded to the model as ``hidden`` (``None`` = default).

    Returns:
        ``(loss, h, c)`` for this step.
    """
    with tf.GradientTape() as tape:
        prediction,h_last,c_last=model(x,hidden=init,training=True)
        step_loss=loss_func(y,prediction)

    # Read the weight list only after the forward pass, as the original does.
    weights=model.trainable_weights
    gradients=tape.gradient(step_loss,weights)
    opt.apply_gradients(zip(gradients,weights))

    return step_loss,h_last,c_last

In [None]:
def train():
    """Train a fresh ``Model`` on the notebook-level ``ds`` and return it.

    Reads the globals ``units``, ``lr``, ``epochs`` and ``ds``. Within an
    epoch the LSTM state returned by one step is fed into the next step;
    the state is reset to ``None`` at the start of every epoch. A checkpoint
    is written to ``./ckpt`` after each epoch (only the latest is kept).
    """
    model=Model(units)
    opt=tf.keras.optimizers.Adam(learning_rate=lr)
    mse=MeanSquaredError()
    ckpt=tf.train.Checkpoint(opt=opt, model=model)
    manager=tf.train.CheckpointManager(ckpt, './ckpt', max_to_keep=1)

    print('start training')
    for epoch in range(epochs):
        state=None  # start every epoch from the model's default state
        for batch_idx,(x_batch,y_batch) in enumerate(ds):
            # Iterating the batch tensors walks along their first axis.
            for sub_idx,(seq,label) in enumerate(zip(x_batch,y_batch)):
                label=label[...,tf.newaxis]  # add a trailing axis of size 1
                loss,h,c=flow(seq,label,model,mse,opt,init=state)
                state=[h,c]
                if not sub_idx%50:
                    print(f'epoch :{epoch} , batch:{batch_idx} sub: {sub_idx} , loss={loss}')
        manager.save()
    return model

In [None]:
model=train()

In [None]:
model.summary()

## Submit

*  Predict each test batch once, passing the hidden state on to the next batch

In [None]:
import gresearch_crypto

# Inference loop for the competition API: one call per test batch, with the
# LSTM state carried from one batch to the next (h0 --> h1 --> h2 --> ...).
#
# Row i of the model input must always be asset i, because both the
# normalisation statistics (mmin / mmax, one row per asset) and the carried
# hidden state are indexed by asset. The rows of test_df are therefore
# scattered into a fixed (n_assets, 1, n_features) batch by their Asset_ID
# instead of being used in whatever order and number they arrive.
non_feature_cols=['timestamp','row_id','Asset_ID']
n_assets,n_features=mmin.shape
lo=np.asarray(mmin,dtype='float32')
scale=np.asarray(mmax-mmin+1,dtype='float32')  # same denominator as norm()

env = gresearch_crypto.make_env()
iter_test = env.iter_test()

# The LSTM is not stateful, so there is nothing to reset on the model; the
# state is threaded through explicitly instead.
state=None
for test_df, sample_prediction_df in iter_test:

    asset_ids=test_df['Asset_ID'].to_numpy().astype(int)
    feats=(test_df.drop(non_feature_cols,axis=1)
                  .replace([np.inf, -np.inf], np.nan)
                  .to_numpy(dtype='float32'))
    # Training dropped rows with missing values; here such rows (and assets
    # absent from this batch) are left as all-zero inputs, which the model's
    # Masking layer treats as padding, so they cannot inject NaN into the
    # carried state.
    usable=~np.isnan(feats).any(axis=1)
    ids_ok=asset_ids[usable]

    k=np.zeros((n_assets,1,n_features),dtype='float32')
    k[ids_ok,0,:]=(feats[usable]-lo[ids_ok])/scale[ids_ok]

    y_pred,h,c=model(k,hidden=state,training=False)
    state=[h,c]

    # Pick each row's prediction by its asset; rows of sample_prediction_df
    # are assumed to be in the same order as test_df (as in the original code).
    per_asset=np.asarray(y_pred)[:,0,0].astype('float64')
    sample_prediction_df['Target']=per_asset[asset_ids]
    env.predict(sample_prediction_df)