In [5]:
import numpy as np 
import pandas as pd 
from tensorflow.keras.models import *
from tensorflow.keras.layers import *  
from tensorflow.keras.callbacks import *
import tensorflow_addons as tfa
from tqdm import tqdm
import time
import random
import math
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow_probability import distributions as tfd
import seaborn as sns
from keras.utils.np_utils import to_categorical
import os
from glob import glob


In [6]:
## season 2 dataframes 
train_x_2 = pd.read_csv('train_x_df.csv')
train_y_2 = pd.read_csv('train_y_df.csv') 
test_x_2 = pd.read_csv('test_x_df.csv') 
submission = pd.read_csv('sample_submission.csv') 

train_x_2.shape, train_y_2.shape, test_x_2.shape, submission.shape


((10572180, 12), (919320, 12), (738300, 12), (535, 3))

In [7]:
train_x_2.head()

Unnamed: 0,sample_id,time,coin_index,open,high,low,close,volume,quote_av,trades,tb_base_av,tb_quote_av
0,0,0,9,0.983614,0.983614,0.983128,0.983246,0.001334,10.650987,0.009855,0.000848,6.771755
1,0,1,9,0.983245,0.983612,0.982453,0.982693,0.001425,11.375689,0.016137,0.000697,5.565188
2,0,2,9,0.982694,0.983612,0.982403,0.983002,0.001542,12.301942,0.014166,0.000905,7.225459
3,0,3,9,0.983009,0.984848,0.983009,0.984486,0.00252,20.134695,0.021557,0.001171,9.353
4,0,4,9,0.984233,0.984606,0.983612,0.984164,0.002818,22.515448,0.021434,0.001799,14.372534


In [8]:
def df2d_to_array3d(df_2d):
    feature_size = df_2d.iloc[:,2:].shape[1]
    time_size = len(df_2d.time.value_counts())
    sample_size = len(df_2d.sample_id.value_counts())
    array_3d = df_2d.iloc[:,2:].values.reshape([sample_size, time_size, feature_size])
    return array_3d


x_train = df2d_to_array3d(train_x_2) 
y_train = df2d_to_array3d(train_y_2) 
x_test = df2d_to_array3d(test_x_2) 

x_train.shape, y_train.shape, x_test.shape


((7661, 1380, 10), (7661, 120, 10), (535, 1380, 10))

In [9]:
def plot_series(x_series, y_series, y_predicted):
    #입력 series와 출력 series를 연속적으로 연결하여 시각적으로 보여주는 코드 입니다.
    plt.plot(x_series, label = 'input_series')
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_series)),
             y_series, label = 'actual_series') 
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_predicted)),
             y_predicted, label = 'predicted_series') 
    #plt.axhline(1, c = 'red')
    plt.legend()


In [10]:
def plot_predicted_series(x_series, y_predicted):
    #입력 series와 출력 series를 연속적으로 연결하여 시각적으로 보여주는 코드 입니다.
    plt.plot(x_series, label = 'input_series')
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_predicted)),
             y_predicted, label = 'predicted_series') 
    #plt.axhline(1, c = 'red')
    plt.legend()


In [11]:
full_df = np.concatenate([x_train, y_train], axis = 1) 
full_df.shape


(7661, 1500, 10)

In [12]:
OHLC = full_df[:,:,[1,2,3,4]] 
OHLC.shape


(7661, 1500, 4)

In [13]:
volume_features = full_df[:,:,[5,6,7,8,9]]
volume_features.shape


(7661, 1500, 5)

# Moving Average features for price 

In [14]:
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

In [15]:
def preprocess_ma(arr, window): 
    ret = np.zeros((arr.shape[0],arr.shape[1])) 
    for i in tqdm(range(arr.shape[0])):
        ma = moving_average(arr[i,:], window)
        ret[i,window-1:] = ma  
    ret[:,:window-1] = -1 # masking value 
    return ret 

In [16]:
open_ma5 = preprocess_ma(OHLC[:,:,0], 5) 

open_ma10 = preprocess_ma(OHLC[:,:,0], 10)

open_ma20 = preprocess_ma(OHLC[:,:,0], 20)

open_ma60 = preprocess_ma(OHLC[:,:,0], 60)

open_ma120 = preprocess_ma(OHLC[:,:,0], 120)


100%|██████████| 7661/7661 [00:00<00:00, 49781.57it/s]
100%|██████████| 7661/7661 [00:00<00:00, 44516.96it/s]
100%|██████████| 7661/7661 [00:00<00:00, 20367.31it/s]
100%|██████████| 7661/7661 [00:00<00:00, 12934.86it/s]
100%|██████████| 7661/7661 [00:00<00:00, 13857.56it/s]


# Moving Average features for volume

In [17]:
vol_ma5 = preprocess_ma(volume_features[:,:,0], 5) 

vol_ma10 = preprocess_ma(volume_features[:,:,0], 10)

vol_ma20 = preprocess_ma(volume_features[:,:,0], 20)

vol_ma60 = preprocess_ma(volume_features[:,:,0], 60)

vol_ma120 = preprocess_ma(volume_features[:,:,0], 120)


100%|██████████| 7661/7661 [00:00<00:00, 42122.39it/s]
100%|██████████| 7661/7661 [00:00<00:00, 37783.81it/s]
100%|██████████| 7661/7661 [00:00<00:00, 20358.05it/s]
100%|██████████| 7661/7661 [00:00<00:00, 13017.29it/s]
100%|██████████| 7661/7661 [00:00<00:00, 13586.52it/s]


# Slice Data

In [18]:
K = 120 
N = 30
seq_len = 1500 
features_price = 4 
features_volume = 5 

In [19]:
X_price = np.zeros((full_df.shape[0]*(seq_len-N-K), N, features_price))
X_volume = np.zeros((full_df.shape[0]*(seq_len-N-K), N, features_volume))
Y = np.zeros((full_df.shape[0]*(seq_len-N-K),)) 

X_open_ma5 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_open_ma10 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_open_ma20 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_open_ma60 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_open_ma120 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_vol_ma5 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_vol_ma10 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_vol_ma20 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_vol_ma60 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 

X_vol_ma120 = np.zeros((full_df.shape[0]*(seq_len-N-K),N,1)) 


In [20]:
cnt = 0 
for j in tqdm(range(OHLC.shape[0]), position = 0, leave = True): 
    i = 0
    while i+N+K < 1500: 
        X_price[cnt, :, :] = OHLC[j, i:i+N, :] 
        X_volume[cnt,:,:] = volume_features[j, i:i+N, :] 
        Y[cnt] = OHLC[j, i+N+K, 0] # get open prices as target  
        
        X_open_ma5[cnt,:,:] = open_ma5[j,i:i+N].reshape((N,1))
        X_open_ma10[cnt,:,:] = open_ma10[j,i:i+N].reshape((N,1)) 
        X_open_ma20[cnt,:,:] = open_ma20[j,i:i+N].reshape((N,1)) 
        X_open_ma60[cnt,:,:] = open_ma60[j,i:i+N].reshape((N,1)) 
        X_open_ma120[cnt,:,:] = open_ma120[j,i:i+N].reshape((N,1)) 
        
        X_vol_ma5[cnt,:,:] = vol_ma5[j,i:i+N].reshape((N,1)) 
        X_vol_ma10[cnt,:,:] = vol_ma10[j,i:i+N].reshape((N,1)) 
        X_vol_ma20[cnt,:,:] = vol_ma20[j,i:i+N].reshape((N,1)) 
        X_vol_ma60[cnt,:,:] = vol_ma60[j,i:i+N].reshape((N,1)) 
        X_vol_ma120[cnt,:,:] = vol_ma120[j,i:i+N].reshape((N,1)) 
        
        
        i += 1   
        cnt += 1 
        

X_price.shape, X_volume.shape, Y.shape, X_open_ma5.shape, X_open_ma120.shape, X_vol_ma5.shape, X_vol_ma120.shape 


100%|██████████| 7661/7661 [03:02<00:00, 41.91it/s]


((10342350, 30, 4),
 (10342350, 30, 5),
 (10342350,),
 (10342350, 30, 1),
 (10342350, 30, 1),
 (10342350, 30, 1),
 (10342350, 30, 1))

# Define Model

In [21]:
class Time2Vector(Layer):
    def __init__(self, seq_len, **kwargs):
        super(Time2Vector, self).__init__()
        self.seq_len = seq_len

    def build(self, input_shape):
        '''Initialize weights and biases with shape (batch, seq_len)'''
        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=(int(self.seq_len),),
                                              initializer='glorot_uniform',
                                              trainable=True)
    
        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=(int(self.seq_len),),
                                           initializer='glorot_uniform',
                                           trainable=True)
    
        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=(int(self.seq_len),),
                                                initializer='glorot_uniform',
                                                trainable=True)

        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=(int(self.seq_len),),
                                             initializer='glorot_uniform',
                                             trainable=True)

    def call(self, x):
        '''Calculate linear and periodic time features'''
        x = tf.math.reduce_mean(x[:,:,:], axis=-1) 
        time_linear = self.weights_linear * x + self.bias_linear # Linear time feature
        time_linear = tf.expand_dims(time_linear, axis=-1) # Add dimension (batch, seq_len, 1)
    
        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1) # Add dimension (batch, seq_len, 1)
        return tf.concat([time_linear, time_periodic], axis=-1) # shape = (batch, seq_len, 2)
   
    def get_config(self): # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        config.update({'seq_len': self.seq_len})
        return config


In [22]:
def transformer_block(inputs, node, drop_rate, activation): 
    attn_output = MultiHeadAttention(num_heads = 4, key_dim = node)(inputs, inputs) 
    attn_output = Dropout(drop_rate)(attn_output) 
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output) 
    ffn_output = Dense(node, activation = activation)(out1) 
    ffn_output = Dense(node)(ffn_output) 
    ffn_output = Dropout(drop_rate)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output) 
    return out2


def transformer_model(inputs, node = 64, activation = 'relu', drop_rate = 0.2, num_layers = 3):  
    time_embedding = Time2Vector(N)
    masked_input = Masking(mask_value=-1)(inputs)
    bn = BatchNormalization()(masked_input) 
    x = time_embedding(bn) 
    x = Concatenate()([bn, x]) 
    x = Conv1D(node*2, 3, activation = activation, padding = 'same')(x) 
    x = MaxPooling1D()(x) 
    x = Dropout(drop_rate)(x)  
    x = Conv1D(node, 3, activation = activation, padding = 'same')(x) 
    x = MaxPooling1D()(x)
    x = Dropout(drop_rate)(x) 
    positions = tf.range(start=0, limit=x.shape[1], delta=1) 
    positions = Embedding(input_dim = x.shape[1], output_dim = node)(positions) 
    x = x + positions 
    for i in range(num_layers): 
        x = transformer_block(x, node, drop_rate, activation)  
    x = GlobalMaxPooling1D()(x)
    return x 

def LSTM_model(inputs, output_dim=64, drop_rate = 0.2):  
    time_embedding = Time2Vector(N)
    masked_input = Masking(mask_value = -1)(inputs)
    bn = BatchNormalization()(masked_input) 
    x = time_embedding(bn) 
    x = Concatenate()([bn, x]) 
    x = LSTM(output_dim, return_sequences=False)(x) 
    x = Dropout(drop_rate)(x)
    return x  
    

def build_model(): 
    price_inputs = Input((N, features_price)) 
    volume_inputs = Input((N, features_volume))
    open_ma5_inputs = Input((N,1)) 
    open_ma10_inputs = Input((N,1)) 
    open_ma20_inputs = Input((N,1)) 
    open_ma60_inputs = Input((N,1)) 
    open_ma120_inputs = Input((N,1)) 
    vol_ma5_inputs = Input((N,1)) 
    vol_ma10_inputs = Input((N,1)) 
    vol_ma20_inputs = Input((N,1)) 
    vol_ma60_inputs = Input((N,1)) 
    vol_ma120_inputs = Input((N,1)) 

    x_p = transformer_model(price_inputs) 
    x_v = transformer_model(volume_inputs) 
    x_open_ma5 = transformer_model(open_ma5_inputs) 
    x_open_ma10 = transformer_model(open_ma10_inputs) 
    x_open_ma20 = transformer_model(open_ma20_inputs) 
    x_open_ma60 = transformer_model(open_ma60_inputs) 
    x_open_ma120 = transformer_model(open_ma120_inputs) 
    x_vol_ma5 = transformer_model(vol_ma5_inputs) 
    x_vol_ma10 = transformer_model(vol_ma10_inputs) 
    x_vol_ma20 = transformer_model(vol_ma20_inputs) 
    x_vol_ma60 = transformer_model(vol_ma60_inputs) 
    x_vol_ma120 = transformer_model(vol_ma120_inputs) 
    
    merge = Concatenate()([x_p, 
                           x_v,
                           x_open_ma5,
                           x_open_ma10,
                           x_open_ma20,
                           x_open_ma60,
                           x_open_ma120,
                           x_vol_ma5,
                           x_vol_ma10,
                           x_vol_ma20,
                           x_vol_ma60,
                           x_vol_ma120]) 
    
    outputs = Dense(128, activation = 'relu')(merge) 
    outputs = BatchNormalization()(outputs)
    outputs = Dense(64, activation = 'relu')(outputs) 
    outputs = BatchNormalization()(outputs) 
    outputs = Dense(1, activation = 'relu')(outputs)
    
    model = Model(inputs=[price_inputs,
                          volume_inputs,
                          open_ma5_inputs,
                          open_ma10_inputs,
                          open_ma20_inputs,
                          open_ma60_inputs,
                          open_ma120_inputs,
                          vol_ma5_inputs,
                          vol_ma10_inputs, 
                          vol_ma20_inputs, 
                          vol_ma60_inputs, 
                          vol_ma120_inputs], outputs=outputs) 
    model.compile(loss='mape',optimizer='adam',metrics=['mape','mse','mae'])
    return model


In [23]:
model = build_model()
model.summary() 

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 30, 4)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 30, 5)]      0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 30, 1)]      0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 30, 1)]      0                                            
______________________________________________________________________________________________

In [24]:
model_path = 'transformer_feature_engineering_ma_epoch_{epoch:03d}_val_{val_loss:.3f}.h5'
learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', patience = 3, verbose = 1, factor = 0.5)
checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 10) 


history = model.fit([X_price,
                     X_volume,
                     X_open_ma5,
                     X_open_ma10,
                     X_open_ma20,
                     X_open_ma60,
                     X_open_ma120,
                     X_vol_ma5,
                     X_vol_ma10, 
                     X_vol_ma20, 
                     X_vol_ma60, 
                     X_vol_ma120], 
                     Y, 
                     batch_size = 256, 
                     epochs = 50, 
                     callbacks = [learning_rate_reduction, checkpoint], 
                     validation_split = 0.1) 

Epoch 1/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00001: val_loss improved from inf to 1.39963, saving model to transformer_feature_engineering_ma_epoch_001_val_1.400.h5
Epoch 2/50

Epoch 00002: val_loss did not improve from 1.39963
Epoch 3/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 4806/36360 [==>...........................] - ETA: 57:11 - loss: 1.0916 - mape: 1.0916 - mse: 3.6003e-04 - mae: 0.0109

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 5/50
 5102/36360 [===>..........................] - ETA: 56:47 - loss: 1.0473 - mape: 1.0473 - mse: 3.1417e-04 - mae: 0.0105

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00005: val_loss improved from 1.00709 to 1.00640, saving model to transformer_feature_engineering_ma_epoch_005_val_1.006.h5
Epoch 6/50
  331/36360 [..............................] - ETA: 1:04:19 - loss: 1.0475 - mape: 1.0475 - mse: 2.8063e-04 - mae: 0.0105

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 7413/36360 [=====>........................] - ETA: 52:40 - loss: 1.0427 - mape: 1.0427 - mse: 2.8647e-04 - mae: 0.0104

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 3208/36360 [=>............................] - ETA: 59:56 - loss: 1.0351 - mape: 1.0351 - mse: 2.8325e-04 - mae: 0.0104

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00008: val_loss did not improve from 0.97948
Epoch 9/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00009: val_loss did not improve from 0.97948
Epoch 10/50

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 00010: val_loss did not improve from 0.97948
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.97948
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.97948
Epoch 13/50

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 00013: val_loss did not improve from 0.97948
Epoch 14/50
 3882/36360 [==>...........................] - ETA: 1:06:15 - loss: 0.9891 - mape: 0.9891 - mse: 2.6308e-04 - mae: 0.0099

KeyboardInterrupt: 

# Make Prediction

In [25]:
best_model = load_model('transformer_feature_engineering_ma_epoch_007_val_0.979.h5', custom_objects = {'Time2Vector':Time2Vector})
best_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 30, 4)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 30, 5)]      0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 30, 1)]      0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 30, 1)]      0                                            
______________________________________________________________________________________________

In [26]:
X_test_price = [] 
X_test_volume = []

for j in tqdm(range(x_test.shape[0]), position = 0, leave = True): 
    for i in range(seq_len-K-N-120, seq_len-K-N):
        X_test_price.append(x_test[j, i:i+N, [1,2,3,4]])  
        X_test_volume.append(x_test[j, i:i+N, [5,6,7,8,9]])

X_test_price = np.asarray(X_test_price) 
X_test_volume = np.asarray(X_test_volume) 

X_test_price = X_test_price.reshape((-1,N,features_price)) 
X_test_volume = X_test_volume.reshape((-1,N,features_volume))

X_test_price.shape, X_test_volume.shape


100%|██████████| 535/535 [00:00<00:00, 999.70it/s] 


((64200, 30, 4), (64200, 30, 5))

In [38]:
test_open_ma5 = preprocess_ma(x_test[:,:,1],5) 
test_open_ma10 = preprocess_ma(x_test[:,:,1],10) 
test_open_ma20 = preprocess_ma(x_test[:,:,1],20) 
test_open_ma60 = preprocess_ma(x_test[:,:,1],60) 
test_open_ma120 = preprocess_ma(x_test[:,:,1],120) 

test_vol_ma5 = preprocess_ma(x_test[:,:,5],5) 
test_vol_ma10 = preprocess_ma(x_test[:,:,5],10) 
test_vol_ma20 = preprocess_ma(x_test[:,:,5],20) 
test_vol_ma60 = preprocess_ma(x_test[:,:,5],60) 
test_vol_ma120 = preprocess_ma(x_test[:,:,5],120) 


100%|██████████| 535/535 [00:00<00:00, 37721.10it/s]
100%|██████████| 535/535 [00:00<00:00, 43973.20it/s]
100%|██████████| 535/535 [00:00<00:00, 19320.44it/s]
100%|██████████| 535/535 [00:00<00:00, 12786.56it/s]
100%|██████████| 535/535 [00:00<00:00, 11234.31it/s]
100%|██████████| 535/535 [00:00<00:00, 47665.58it/s]
100%|██████████| 535/535 [00:00<00:00, 43324.57it/s]
100%|██████████| 535/535 [00:00<00:00, 19069.72it/s]
100%|██████████| 535/535 [00:00<00:00, 12647.97it/s]
100%|██████████| 535/535 [00:00<00:00, 12056.03it/s]


In [41]:
X_test_open_ma5 = np.zeros((64200,30,1)) 
X_test_open_ma10 = np.zeros((64200,30,1))
X_test_open_ma20 = np.zeros((64200,30,1)) 
X_test_open_ma60 = np.zeros((64200,30,1)) 
X_test_open_ma120 = np.zeros((64200,30,1)) 

X_test_vol_ma5 = np.zeros((64200,30,1)) 
X_test_vol_ma10 = np.zeros((64200,30,1)) 
X_test_vol_ma20 = np.zeros((64200,30,1)) 
X_test_vol_ma60 = np.zeros((64200,30,1)) 
X_test_vol_ma120 = np.zeros((64200,30,1)) 

cnt = 0
for j in tqdm(range(x_test.shape[0]), position = 0, leave = True): 
    for i in range(seq_len-K-N-120, seq_len-K-N):
        X_test_open_ma5[cnt,:,:] = test_open_ma5[j,i:i+N].reshape((N,1)) 
        X_test_open_ma10[cnt,:,:] = test_open_ma10[j,i:i+N].reshape((N,1)) 
        X_test_open_ma20[cnt,:,:] = test_open_ma20[j,i:i+N].reshape((N,1))
        X_test_open_ma60[cnt,:,:] = test_open_ma60[j,i:i+N].reshape((N,1)) 
        X_test_open_ma120[cnt,:,:] = test_open_ma120[j,i:i+N].reshape((N,1)) 
        
        X_test_vol_ma5[cnt,:,:] = test_vol_ma5[j,i:i+N].reshape((N,1)) 
        X_test_vol_ma10[cnt,:,:] = test_vol_ma10[j,i:i+N].reshape((N,1)) 
        X_test_vol_ma20[cnt,:,:] = test_vol_ma20[j,i:i+N].reshape((N,1)) 
        X_test_vol_ma60[cnt,:,:] = test_vol_ma60[j,i:i+N].reshape((N,1)) 
        X_test_vol_ma120[cnt,:,:] = test_vol_ma120[j,i:i+N].reshape((N,1))  
        cnt += 1 


100%|██████████| 535/535 [00:01<00:00, 501.12it/s]


In [49]:
predicted = best_model.predict([X_test_price, 
                                X_test_volume,
                                X_test_open_ma5,
                                X_test_open_ma10,
                                X_test_open_ma20,
                                X_test_open_ma60,
                                X_test_open_ma120,
                                X_test_vol_ma5,
                                X_test_vol_ma10, 
                                X_test_vol_ma20,
                                X_test_vol_ma60,
                                X_test_vol_ma120])  
predicted = predicted.reshape((-1,120)) 
predicted.shape 

(535, 120)

In [50]:
x_test_open = x_test[:,:,1] 

## Adjust predicted prices to be connected with the buy price 
for i in tqdm(range(predicted.shape[0]), position = 0, leave = True):
    buy_price = x_test_open[i,-1] 
    if predicted[i,0] > buy_price:
        diff = predicted[i,0] - buy_price 
        predicted[i,:] -= diff 
    elif predicted[i,0] < buy_price:  
        diff = buy_price - predicted[i,0] 
        predicted[i,:] += diff


100%|██████████| 535/535 [00:00<00:00, 113761.86it/s]


In [51]:
## create submission dataframe  
buy_quantities = [] 
sell_times = [] 

for i in tqdm(range(predicted.shape[0]), position = 0, leave = True): 
    sell_time = np.argmax(predicted[i,:]) 
    sell_times.append(sell_time) 
    buy_price = x_test_open[i,-1] 
    cnt = 0 
    for j in range(120): 
        if predicted[i,j] >= buy_price:    
            cnt += 1 
    buy_quantity = cnt/120 
    buy_quantities.append(buy_quantity)
    
submission.iloc[:,1] = buy_quantities 
submission.iloc[:,2] = sell_times  
submission.to_csv('transformers_ma_features.csv',index=False)


100%|██████████| 535/535 [00:00<00:00, 3811.65it/s]


In [52]:
submission

Unnamed: 0,sample_id,buy_quantity,sell_time
0,7661,0.975000,104
1,7662,0.950000,37
2,7663,1.000000,86
3,7664,0.175000,14
4,7665,0.725000,119
...,...,...,...
530,8191,0.008333,0
531,8192,0.016667,3
532,8193,0.883333,73
533,8194,0.008333,0


# Visualize Prediction

In [53]:
pred_train = best_model.predict([X_price[:3600,],
                                 X_volume[:3600,],
                                 X_open_ma5[:3600], 
                                 X_open_ma10[:3600],
                                 X_open_ma20[:3600], 
                                 X_open_ma60[:3600],
                                 X_open_ma120[:3600], 
                                 X_vol_ma5[:3600], 
                                 X_vol_ma10[:3600], 
                                 X_vol_ma20[:3600], 
                                 X_vol_ma60[:3600],
                                 X_vol_ma120[:3600]]) 
pred_train = pred_train.reshape((-1,120))
pred_train.shape


(30, 120)

In [None]:
x_train_open = x_train[:,:,1]

for i in tqdm(range(pred_train.shape[0]), position = 0, leave = True): 
    buy_price = x_train_open[i,-1] 
    if pred_train[i,0] > buy_price:
        diff = pred_train[i,0] - buy_price 
        pred_train[i,:] -= diff 
    elif pred_train[i,0] < buy_price:  
        diff = buy_price - pred_train[i,0] 
        pred_train[i,:] += diff


In [None]:
y_train_open = y_train[:,:,1]

for i in range(30): 
    plt.plot() 
    plot_series(x_train_open[i,:], y_train_open[i,:], pred_train[i,:])
    plt.show()
