In [1]:
import numpy as np 
import pandas as pd 
from tensorflow.keras.models import * 
from tensorflow.keras.layers import * 
from tensorflow.keras.callbacks import * 
import tensorflow_addons as tfa
from tqdm import tqdm 
import time 
import random 
import math 
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler 
import tensorflow as tf 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler 
from tensorflow_probability import distributions as tfd 
import seaborn as sns 
import requests
from tcn import TCN, tcn_full_summary

In [2]:
train_x_2 = pd.read_csv('train_x_df.csv') 
train_y_2 = pd.read_csv('train_y_df.csv') 
test_x_2 = pd.read_csv('test_x_df.csv') 

submission = pd.read_csv('sample_submission.csv') 

train_x_2.shape, train_y_2.shape, test_x_2.shape, submission.shape

((10572180, 12), (919320, 12), (738300, 12), (535, 3))

In [3]:
def df2d_to_array3d(df_2d): 
    feature_size = df_2d.iloc[:,2:].shape[1] 
    time_size = len(df_2d.time.value_counts()) 
    sample_size = len(df_2d.sample_id.value_counts()) 
    array_3d = df_2d.iloc[:,2:].values.reshape([sample_size, time_size, feature_size]) 
    return array_3d 

x_train = df2d_to_array3d(train_x_2) 
y_train = df2d_to_array3d(train_y_2) 
x_test = df2d_to_array3d(test_x_2) 

x_train.shape, y_train.shape, x_test.shape

((7661, 1380, 10), (7661, 120, 10), (535, 1380, 10))

In [4]:
def plot_series(x_series, y_series, y_predicted):
    #입력 series와 출력 series를 연속적으로 연결하여 시각적으로 보여주는 코드 입니다.
    plt.plot(x_series, label = 'input_series')
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_series)),
             y_series, label = 'actual_series') 
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_predicted)),
             y_predicted, label = 'predicted_series') 
    #plt.axhline(1, c = 'red')
    plt.legend()


In [5]:
def plot_predicted_series(x_series, y_predicted):
    #입력 series와 출력 series를 연속적으로 연결하여 시각적으로 보여주는 코드 입니다.
    plt.plot(x_series, label = 'input_series')
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_predicted)),
             y_predicted, label = 'predicted_series') 
    #plt.axhline(1, c = 'red')
    plt.legend()


In [6]:
full_df = np.concatenate([x_train, y_train], axis = 1) 
full_df.shape

(7661, 1500, 10)

In [7]:
price_data = full_df[:,:,[1,2,3,4]] 
volume_data = full_df[:,:,[5,6,7,8,9]] 

price_data.shape, volume_data.shape

((7661, 1500, 4), (7661, 1500, 5))

# Preprocess Data

In [8]:
K = 120 
N = 60
seq_len = 1500 
features_price = 4 
features_volume = 5 
X_price = np.zeros(((seq_len-N-K)*price_data.shape[0], N, features_price))
X_volume = np.zeros(((seq_len-N-K)*price_data.shape[0], N, features_volume))
Y = np.zeros(((seq_len-N-K)*price_data.shape[0]))

cnt = 0 
for j in tqdm(range(price_data.shape[0]), position = 0, leave = True): 
    i = 0
    while i+N+K < 1500: 
        X_price[cnt,:,:] = price_data[j, i:i+N, :] 
        X_volume[cnt,:,:] = volume_data[j, i:i+N, :] 
        Y[cnt] = price_data[j, i+N+K, 0] 
        i += 1   
        cnt += 1 
        

X_price.shape, X_volume.shape, Y.shape


100%|██████████| 7661/7661 [00:57<00:00, 132.88it/s]


((10112520, 60, 4), (10112520, 60, 5), (10112520,))

# Modeling

In [9]:
class Time2Vector(Layer):
    def __init__(self, seq_len, **kwargs):
        super(Time2Vector, self).__init__()
        self.seq_len = seq_len

    def build(self, input_shape):
        '''Initialize weights and biases with shape (batch, seq_len)'''
        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=(int(self.seq_len),),
                                              initializer='glorot_uniform',
                                              trainable=True)
    
        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=(int(self.seq_len),),
                                           initializer='glorot_uniform',
                                           trainable=True)
    
        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=(int(self.seq_len),),
                                                initializer='glorot_uniform',
                                                trainable=True)

        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=(int(self.seq_len),),
                                             initializer='glorot_uniform',
                                             trainable=True)

    def call(self, x):
        '''Calculate linear and periodic time features'''
        x = tf.math.reduce_mean(x[:,:,:], axis=-1) 
        time_linear = self.weights_linear * x + self.bias_linear # Linear time feature
        time_linear = tf.expand_dims(time_linear, axis=-1) # Add dimension (batch, seq_len, 1)
    
        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1) # Add dimension (batch, seq_len, 1)
        return tf.concat([time_linear, time_periodic], axis=-1) # shape = (batch, seq_len, 2)
   
    def get_config(self): # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        config.update({'seq_len': self.seq_len})
        return config


In [10]:
def transformer_block(inputs, node, drop_rate, activation): 
    attn_output = MultiHeadAttention(num_heads = 4, key_dim = node)(inputs, inputs) 
    attn_output = Dropout(drop_rate)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(inputs + attn_output) 
    ffn_output = Dense(node, activation = activation)(out1) 
    ffn_output = Dense(node)(ffn_output) 
    ffn_output = Dropout(drop_rate)(ffn_output) 
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output) 
    return out2


def transformer(inputs, node = 32, activation = 'relu', drop_rate = 0.2, num_layers = 2):
    time_embedding = Time2Vector(N) 
    bn = BatchNormalization()(inputs)
    x = time_embedding(bn) 
    x = Concatenate()([bn, x])  
    
    x = Conv1D(node, 5, activation = activation, padding = 'same')(x) 
    x = MaxPooling1D()(x) 
    x = Dropout(drop_rate)(x) 
    
    x = Conv1D(node*2, 5, activation = activation, padding = 'same')(x) 
    x = MaxPooling1D()(x) 
    x = Dropout(drop_rate)(x)   
    
    positions = tf.range(start=0, limit=x.shape[1], delta=1)  
    positions = Embedding(input_dim = x.shape[1], output_dim = node*2)(positions) 
    x = x + positions 
    for i in range(num_layers): 
        x = transformer_block(x, node*2, drop_rate, activation)
    return x 

def build_model(): 
    price_inputs = Input((N, features_price)) 
    x_p = transformer(price_inputs) 
    
    volume_inputs = Input((N, features_volume)) 
    x_v = transformer(volume_inputs) 
    
    attn_p_v = MultiHeadAttention(num_heads=2,key_dim=64)(x_p,x_v) 
    attn_v_p = MultiHeadAttention(num_heads=2,key_dim=64)(x_v,x_p) 
    attn = Dropout(0.25)(attn_p_v + attn_v_p)
    attn = Dense(32)(attn) 
    attn = Activation('tanh')(attn) 
    output = GlobalMaxPooling1D()(attn) 
    output = Dense(16, activation = 'relu')(output) 
    output = Dropout(0.25)(output) 
    output = Dense(1, activation = 'relu')(output)
    
    model = Model(inputs=[price_inputs, volume_inputs], outputs=output) 
    model.compile(loss='mape', optimizer = 'adam', metrics=['mse','mae','mape']) 
    return model

In [11]:
model = build_model()
model.summary() 

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 60, 4)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 60, 5)]      0                                            
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 60, 4)        16          input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 60, 5)        20          input_2[0][0]                    
______________________________________________________________________________________________

In [12]:
model_path = 'attn_transformer_price_volume_epoch_{epoch:03d}_val_{val_loss:.3f}.h5'
learning_rate_reduction = ReduceLROnPlateau(monitor = 'val_loss', patience = 3, verbose = 1, factor = 0.5)
checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 10) 


history = model.fit([X_price, X_volume], 
                     Y, 
                     batch_size = 512, 
                     epochs = 20, 
                     callbacks = [learning_rate_reduction, checkpoint], 
                     validation_split = 0.1)


Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.99901, saving model to attn_transformer_price_volume_epoch_001_val_0.999.h5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.99901
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.99901
Epoch 4/20

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 00004: val_loss did not improve from 0.99901
Epoch 5/20

Epoch 00005: val_loss improved from 0.99901 to 0.99096, saving model to attn_transformer_price_volume_epoch_005_val_0.991.h5
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.99096
Epoch 7/20

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00010: val_loss did not improve from 0.99096
Epoch 11/20
 1053/17776 [>.............................] - ETA: 6:29 - loss: 1.0798 - mse: 3.1717e-04 - mae: 0.0108 - mape: 1.0798

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 00011: val_loss did not improve from 0.99096
Epoch 12/20

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00012: val_loss did not improve from 0.99096
Epoch 13/20

Epoch 00013: val_loss did not improve from 0.99096
Epoch 14/20
 2370/17776 [==>...........................] - ETA: 5:32 - loss: 1.0650 - mse: 3.0615e-04 - mae: 0.0107 - mape: 1.0650

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00016: val_loss did not improve from 0.99096
Epoch 17/20

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00017: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 00017: val_loss did not improve from 0.99096
Epoch 18/20

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00018: val_loss did not improve from 0.99096
Epoch 19/20

Epoch 00019: val_loss did not improve from 0.99096
Epoch 20/20
 1664/17776 [=>............................] - ETA: 5:49 - loss: 1.0646 - mse: 3.0827e-04 - mae: 0.0106 - mape: 1.0646

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.

Epoch 00020: val_loss did not improve from 0.99096


# Inference

In [13]:
best_model = load_model('attn_transformer_price_volume_epoch_005_val_0.991.h5', custom_objects = {'Time2Vector':Time2Vector})
best_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 60, 4)]      0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 60, 5)]      0                                            
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 60, 4)        16          input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 60, 5)        20          input_2[0][0]                    
______________________________________________________________________________________________

In [14]:
X_test_price = [] 
X_test_volume = []

for j in tqdm(range(x_test.shape[0]), position = 0, leave = True): 
    for i in range(seq_len-K-N-120, seq_len-K-N):
        X_test_price.append(x_test[j, i:i+N, [1,2,3,4]])  
        X_test_volume.append(x_test[j, i:i+N, [5,6,7,8,9]])

X_test_price = np.asarray(X_test_price) 
X_test_volume = np.asarray(X_test_volume) 

X_test_price = X_test_price.reshape((-1,N,features_price)) 
X_test_volume = X_test_volume.reshape((-1,N,features_volume))

X_test_price.shape, X_test_volume.shape


100%|██████████| 535/535 [00:00<00:00, 787.34it/s]


((64200, 60, 4), (64200, 60, 5))

In [15]:
predicted = best_model.predict([X_test_price, X_test_volume]) 
predicted = predicted.reshape((-1,120)) 
predicted.shape

(535, 120)

In [16]:
x_test_open = x_test[:,:,1] 

## Adjust predicted prices to be connected with the buy price 
for i in tqdm(range(predicted.shape[0]), position = 0, leave = True):
    buy_price = x_test_open[i,-1] 
    if predicted[i,0] > buy_price:
        diff = predicted[i,0] - buy_price 
        predicted[i,:] -= diff 
    elif predicted[i,0] < buy_price:  
        diff = buy_price - predicted[i,0] 
        predicted[i,:] += diff


100%|██████████| 535/535 [00:00<00:00, 115086.30it/s]


In [17]:
buy_quantities = [] 
sell_times = [] 

for i in tqdm(range(predicted.shape[0]), position = 0, leave = True): 
    sell_time = np.argmax(predicted[i,:]) 
    sell_times.append(sell_time) 
    buy_price = x_test_open[i,-1] 
    cnt = 0 
    for j in range(120): 
        if predicted[i,j] >= buy_price:    
            cnt += 1 
    buy_quantity = cnt/120 
    buy_quantities.append(buy_quantity)
    
submission.iloc[:,1] = buy_quantities 
submission.iloc[:,2] = sell_times  
submission.to_csv('transformers_attention_full_features.csv',index=False)


100%|██████████| 535/535 [00:00<00:00, 3867.30it/s]


In [18]:
submission

Unnamed: 0,sample_id,buy_quantity,sell_time
0,7661,0.966667,105
1,7662,0.941667,34
2,7663,0.991667,73
3,7664,0.183333,15
4,7665,0.716667,97
...,...,...,...
530,8191,0.100000,10
531,8192,0.425000,2
532,8193,0.950000,71
533,8194,0.025000,18
