# Stock Price Prediction Transformer
> Transformer architecture utilized for learning and predicting stock price time series
> data, incorporating _Time2Vec_ vectorization of the time dimension.

#### Sources
1.  https://towardsdatascience.com/stock-predictions-with-state-of-the-art-transformer-and-time-embeddings-3a4485237de6

In [1]:
import sqlite3
import warnings
from datetime import datetime as dt

import numpy as np
import pandas as pd
import plotly.subplots
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *

import marketml

warnings.filterwarnings("ignore")

[I 200715 23:34:18 __init__:15] Project directory: /Users/taras/Code/projects/MarketML


## Hyperparameters

In [2]:
train_size = 0.5
test_size = 0.5

batch_size = 32
sequence_length = 10
num_heads = 18

## Load Data

In [5]:
db_path = marketml.project_dir.joinpath("data/db/stocks.sqlite")
connection = sqlite3.connect(str(db_path))
df = pd.read_sql_query(
    'SELECT * FROM prices WHERE ticker="AAPL"',
    connection,
    index_col="date",
    parse_dates=True
)

df.drop(columns=["ticker", "dividend_amount", "split_coefficient"], inplace=True)
df["volume"].replace(to_replace=0, method="ffill", inplace=True)
df.sort_index(inplace=True)
df.head(10)

Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-03-23,142.0,150.38,140.0,141.31,4.3802,5022900
2000-03-24,142.44,143.94,135.5,138.69,4.299,3990300
2000-03-27,137.63,144.75,136.88,139.56,4.3259,2492700
2000-03-28,137.25,142.0,137.13,139.13,4.3126,1812200
2000-03-29,139.38,139.44,133.83,135.94,4.2137,2141400
2000-03-30,133.56,137.69,125.44,125.75,3.8979,3700000
2000-03-31,127.44,137.25,126.0,135.81,4.2097,3612800
2000-04-03,135.5,139.5,129.44,133.31,4.1322,2933600
2000-04-04,132.63,133.0,116.75,127.31,3.9462,5895800
2000-04-05,126.47,132.88,124.0,130.38,4.0414,4086300


In [6]:
figure = plotly.subplots.make_subplots(rows=2, cols=1)
figure.add_scatter(x=df.index, y=df["adjusted_close"], connectgaps=True, row=1, col=1)
figure.add_scatter(x=df.index, y=df["volume"], connectgaps=True, row=2, col=1)
figure.update_layout(title="AAPL Close Price and Trading Volume")

In [7]:
columns = df.columns
for column in columns:
    df[column] = df[column].pct_change()
df.dropna(how="any", axis=0, inplace=True)

In [8]:
price_columns = columns.to_list()
price_columns.pop(-1)
min_return = min(df[price_columns].min(axis=0))
max_return = max(df[price_columns].max(axis=0))
delta_return = max_return-min_return

min_volume = df["volume"].min(axis=0)
max_volume = df["volume"].max(axis=0)
delta_volume = max_volume-min_volume

In [9]:
for column in price_columns:
    df[column] = (df[column] - min_return) / delta_return
df["volume"] = (df["volume"] - min_volume) / delta_volume

In [10]:
length = len(list(sorted(df.index.values)))
train_df = df.iloc[:int(length * train_size)]
intermediate_df = df.iloc[int(length * train_size):]
test_df = intermediate_df.iloc[:int(len(intermediate_df) * test_size)]
valid_df = intermediate_df.iloc[int(len(intermediate_df) * test_size):]

In [11]:
# Convert pandas columns into arrays
train_data = train_df.values
valid_data = valid_df.values
test_data = test_df.values
print('Training data shape: {}'.format(train_data.shape))
print('Validation data shape: {}'.format(valid_data.shape))
print('Test data shape: {}'.format(test_data.shape))

train_df.head()

Training data shape: (2515, 6)
Validation data shape: (1258, 6)
Test data shape: (1258, 6)


Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-03-24,0.776366,0.734944,0.744579,0.756848,0.75685,0.04661
2000-03-27,0.743113,0.778647,0.782758,0.77923,0.779215,0.033357
2000-03-28,0.771081,0.756435,0.775219,0.770792,0.770798,0.041346
2000-03-29,0.787569,0.75731,0.751865,0.752891,0.752886,0.076848
2000-03-30,0.735908,0.762251,0.717025,0.705959,0.705971,0.119498


In [12]:
def get_model_inputs(data: np.ndarray):
    x, y = [], []
    for i in range(sequence_length, len(data)):
      x.append(train_data[i-sequence_length:i]) # Chunks of training data with a length of 128 df-rows
      y.append(train_data[:, 3][i]) #Value of 4th column (Close Price) of df-row 128+1
    return np.array(x), np.array(y)

X_train, y_train = get_model_inputs(train_data)
X_test, y_test = get_model_inputs(test_data)
X_valid, y_valid = get_model_inputs(valid_data)

In [13]:
class Time2Vector(Layer):
  def __init__(self, vector_length):
    super(Time2Vector, self).__init__()
    self.vector_length = vector_length

  def build(self, input_shape):
    """Initialize weights and biases with shape (batch, vector_length)"""
    self.weights_linear = self.add_weight(name='weight_linear',
                                shape=(int(self.vector_length),),
                                initializer='uniform',
                                trainable=True)

    self.bias_linear = self.add_weight(name='bias_linear',
                                shape=(int(self.vector_length),),
                                initializer='uniform',
                                trainable=True)

    self.weights_periodic = self.add_weight(name='weight_periodic',
                                shape=(int(self.vector_length),),
                                initializer='uniform',
                                trainable=True)

    self.bias_periodic = self.add_weight(name='bias_periodic',
                                shape=(int(self.vector_length),),
                                initializer='uniform',
                                trainable=True)

  def call(self, x, **kwargs):
    """Calculate linear and periodic time features."""
    x = tf.math.reduce_mean(x[:,:,:4], axis=-1)
    time_linear = self.weights_linear * x + self.bias_linear # Linear time feature
    time_linear = tf.expand_dims(time_linear, axis=-1) # Add dimension (batch, sequence_length, 1)

    time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
    time_periodic = tf.expand_dims(time_periodic, axis=-1) # Add dimension (batch, sequence_length, 1)
    return tf.concat([time_linear, time_periodic], axis=-1) # shape = (batch, sequence_length, 2)

  def get_config(self): # Needed for saving and loading model with custom layer
    config = super().get_config().copy()
    config.update({'sequence_length': self.vector_length})
    return config

In [14]:
class SingleAttention(Layer):
  def __init__(self, d_k, d_v):
    super(SingleAttention, self).__init__()
    self.d_k = d_k
    self.d_v = d_v

  def build(self, input_shape):
    self.query = Dense(self.d_k,
                       input_shape=input_shape,
                       kernel_initializer='glorot_uniform',
                       bias_initializer='glorot_uniform')

    self.key = Dense(self.d_k,
                     input_shape=input_shape,
                     kernel_initializer='glorot_uniform',
                     bias_initializer='glorot_uniform')

    self.value = Dense(self.d_v,
                       input_shape=input_shape,
                       kernel_initializer='glorot_uniform',
                       bias_initializer='glorot_uniform')

  def call(self, inputs, **kwargs): # inputs = (in_seq, in_seq, in_seq)
    q = self.query(inputs[0])
    k = self.key(inputs[1])

    attn_weights = tf.matmul(q, k, transpose_b=True)
    attn_weights = tf.map_fn(lambda x: x/np.sqrt(self.d_k), attn_weights)
    attn_weights = tf.nn.softmax(attn_weights, axis=-1)

    v = self.value(inputs[2])
    attn_out = tf.matmul(attn_weights, v)
    return attn_out

In [15]:
class MultiAttention(Layer):
  def __init__(self, d_k, d_v, n_heads):
    super(MultiAttention, self).__init__()
    self.d_k = d_k
    self.d_v = d_v
    self.n_heads = n_heads
    self.attn_heads = list()

  def build(self, input_shape):
    for n in range(self.n_heads):
      self.attn_heads.append(SingleAttention(self.d_k, self.d_v))

    # input_shape[0]=(batch, sequence_length, 7), input_shape[0][-1]=7
    self.linear = Dense(input_shape[0][-1],
                        input_shape=input_shape,
                        kernel_initializer='glorot_uniform',
                        bias_initializer='glorot_uniform')

  def call(self, inputs):
    attn = [self.attn_heads[i](inputs) for i in range(self.n_heads)]
    concat_attn = tf.concat(attn, axis=-1)
    multi_linear = self.linear(concat_attn)
    return multi_linear

#############################################################################

class TransformerEncoder(Layer):
  def __init__(self, d_k, d_v, n_heads, ff_dim, dropout=0.1, **kwargs):
    super(TransformerEncoder, self).__init__()
    self.d_k = d_k
    self.d_v = d_v
    self.n_heads = n_heads
    self.ff_dim = ff_dim
    self.attn_heads = list()
    self.dropout_rate = dropout

  def build(self, input_shape):
    self.attn_multi = MultiAttention(self.d_k, self.d_v, self.n_heads)
    self.attn_dropout = Dropout(self.dropout_rate)
    self.attn_normalize = LayerNormalization(input_shape=input_shape, epsilon=1e-6)

    self.ff_conv1D_1 = Conv1D(filters=self.ff_dim, kernel_size=1, activation='relu')
    # input_shape[0]=(batch, sequence_length, 7), input_shape[0][-1] = 7
    self.ff_conv1D_2 = Conv1D(filters=input_shape[0][-1], kernel_size=1)
    self.ff_dropout = Dropout(self.dropout_rate)
    self.ff_normalize = LayerNormalization(input_shape=input_shape, epsilon=1e-6)

  def call(self, inputs): # inputs = (in_seq, in_seq, in_seq)
    attn_layer = self.attn_multi(inputs)
    attn_layer = self.attn_dropout(attn_layer)
    attn_layer = self.attn_normalize(inputs[0] + attn_layer)

    ff_layer = self.ff_conv1D_1(attn_layer)
    ff_layer = self.ff_conv1D_2(ff_layer)
    ff_layer = self.ff_dropout(ff_layer)
    ff_layer = self.ff_normalize(inputs[0] + ff_layer)
    return ff_layer

  def get_config(self): # Needed for saving and loading model with custom layer
    config = super().get_config().copy()
    config.update({'d_k': self.d_k,
                   'd_v': self.d_v,
                   'n_heads': self.n_heads,
                   'ff_dim': self.ff_dim,
                   'attn_heads': self.attn_heads,
                   'dropout_rate': self.dropout_rate})
    return config

In [16]:
def create_model():
  '''Initialize time and transformer layers'''
  time_embedding = Time2Vector(sequence_length)
  attn_layer1 = TransformerEncoder(128, 128, num_heads, 128)
  attn_layer2 = TransformerEncoder(128, 128, num_heads, 128)
  attn_layer3 = TransformerEncoder(128, 128, num_heads, 128)

  '''Construct model'''
  in_seq = Input(shape=(sequence_length, 5))
  x = time_embedding(in_seq)
  x = Concatenate(axis=-1)([in_seq, x])
  x = attn_layer1((x, x, x))
  x = attn_layer2((x, x, x))
  x = attn_layer3((x, x, x))
  x = GlobalAveragePooling1D(data_format='channels_first')(x)
  x = Dropout(0.1)(x)
  x = Dense(64, activation='relu')(x)
  x = Dropout(0.1)(x)
  out = Dense(1, activation='linear')(x)

  model = Model(inputs=in_seq, outputs=out)
  model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mape'])
  return model


In [17]:
model = create_model()
model.summary()

callback = tf.keras.callbacks.ModelCheckpoint('Transformer+TimeEmbedding.hdf5',
                                              monitor='val_loss',
                                              save_best_only=True, verbose=1)

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=5,
                    callbacks=[callback],
                    validation_data=(X_valid, y_valid))



Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 10, 5)]      0                                            
__________________________________________________________________________________________________
time2_vector (Time2Vector)      (None, 10, 2)        40          input_1[0][0]                    
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 10, 7)        0           input_1[0][0]                    
                                                                 time2_vector[0][0]               
__________________________________________________________________________________________________
transformer_encoder (Transforme (None, 10, 7)        73386       concatenate[0][0]            

ValueError: in user code:

    /Users/taras/Code/virtual-environments/MarketML/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    <ipython-input-15-3e67a7c4304a>:49 call  *
        attn_layer = self.attn_multi(inputs)
    <ipython-input-15-3e67a7c4304a>:20 call  *
        attn = [self.attn_heads[i](inputs) for i in range(self.n_heads)]
    <ipython-input-14-67fcf14149a4>:24 call  *
        q = self.query(inputs[0])
    /Users/taras/Code/virtual-environments/MarketML/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:885 __call__  **
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    /Users/taras/Code/virtual-environments/MarketML/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:212 assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer dense_1 is incompatible with the layer: expected axis -1 of input shape to have value 7 but received input with shape [None, 10, 8]


In [None]:
model = tf.keras.models.load_model('/content/Transformer+TimeEmbedding.hdf5',
                                   custom_objects={'Time2Vector': Time2Vector,
                                                   'SingleAttention': SingleAttention,
                                                   'MultiAttention': MultiAttention,
                                                   'TransformerEncoder': TransformerEncoder})


In [22]:
'''Calculate predictions and metrics'''

#Calculate predication for training, validation and test data
train_pred = model.predict(X_train)
valid_pred = model.predict(X_valid)
test_pred = model.predict(X_test)

#Print evaluation metrics for all datasets
train_eval = model.evaluate(X_train, y_train, verbose=0)
valid_eval = model.evaluate(X_valid, y_valid, verbose=0)
test_eval = model.evaluate(X_test, y_test, verbose=0)
print(' ')
print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval[0], train_eval[1], train_eval[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(valid_eval[0], valid_eval[1], valid_eval[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval[0], test_eval[1], test_eval[2]))

 
Evaluation metrics
Training Data - Loss: 0.0008, MAE: 0.0168, MAPE: 9.8916
Validation Data - Loss: 0.0021, MAE: 0.0293, MAPE: 4.0979
Test Data - Loss: 0.0021, MAE: 0.0292, MAPE: 4.0982


In [None]:
figure = plotly.subplots.make_subplots(rows=3, cols=1)

figure.add_scatter(x=train_df.index, y=train_data[:, 3], connectgaps=True, opacity=0.5, row=1, col=1)
figure.add_scatter(x=np.arange(sequence_length, train_pred.shape[0]+sequence_length), y=train_pred, opacity=0.5, row=1, col=1)

figure.add_scatter(x=valid_df.index, y=valid_data[:, 3], connectgaps=True, opacity=0.5, row=2, col=1)
figure.add_scatter(x=np.arange(sequence_length, valid_pred.shape[0]+sequence_length), y=valid_pred, opacity=0.5, row=2, col=1)

figure.add_scatter(x=test_df.index, y=test_data[:, 3], connectgaps=True, opacity=0.5, row=3, col=1)
figure.add_scatter(x=np.arange(sequence_length, test_pred.shape[0]+sequence_length), y=test_pred, opacity=0.5, row=3, col=1)

