Attention: All the hyperparameters are kept hidden, but the structure of the model can be observed. Wherever you see {x}, a hyperparameter value has been withheld. This is because of all the time and money spent training and testing the models.

In [None]:
pip install tensorflow-gpu

In [None]:
import tensorflow as tf
# Ask TensorFlow for the name of a GPU device. The value is only stored here;
# nothing in the visible part of this notebook displays or reads `device_name`.
device_name = tf.test.gpu_device_name()

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
!pip install yfinance

In [None]:
import pandas as pd
from collections import deque
import random
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from scipy.stats import t
import time
from sklearn import preprocessing
import yfinance as yf
from datetime import date, datetime, timedelta
from tqdm import tqdm
from keras.regularizers import l2
#from keras.layers import BatchNormalization

In [None]:
# Date range used to build the master list of TSLA trading days.
Start='2010-09-02'
End='2022-08-10'
# Index of the TSLA price history for that range; the data-building functions
# pick each rolling window's start and end dates from it by position.
IndexEndDays = yf.download("TSLA",start=Start,  end=End, progress=False).index
Lag=3 #how many days in the future you want to predict
LagSD=5 #how many past days to include in the standard deviation
# {x} marks a hyperparameter whose value is withheld (see the note at the top
# of the notebook); substitute real values before running.
# NOTE(review): `Dropout` rebinds the name imported from tensorflow.keras.layers in the imports cell.
Dropout={x}
LearningRate={x}
# NOTE(review): Alpha is not referenced by any code visible in this file.
Epochs={x}; Alpha={x}; Timestep={x}; Batch_Size={x}

In [None]:
#Label the sign of a future return: 1 for a gain, 0 otherwise
def classify(future):
    """Return 1 when ``future`` (coerced with ``float``) is strictly positive, else 0."""
    is_gain = float(future) > 0  # zero, negative and NaN returns are not gains
    return int(is_gain)

In [None]:
#Return calculation from x days before current date where x is Lag
def Daily-Return (Database, Lag=1):
    dimension=Database.shape[0];Out=np.zeros([dimension-Lag])
    for i in range(Lag, dimension):
         Out[i - Lag] = (np.log(Database['Close'][i]) - np.log(Database['Close'][i - Lag]))
    return np.append(np.repeat(np.nan, Lag),Out), Database.index

In [None]:
# Standard Deviation Calculation of the past X days where X is the LagSD
def STD (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif])
    for i in range(dif, dimension):
        Out[i - dif]=np.std(DailyReturns[i-dif:i],ddof=1)
    return np.append(np.repeat(np.nan, dif),Out)

In [None]:
#Calculates the closing return X days in the future where X is Lag
def Future-Return (Database, Lag):
    dimension=Database.shape[0];Out=np.zeros([dimension-Lag])
    for i in range(dimension-Lag):
         Out[i] = (np.log(Database['Close'][i + Lag]) - np.log(Database['Close'][i]))
    return np.append(Out,np.repeat(np.nan, Lag)), Database.index

In [None]:
#Generates Database for 438 trading days of TSLA, SPY, APPL, and VIX closing returns and volume, and also standard deviation
def DataCreation (Lag, IndexEndDays, LagSD, i):
    DatabaseT = yf.download("TSLA",start= IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(), progress=False)
    DatabaseT.dropna(inplace=True)
    DatabaseS = yf.download("SPY", start=IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(),
                           progress=False)
    DatabaseS.dropna(inplace=True)
    DatabaseA = yf.download("AAPL", start=IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(),
                           progress=False)
    DatabaseA.dropna(inplace=True)
    DatabaseV = yf.download("^VIX", start=IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(),
                           progress=False)
    DatabaseV.dropna(inplace=True)
    DailyReturnsOld, Index = Future-Return(DatabaseT, Lag)
    DailyReturnsT, Index = Daily-Return(DatabaseT)
    DailyReturnsS, Index = Daily-Return(DatabaseS)
    DailyReturnsA, Index = Daily-Return(DatabaseA)
    DailyReturnsV, Index = Daily-Return(DatabaseV)
    SD = STD(DailyReturnsT, LagSD)
    Data = pd.DataFrame({'TSLA_Day': DailyReturnsT, 'TSLA_Volume': DatabaseT['Volume'], 'APPL_Day': DailyReturnsA, 'APPL_Volume': DatabaseA['Volume'], 'VIX_Day': DailyReturnsV,  'SD': SD, 'SPY_Day': DailyReturnsS, 'SPY_Volume': DatabaseS['Volume'], 'DailyReturnsOld': DailyReturnsOld})
    Data = Data.set_index(Index)
    Data.dropna(inplace=True)
    Data['Target'] = list(map(classify, Data['DailyReturnsOld']))
    Data = Data.drop("DailyReturnsOld", 1)
    return Data.dropna()

In [None]:
#Generating the same Database without Future-Return or Target
def Forecast(Lag, IndexEndDays, LagSD, i):
    DatabaseT = yf.download("TSLA", start= IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(), progress=False)
    DatabaseT.dropna(inplace=True)
    DatabaseS = yf.download("SPY", start= IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(), progress=False)
    DatabaseS.dropna(inplace=True)
    DatabaseA = yf.download("AAPL", start= IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(), progress=False)
    DatabaseA.dropna(inplace=True)
    DatabaseV = yf.download("^VIX", start= IndexEndDays[i].date(), end=IndexEndDays[i + 438].date(), progress=False)
    DatabaseV.dropna(inplace=True)
    DailyReturnsT, Index = Daily-Return(DatabaseT)
    DailyReturnsS, Index = Daily-Return(DatabaseS)
    DailyReturnsA, Index = Daily-Return(DatabaseA)
    DailyReturnsV, Index = Daily-Return(DatabaseV)
    SD = STD(DailyReturnsT, LagSD)
    Data = pd.DataFrame({'TSLA_Day': DailyReturnsT, 'TSLA_Volume': DatabaseT['Volume'], 'APPL_Day': DailyReturnsA, 'APPL_Volume': DatabaseA['Volume'], 'VIX_Day': DailyReturnsV,  'SD': SD, 'SPY_Day': DailyReturnsS, 'SPY_Volume': DatabaseS['Volume']})
    Data = Data.set_index(Index)
    return Data

In [None]:
#Builds the sliding-window sequences fed to the LSTM. No positional encoding is needed as the LSTM plays this role in the model structure
#It changes the data from [number_of_samples, number_of_features] to [number_of_windows, Timestep, number_of_features]
def ForecastLSTM_Formatter (Timestep, XData_AR, YData_AR):
    """Turn a 2-D feature table into overlapping windows of ``Timestep`` rows.

    Parameters
    ----------
    Timestep : int
        Number of consecutive rows per window (at least 1).
    XData_AR : array-like, shape (rows, features)
        Feature table in time order.
    YData_AR : array-like, length >= rows
        One label per row.  A pandas Series is read by position, not by label.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Float array of shape ``(rows - Timestep + 1, Timestep, features)`` where
        window ``k`` holds rows ``k .. k + Timestep - 1``, and a float array
        with the label of the last row of each window.

    Raises
    ------
    ValueError
        If ``XData_AR`` is not 2-D, ``Timestep`` is smaller than 1 or leaves a
        negative number of windows, or there are fewer labels than rows.
    """
    Features2D = np.asarray(XData_AR, dtype=float)
    Labels = np.asarray(YData_AR, dtype=float).ravel()
    if Features2D.ndim != 2:
        raise ValueError("XData_AR must be 2-dimensional (rows, features)")
    Rows = Features2D.shape[0]
    Sample = Rows - Timestep + 1
    if Timestep < 1 or Sample < 0:
        raise ValueError(f"Timestep = {Timestep} is not usable with {Rows} rows")
    if Labels.shape[0] < Rows:
        raise ValueError("YData_AR must provide one label per row of XData_AR")
    # Row k of WindowRows lists the source rows of window k: k, k+1, ..., k+Timestep-1.
    WindowRows = np.arange(Sample)[:, None] + np.arange(Timestep)[None, :]
    XDataTrainScaledRNN = Features2D[WindowRows]
    YDataTrainRNN = Labels[Timestep - 1:Timestep - 1 + Sample].copy()
    return XDataTrainScaledRNN, YDataTrainRNN

The following classes, MultiHeadSelfAttention and TransformerBlock, are taken from https://arxiv.org/pdf/2109.12621.pdf

In [None]:
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    The input of shape (batch_size, seq_len, features) is projected to
    queries, keys and values of width ``embed_dim``, split into ``num_heads``
    heads of width ``embed_dim // num_heads``, attended per head, then
    re-joined and passed through a final dense projection.

    Parameters
    ----------
    embed_dim : int
        Width of the projections and of the output; must be divisible by ``num_heads``.
    num_heads : int, default 8
        Number of attention heads.
    **kwargs
        Standard ``tf.keras.layers.Layer`` arguments (``name``, ``dtype``, ...).

    Raises
    ------
    ValueError
        If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        # Forwarding kwargs lets Keras rebuild the layer from its saved config.
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        self.projection_dim = embed_dim // num_heads
        self.query_dense = tf.keras.layers.Dense(embed_dim)
        self.key_dense = tf.keras.layers.Dense(embed_dim)
        self.value_dense = tf.keras.layers.Dense(embed_dim)
        self.combine_heads = tf.keras.layers.Dense(embed_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised and re-created."""
        config = super(MultiHeadSelfAttention, self).get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config

    def attention(self, query, key, value):
        """Scaled dot-product attention; returns ``(output, weights)``."""
        score = tf.matmul(query, key, transpose_b=True)
        # Scale by sqrt of the key width before the softmax.
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape (batch_size, seq_len, embed_dim) to (batch_size, num_heads, seq_len, projection_dim)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention; output has shape (batch_size, seq_len, embed_dim)."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(query, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(key, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(value, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        attention, _ = self.attention(query, key, value)  # the attention weights are not needed here
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))  # (batch_size, seq_len, embed_dim)
        return self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)

In [None]:
# Transformer Keras Block
class TransformerBlock(tf.keras.layers.Layer):
    """Transformer encoder block whose attention is an average of several attention layers.

    ``Bagging`` independent MultiHeadSelfAttention layers each see the input
    through their own dropout; their outputs are averaged, then followed by
    the usual residual connection + layer normalisation and a two-layer
    feed-forward network with a second residual connection + normalisation.

    Parameters
    ----------
    embed_dim : int
        Width of the attention output.  The block adds this output to its
        input, so the input's last dimension has to match it.
    num_heads : int
        Heads per attention layer.
    ff_dim : int
        Hidden width of the feed-forward network.
    rate : float, default 0.1
        Dropout rate applied after the attention average and after the feed-forward network.
    **kwargs
        Standard ``tf.keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Kept so get_config can report how the block was built.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.Bagging = {x}
        # TODO (from the original author): check this bagging too
        self.nb_dict = {}
        input_dropouts = []
        for i in range(self.Bagging):
            self.nb_dict["att{0}".format(i)] = MultiHeadSelfAttention(embed_dim, num_heads)
            # One dropout per attention layer, created once here instead of on every call.
            input_dropouts.append(tf.keras.layers.Dropout(.1))
        self.input_dropouts = input_dropouts
        self.ffn = tf.keras.Sequential(
            [tf.keras.layers.Dense(ff_dim, activation="relu"), tf.keras.layers.Dense(embed_dim), ])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised and re-created."""
        config = super(TransformerBlock, self).get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads,
                       "ff_dim": self.ff_dim, "rate": self.rate})
        return config

    def call(self, inputs, training=None):
        """Run the block on ``inputs``; ``training`` toggles every dropout layer."""
        # Intermediate tensors stay in locals; nothing is stored on the layer during a call.
        averaged = None
        for i in range(self.Bagging):
            member_input = self.input_dropouts[i](inputs, training=training)
            member_output = self.nb_dict["att{0}".format(i)](member_input) / self.Bagging
            averaged = member_output if averaged is None else averaged + member_output
        attn_output = self.dropout1(averaged, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [None]:
def Transformer_Model(Shape1, Shape2, HeadsAttention, Dropout, LearningRate):
    """Build and compile the LSTM + transformer binary classifier.

    Parameters
    ----------
    Shape1 : int
        Sequence length (time steps per sample).
    Shape2 : int
        Number of features per time step.
    HeadsAttention : int
        Number of heads in each attention layer of the transformer block.
    Dropout : float
        Dropout rate used by the LSTMs, the transformer block and the dense head.
    LearningRate : float
        Learning rate of the Nadam optimizer.

    Returns
    -------
    tf.keras.Model
        Model compiled with binary cross-entropy loss and an accuracy metric.
    """
    # Model structure is defined
    Input = tf.keras.Input(shape=(Shape1, Shape2), name="Input")
    # Two stacked LSTMs run ahead of the transformer block; both return full sequences
    X = tf.keras.layers.LSTM(units={x}, activation='tanh', dropout=Dropout, recurrent_dropout={x}, kernel_regularizer=l2({x}), recurrent_regularizer=l2({x}), bias_regularizer=l2({x}), return_sequences=True)(Input)
    # The second LSTM consumes the first one's output (it used to read Input again, leaving the first LSTM unused)
    X = tf.keras.layers.LSTM(units={x}, activation='tanh', dropout=Dropout, recurrent_dropout={x}, kernel_regularizer=l2({x}), recurrent_regularizer=l2({x}), bias_regularizer=l2({x}), return_sequences=True)(X)
    # Transformer architecture is implemented. TransformerBlock adds its attention output
    # (width embed_dim) to its input, so the LSTM above has to produce embed_dim units.
    transformer_block_1 = TransformerBlock(embed_dim=32, num_heads=HeadsAttention, ff_dim=8, rate=Dropout)
    X = transformer_block_1(X)
    # Sequence is averaged over time, then dense layers are used
    X = tf.keras.layers.GlobalAveragePooling1D()(X)
    X = tf.keras.layers.Dense({x}, activation=tf.nn.sigmoid)(X)
    X = tf.keras.layers.Dropout(Dropout)(X)
    Output = tf.keras.layers.Dense({x}, activation='sigmoid', name="Output")(X)
    model = tf.keras.Model(inputs=Input, outputs=Output)
    # Optimizer is defined
    Opt = tf.keras.optimizers.Nadam(learning_rate=LearningRate, beta_1={x}, beta_2={x}, epsilon={x}, name='Nadam')
    # Model is compiled
    model.compile(optimizer=Opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model
    


In [None]:
#Walk-forward fitting of the LSTM + Transformer model: will TSLA be up or down `Lag` trading days ahead?
Window = 438  # rows per rolling window; must equal the 438-row offset used inside DataCreation and Forecast
model = Transformer_Model(Timestep, 8, HeadsAttention=8, Dropout=0.1, LearningRate=LearningRate)
ResultsCollection=pd.DataFrame({'Date_Forecast': [], 'Forecast': [], 'Loss': []})
#To resume an interrupted run, load the saved model and results instead:
#model = keras.models.load_model('example_model_path')
#ResultsCollection = pd.read_csv ('example_csv_path')

# One iteration per window start that still has a window end inside IndexEndDays
for i in tqdm(range(len(IndexEndDays) - Window)):
    #Database is downloaded from yahoo finance and lag of returns defined
    Data = DataCreation (Lag, IndexEndDays, LagSD, i)
    XData_AR = Data.drop(columns='Target')  # select by name, not by column position
    YData_AR = Data['Target'].to_numpy()  # plain array, so the formatter reads labels by position
    Scaled_Norm = preprocessing.StandardScaler().fit(XData_AR) #StandardScaler() will normalize the features i.e. each column of X, INDIVIDUALLY, so that each column or feature will have μ = 0 and σ = 1
    XData_AR_Norm = Scaled_Norm.transform(XData_AR)
    XData_AR_Norm_T, YData_AR_Norm_T = ForecastLSTM_Formatter(Timestep, XData_AR_Norm, YData_AR)
    #The same model instance keeps training on each new window
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience={x}) #training on this window stops early once the loss has failed to improve for `patience` consecutive epochs
    history = model.fit(XData_AR_Norm_T, YData_AR_Norm_T, epochs=Epochs, batch_size=Batch_Size, callbacks=[callback], verbose=0)
    model.save('example_model_path')
    #Adding the first trading day that has no future-return label, so the model doesn't know if it's 1 or 0
    XData_Forecast = Forecast(Lag, IndexEndDays, LagSD, i)
    Index_Forecast = XData_Forecast.index[(-Lag)]
    XData_Forecast = XData_Forecast.iloc[[(-Lag)]]
    XDataForecast = pd.concat([XData_AR,XData_Forecast])
    XDataForecast = XDataForecast.iloc[1: , :]  # drop the oldest row so the table keeps its length
    XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
    #The labels passed here are placeholders; only the windows are used for prediction
    XDataForecastTotalScaled_T, Y_T = ForecastLSTM_Formatter(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
    #Model predicting if that trading day will have a positive or negative return `Lag` days later
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T, batch_size=Batch_Size)
    IterResults={'Date_Forecast': Index_Forecast, 'Forecast' : TransformerPrediction[-1], 'Loss': history.history['loss'] }
    print(history.history['loss'])
    #DataFrame.append no longer exists in current pandas; concat adds the new row instead
    ResultsCollection=pd.concat([ResultsCollection, pd.DataFrame([IterResults])], ignore_index=True)
    #Results are saved
    ResultsCollection.to_csv('example_csv_path',index=False)
