In [None]:
pip install tensorflow-gpu

In [1]:
import tensorflow as tf
# Ask TensorFlow for a GPU device name; an empty string means no GPU was found.
device_name = tf.test.gpu_device_name()
# Report the outcome so the check is visible in the cell output.
if device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    print('GPU device not found')

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
!pip install yfinance

In [3]:
import pandas as pd
from collections import deque
import random
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from scipy.stats import t
import time
from sklearn import preprocessing
import yfinance as yf
from datetime import date, datetime, timedelta
from tqdm import tqdm
from keras.regularizers import l2

#from keras.layers import BatchNormalization

In [4]:
#Classifying if the return in the future is positive or negative
def classify(future):
    """Label a future return: 1 when it is strictly positive, otherwise 0.

    `future` is converted with float() before the comparison, so zero and
    negative values (and NaN) are labelled 0.
    """
    is_positive = float(future) > 0
    return int(is_positive)

In [5]:
#Return calculation from x days before current date where x is Lag
def ReturnCalculation (Database, Lag=1):
    """Compute log returns of the 'Close' column over a distance of `Lag` rows.

    Parameters
    ----------
    Database : pandas.DataFrame
        Table containing a 'Close' column.
    Lag : int, default 1
        Number of rows between the two closes that are compared.

    Returns
    -------
    tuple
        ``(returns, Database.index)``. ``returns`` is a 1-D float array with one
        entry per row of ``Database``; the first ``Lag`` entries are NaN and
        entry ``i`` holds ``log(Close[i]) - log(Close[i - Lag])``.

    Raises
    ------
    ValueError
        If ``Lag`` is negative or larger than the number of rows.
    """
    # Work on a plain array so rows are always addressed by position;
    # `Series[i]` looks `i` up as a label when the index holds integers.
    Close = np.asarray(Database['Close'], dtype=float)
    dimension = Close.shape[0]
    if Lag < 0 or Lag > dimension:
        raise ValueError("Lag must be between 0 and the number of rows (%d), got %r" % (dimension, Lag))
    LogClose = np.log(Close)
    # `[:dimension - Lag]` rather than `[:-Lag]` so that Lag=0 stays valid.
    Out = LogClose[Lag:] - LogClose[:dimension - Lag]
    return np.append(np.repeat(np.nan, Lag), Out), Database.index

In [6]:
# Standard Deviation Calculation of the past X days where X is the LagSD
def SDCalculation (DailyReturns, LagSD):
    """Rolling sample standard deviation over the `LagSD` preceding entries.

    Entry ``i`` of the result is ``np.std(DailyReturns[i-LagSD:i], ddof=1)``,
    i.e. the window ends just before position ``i``. The first ``LagSD``
    entries are NaN, so the result has the same length as ``DailyReturns``.
    """
    n_obs = DailyReturns.shape[0]
    n_windows = n_obs - LagSD
    rolling_sd = np.zeros(n_windows)
    for first in range(n_windows):
        window = DailyReturns[first:first + LagSD]
        rolling_sd[first] = np.std(window, ddof=1)
    padding = np.repeat(np.nan, LagSD)
    return np.append(padding, rolling_sd)

In [44]:
#Generating the same Database without FutureCalculation or Target
def _DownloadPrices(ticker, start, end):
    """Download the Yahoo Finance history of `ticker` between `start` and `end`, dropping rows that contain NaN."""
    Prices = yf.download(ticker, start=start, end=end, progress=False).dropna()
    if Prices.empty:
        raise ValueError("No price data returned for %s between %s and %s" % (ticker, start, end))
    return Prices


def DatabaseGenerationForecast_predict(Lag, IndexEndDays, LagSD, i, sequence_len):
    """Build the feature table used for forecasting (no future return / target column).

    Parameters
    ----------
    Lag : int
        Accepted for signature compatibility but not used here: returns are
        computed with ReturnCalculation's default lag.
    IndexEndDays : indexable of timestamps
        Calendar from which the download window is taken.
    LagSD : int
        Window length handed to SDCalculation for the TSLA return volatility.
    i : int
        Position in ``IndexEndDays`` where the download window starts.
    sequence_len : int
        The window ends at ``IndexEndDays[i + sequence_len]``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: TSLA_Day, TSLA_Volume, APPL_Day, APPL_Volume,
        VIX_Day, SD, SPY_Day, SPY_Volume. Rows containing any NaN are dropped.

    Raises
    ------
    ValueError
        If a ticker download comes back empty.
    """
    start = IndexEndDays[i].date()
    end = IndexEndDays[i + sequence_len].date()
    DatabaseT = _DownloadPrices("TSLA", start, end)
    DatabaseS = _DownloadPrices("SPY", start, end)
    DatabaseA = _DownloadPrices("AAPL", start, end)
    DatabaseV = _DownloadPrices("^VIX", start, end)
    DailyReturnsT, IndexT = ReturnCalculation(DatabaseT)
    DailyReturnsS, IndexS = ReturnCalculation(DatabaseS)
    DailyReturnsA, IndexA = ReturnCalculation(DatabaseA)
    DailyReturnsV, IndexV = ReturnCalculation(DatabaseV)
    SD = SDCalculation(DailyReturnsT, LagSD)
    # Give every return series the index of its own ticker so pandas aligns the
    # columns by date. Plain arrays would be matched by position and break (or
    # silently shift) whenever the tickers' calendars differ.
    Data = pd.DataFrame({
        'TSLA_Day': pd.Series(DailyReturnsT, index=IndexT),
        'TSLA_Volume': DatabaseT['Volume'],
        'APPL_Day': pd.Series(DailyReturnsA, index=IndexA),
        'APPL_Volume': DatabaseA['Volume'],
        'VIX_Day': pd.Series(DailyReturnsV, index=IndexV),
        'SD': pd.Series(SD, index=IndexT),
        'SPY_Day': pd.Series(DailyReturnsS, index=IndexS),
        'SPY_Volume': DatabaseS['Volume'],
    })
    # Dates missing for any ticker, and the leading NaN returns / SD values, are removed here.
    return Data.dropna()

In [8]:
#It generates the database for fitting transformer. No positional encoding is needed as LSTM plays this role in the model structure
#It basically formulates sequences to input into the lstm and changes your data from [number_of_samples, number_of_features] to [number_of_samples, seq_length, number_of_features]
def Transformer_Database (Timestep, XData_AR, YData_AR):
    """Cut a 2-D feature array into overlapping windows of `Timestep` rows.

    Returns ``(X, Y)`` where ``X`` has shape
    ``[rows - Timestep + 1, Timestep, features]`` and ``Y[k]`` is the entry of
    ``YData_AR`` at the last row of window ``k``.
    """
    n_rows, n_features = XData_AR.shape[0], XData_AR.shape[1]
    n_windows = n_rows - Timestep + 1
    windows = np.zeros([n_windows, Timestep, n_features])
    labels = np.zeros([n_windows])
    for first in range(n_windows):
        last_exclusive = first + Timestep
        windows[first, :, :] = XData_AR[first:last_exclusive]
        labels[first] = YData_AR[last_exclusive - 1]
    return windows, labels

In [None]:
# Date range of the SPY download whose index serves as the calendar.
Start = '2020-11-14'
End = '2022-08-26'
IndexEndDays = yf.download("SPY", start=Start, end=End, progress=False).index
# Position 447 is the furthest calendar entry the following prediction loop
# reads (sequence_len 443 plus i up to 4); printing it fails early if the
# download is too short.
print(IndexEndDays[447])

Target = "TSLA"
Lag = 5          #how many days in the future you want to predict
LagSD = 8        #how many past days do you want to include in your standard deviation
Timestep = 60    #how many days do you want include in one sequence you input into LSTM
Batch_Size = 75  #batch_size passed to model.predict

In [None]:
#Prediction of Transformed LSTM Model of Up or Down in 5 days
model = keras.models.load_model('/content/drive/MyDrive/models_2_nonstateful')
# Fresh results table; the later prediction cells add their rows to it.
ResultsCollection=pd.DataFrame({'Model Type': [], 'Date_Forecast': [], 'Forecast': []})
ResultColumns = list(ResultsCollection.columns)
ForecastRows = []  # one record per loop iteration
for i in tqdm(range(5)):
    #Database is downloaded from yahoo finance and lag of returns defined
    XData_Forecast = DatabaseGenerationForecast_predict(Lag, IndexEndDays, LagSD, i, 443)
    Index_Forecast = XData_Forecast.index[-1]
    #StandardScaler() will normalize the features i.e. each column of X, INDIVIDUALLY, so that each column or feature will have μ = 0 and σ = 1
    # NOTE(review): the scaler is fitted on the forecast window itself; confirm
    # this matches how the inputs were scaled when the model was trained.
    Scaled_Norm = preprocessing.StandardScaler().fit(XData_Forecast)
    XDataForecastTotalScaled = Scaled_Norm.transform(XData_Forecast)
    # The zero vector only fills the target argument; Y_T is not used afterwards.
    XDataForecastTotalScaled_T, Y_T = Transformer_Database(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
    #Model predicting if the next trading day will have a positive or negative return in 5 days
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T, batch_size=Batch_Size)
    # Only the prediction for the last (most recent) window is kept.
    IterResults={'Model Type': 'NonStateful', 'Date_Forecast': Index_Forecast, 'Forecast' : TransformerPrediction[-1]}
    # DataFrame.append was removed in pandas 2.0: collect plain records and
    # rebuild the table from them instead.
    ForecastRows.append(IterResults)
    ResultsCollection = pd.DataFrame(ForecastRows, columns=ResultColumns)
    #Results are saved after every iteration, so a partial run is still on disk
    ResultsCollection.to_csv('/content/drive/MyDrive/Colab_Results/pred_nonstateful.csv',index=False)

In [14]:
# Date range of the SPY download whose index serves as the calendar.
Start = '2020-10-27'
End = '2022-08-24'
IndexEndDays = yf.download("SPY", start=Start, end=End, progress=False).index

Target = "TSLA"
Lag = 5          #how many days in the future you want to predict
LagSD = 8        #how many past days do you want to include in your standard deviation
Timestep = 45    #rows per input window built by Transformer_Database
Batch_Size = 50  #batch_size passed to model.predict
DF = 4           #not referenced by the other cells shown in this notebook

In [None]:
#Prediction of Transformed LSTM Model of Up or Down in 5 days
# Requires `ResultsCollection` from the non-stateful prediction cell; the rows
# produced here are added to that table.
model = keras.models.load_model('/content/drive/MyDrive/models_1_stateful')
for i in tqdm(range(5)):
    #Database is downloaded from yahoo finance and lag of returns defined
    XData_Forecast = DatabaseGenerationForecast_predict(Lag, IndexEndDays, LagSD, i, 453)
    # Drop the first row of the feature table before scaling.
    XDataForecast = XData_Forecast.iloc[1: , :]
    Index_Forecast = XDataForecast.index[-1]
    #StandardScaler() will normalize the features i.e. each column of X, INDIVIDUALLY, so that each column or feature will have μ = 0 and σ = 1
    # NOTE(review): the scaler is fitted on the forecast window itself; confirm
    # this matches how the inputs were scaled when the model was trained.
    Scaled_Norm = preprocessing.StandardScaler().fit(XDataForecast)
    XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
    # The zero vector only fills the target argument; Y_T is not used afterwards.
    XDataForecastTotalScaled_T, Y_T = Transformer_Database(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
    #Model predicting if the next trading day will have a positive or negative return in 5 days
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T, batch_size=Batch_Size)
    # Only the prediction for the last (most recent) window is kept.
    IterResults={'Model Type': 'Stateful', 'Date_Forecast': Index_Forecast, 'Forecast' : TransformerPrediction[-1]}
    # DataFrame.append was removed in pandas 2.0; pd.concat adds the new row instead.
    ResultsCollection = pd.concat([ResultsCollection, pd.DataFrame([IterResults])], ignore_index=True)
    #Results are saved
    # NOTE(review): the file is named 'pred_nonstateful.csv' although it now
    # also holds the stateful rows; confirm that one combined file is intended.
    ResultsCollection.to_csv('/content/drive/MyDrive/Colab_Results/pred_nonstateful.csv',index=False)

In [38]:
# Date range of the SPY download whose index serves as the calendar.
Start = '2020-11-20'
End = '2022-08-24'
IndexEndDays = yf.download("SPY", start=Start, end=End, progress=False).index

Target = "TSLA"
Lag = 3          #how many days in the future you want to predict
LagSD = 5        #how many past days do you want to include in your standard deviation
Timestep = 30    #rows per input window built by Transformer_Database
Batch_Size = 50  #batch_size passed to model.predict

In [None]:
#Prediction of Transformed LSTM Model of Up or Down in `Lag` days (Lag=3 for this run)
# Requires `ResultsCollection` from the earlier prediction cells; the rows
# produced here are added to that table.
model = keras.models.load_model('/content/drive/MyDrive/models_3_lag3')
for i in tqdm(range(5)):
    #Database is downloaded from yahoo finance and lag of returns defined
    XData_Forecast = DatabaseGenerationForecast_predict(Lag, IndexEndDays, LagSD, i, 435)
    # Drop the first row of the feature table before scaling.
    XDataForecast = XData_Forecast.iloc[1: , :]
    Index_Forecast = XDataForecast.index[-1]
    #StandardScaler() will normalize the features i.e. each column of X, INDIVIDUALLY, so that each column or feature will have μ = 0 and σ = 1
    # NOTE(review): the scaler is fitted on the forecast window itself; confirm
    # this matches how the inputs were scaled when the model was trained.
    Scaled_Norm = preprocessing.StandardScaler().fit(XDataForecast)
    XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
    # The zero vector only fills the target argument; Y_T is not used afterwards.
    XDataForecastTotalScaled_T, Y_T = Transformer_Database(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
    #Model predicting if the return `Lag` days ahead will be positive or negative
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T, batch_size=Batch_Size)
    # Only the prediction for the last (most recent) window is kept.
    IterResults={'Model Type': 'Lag3', 'Date_Forecast': Index_Forecast, 'Forecast' : TransformerPrediction[-1]}
    # DataFrame.append was removed in pandas 2.0; pd.concat adds the new row instead.
    ResultsCollection = pd.concat([ResultsCollection, pd.DataFrame([IterResults])], ignore_index=True)
    #Results are saved
    # NOTE(review): the file is named 'pred_nonstateful.csv' although it now
    # also holds the Lag3 rows; confirm that one combined file is intended.
    ResultsCollection.to_csv('/content/drive/MyDrive/Colab_Results/pred_nonstateful.csv',index=False)

In [None]:
# Show the architecture of the most recently loaded model (nothing is trained here).
# Model.summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()