In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf 
import plotly.graph_objects as go
import sqlite3 

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM,Dense, Dropout, BatchNormalization
from keras.preprocessing.sequence import TimeseriesGenerator

%matplotlib inline  


In [4]:
database_path = "./stocktrx.db"
# Create your connection.
cnx = sqlite3.connect(database_path)
df = pd.read_sql_query("SELECT * FROM stocks", cnx)
df.head()

Unnamed: 0,TrxDate,Ticker,Open,High,Low,Close,AdjClose,Volume
0,2017-07-19 00:00:00,AAPL,37.619999,37.855,37.487499,37.755001,35.945583,83692000.0
1,2017-07-19 00:00:00,BA,209.429993,210.929993,208.990005,210.880005,198.707397,2258400.0
2,2017-07-19 00:00:00,DAL,53.77,54.34,53.099998,53.790001,50.100189,10733000.0
3,2017-07-19 00:00:00,GE,25.817308,25.990385,25.807692,25.903847,24.509535,25784096.0
4,2017-07-19 00:00:00,GOOG,967.840027,973.039978,964.030029,970.890015,970.890015,1224500.0


In [5]:
# Rename TrxDate column to Date
df = df.rename(columns={'TrxDate': 'Date'})
df.head()

Unnamed: 0,Date,Ticker,Open,High,Low,Close,AdjClose,Volume
0,2017-07-19 00:00:00,AAPL,37.619999,37.855,37.487499,37.755001,35.945583,83692000.0
1,2017-07-19 00:00:00,BA,209.429993,210.929993,208.990005,210.880005,198.707397,2258400.0
2,2017-07-19 00:00:00,DAL,53.77,54.34,53.099998,53.790001,50.100189,10733000.0
3,2017-07-19 00:00:00,GE,25.817308,25.990385,25.807692,25.903847,24.509535,25784096.0
4,2017-07-19 00:00:00,GOOG,967.840027,973.039978,964.030029,970.890015,970.890015,1224500.0


In [6]:
#GE is the tf (test)
tf = df.loc[df["Ticker"] == 'GE']
tf = tf.drop(columns=[ 'Ticker', 'Open', 'High', 'Low', 'AdjClose', 'Volume'])
tf.head()

Unnamed: 0,Date,Close
3,2017-07-19 00:00:00,25.903847
10,2017-07-20 00:00:00,25.663462
17,2017-07-21 00:00:00,24.913462
24,2017-07-24 00:00:00,24.451923
31,2017-07-25 00:00:00,24.461538


In [8]:
# Leave only NONE-GE stock in df.
df = df.loc[df.index[df["Ticker"] != 'GE']]
df.head()


Unnamed: 0,Date,Ticker,Open,High,Low,Close,AdjClose,Volume
0,2017-07-19 00:00:00,AAPL,37.619999,37.855,37.487499,37.755001,35.945583,83692000.0
1,2017-07-19 00:00:00,BA,209.429993,210.929993,208.990005,210.880005,198.707397,2258400.0
2,2017-07-19 00:00:00,DAL,53.77,54.34,53.099998,53.790001,50.100189,10733000.0
4,2017-07-19 00:00:00,GOOG,967.840027,973.039978,964.030029,970.890015,970.890015,1224500.0
5,2017-07-19 00:00:00,IBM,150.020004,150.25,146.710007,147.529999,122.362503,14293600.0


In [9]:
# drop columns we wont use
df = df.drop(columns=[ 'Ticker', 'Open', 'High', 'Low', 'AdjClose', 'Volume'])
df.head()

Unnamed: 0,Date,Close
0,2017-07-19 00:00:00,37.755001
1,2017-07-19 00:00:00,210.880005
2,2017-07-19 00:00:00,53.790001
4,2017-07-19 00:00:00,970.890015
5,2017-07-19 00:00:00,147.529999


In [10]:
# other stocks close and date
close_data = df['Close'].values
close_data = close_data.reshape((-1,1))

test_data = tf['Close'].values
test_data = test_data.reshape((-1,1))

split_percent = 0.80
split = int(split_percent*len(close_data))

close_train = close_data
close_test = test_data

date_train = df['Date']
date_test = tf['Date']

print(len(close_train))
print(len(close_test))


6000
1000


In [11]:
look_back = 30 # looking back 30 days

# look_back = 4
train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=40)     
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)

In [12]:
# Create the model
model = Sequential()
# add to model: LTSM algo
model.add(
    LSTM(10,
        activation='relu',
        input_shape=(look_back,1))
)
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# initialize the number of epochs we are going to train our data for
num_epochs = 30
# use fit_generator to train the data (may use .fit for 2.2.0 or higher versions of tf - works the same)
model.fit_generator(train_generator, epochs=num_epochs, verbose=1)
# model.fit(train_generator, epochs=num_epochs, verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f86d07330b8>

In [33]:
print(model)

<tensorflow.python.keras.engine.sequential.Sequential object at 0x7fefaa7953c8>


In [13]:
#Test
prediction = model.predict_generator(test_generator)

close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_test,
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Actual'
)
layout = go.Layout(
    title = "GE Stock",
    xaxis = {'title' : "Transaction Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace2, trace3], layout=layout)
fig.show()

In [14]:
# close_data = close_data.reshape((-1))

def predict(num_prediction, model):
    prediction_list = close_data[-look_back:]
    
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
        
    return prediction_list
    
def predict_dates(num_prediction):
    # last_date = df['Date'].values[-1]
    last_date = df['Date'].values[-1]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
    return prediction_dates

num_prediction = 4
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
print(forecast_dates)

[Timestamp('2021-07-08 00:00:00', freq='D'), Timestamp('2021-07-09 00:00:00', freq='D'), Timestamp('2021-07-10 00:00:00', freq='D'), Timestamp('2021-07-11 00:00:00', freq='D'), Timestamp('2021-07-12 00:00:00', freq='D')]


In [15]:

def predict(num_prediction, model):
    prediction_list = test_data[-look_back:]
    
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
        
    return prediction_list
    
def predict_dates(num_prediction):
    last_date = tf['Date'].values[-1]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
    return prediction_dates

num_prediction = 30
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
print(forecast_dates)

[Timestamp('2021-07-08 00:00:00', freq='D'), Timestamp('2021-07-09 00:00:00', freq='D'), Timestamp('2021-07-10 00:00:00', freq='D'), Timestamp('2021-07-11 00:00:00', freq='D'), Timestamp('2021-07-12 00:00:00', freq='D'), Timestamp('2021-07-13 00:00:00', freq='D'), Timestamp('2021-07-14 00:00:00', freq='D'), Timestamp('2021-07-15 00:00:00', freq='D'), Timestamp('2021-07-16 00:00:00', freq='D'), Timestamp('2021-07-17 00:00:00', freq='D'), Timestamp('2021-07-18 00:00:00', freq='D'), Timestamp('2021-07-19 00:00:00', freq='D'), Timestamp('2021-07-20 00:00:00', freq='D'), Timestamp('2021-07-21 00:00:00', freq='D'), Timestamp('2021-07-22 00:00:00', freq='D'), Timestamp('2021-07-23 00:00:00', freq='D'), Timestamp('2021-07-24 00:00:00', freq='D'), Timestamp('2021-07-25 00:00:00', freq='D'), Timestamp('2021-07-26 00:00:00', freq='D'), Timestamp('2021-07-27 00:00:00', freq='D'), Timestamp('2021-07-28 00:00:00', freq='D'), Timestamp('2021-07-29 00:00:00', freq='D'), Timestamp('2021-07-30 00:00:00'

In [16]:
trace1 = go.Scatter(
    x = date_test,
    y = close_test,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = forecast_dates,
    y = forecast,
    mode = 'lines',
    name = 'Prediction'
)

layout = go.Layout(
    title = "GE Stock",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
pfig = go.Figure(data=[trace1, trace2], layout=layout)
pfig.show()

In [17]:
# close db connection
cnx.close()
# END