In [1]:
!pip install yfinance
!pip install chart_studio
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.preprocessing.sequence import TimeseriesGenerator
import yfinance as yf
import chart_studio.plotly as py
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

Collecting yfinance
  Downloading yfinance-0.1.54.tar.gz (19 kB)
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.9.tar.gz (8.1 kB)
Building wheels for collected packages: yfinance, multitasking
  Building wheel for yfinance (setup.py) ... [?25ldone
[?25h  Created wheel for yfinance: filename=yfinance-0.1.54-py2.py3-none-any.whl size=22409 sha256=b044a22fbe3c9f02b839bdf0bdb5ec91fcbc221ece45511a48f05cf9ae9555fe
  Stored in directory: /root/.cache/pip/wheels/6f/ad/f4/4a269deab015672fd1ab353d6b2c3fcf64f413980737c13541
  Building wheel for multitasking (setup.py) ... [?25ldone
[?25h  Created wheel for multitasking: filename=multitasking-0.0.9-py3-none-any.whl size=8366 sha256=47ab27418fe7ece4aa527034e8901a85915890ca74b8c85f51f3eb7e098d00d7
  Stored in directory: /root/.cache/pip/wheels/5e/8a/c4/59c699498647c7c94b14c87a904ca7540646107b3d94b7c320
Successfully built yfinance multitasking
Installing collected packages: multitasking, yfinance
Successfully installed multitasking

Using TensorFlow backend.


In [2]:
# Sanity check: list the input CSV so a missing file shows up before loading.
!ls ../input/GLD.csv

../input/GLD.csv


In [3]:
# Load the GLD price history from the input CSV.
filename = "../input/GLD.csv"
df = pd.read_csv(filename)

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that only appends a stray "None" line).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2517 entries, 0 to 2516
Data columns (total 7 columns):
Date         2517 non-null object
Open         2517 non-null float64
High         2517 non-null float64
Low          2517 non-null float64
Close        2517 non-null float64
Adj Close    2517 non-null float64
Volume       2517 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 137.8+ KB
None


In [4]:
# Parse the date strings into real timestamps (read_csv left them as `object`).
df['Date'] = pd.to_datetime(df['Date'])

# Index the rows by date. `set_axis(..., inplace=True)` was deprecated in
# pandas 1.5 and removed in 2.0; assigning the index directly behaves the same
# on every pandas version. The 'Date' column itself is kept because later
# cells still read df['Date'].
df.index = pd.DatetimeIndex(df['Date'])

# Keep only the columns used downstream. Rebinding instead of dropping in
# place, together with errors='ignore', makes the cell safe to re-run after
# the columns are already gone.
df = df.drop(columns=['Open', 'High', 'Low', 'Volume'], errors='ignore')

In [5]:
# Closing prices as a NumPy array: the series to be modelled.
raw_seq = df['Close'].values

# Features per time step. `ndim` is the array's rank, not its feature count:
# it only happens to equal 1 for a 1-D series and would report 2 for any
# (n_samples, n_features) matrix. Read the width from the shape instead.
n_features = 1 if raw_seq.ndim == 1 else raw_seq.shape[1]
data_size = len(raw_seq)

print("n_features:", n_features)
print("data_size:", data_size)

n_features: 1
data_size: 2517


In [6]:
# Column vector of closing prices, shape (n_samples, 1).
close_data = df['Close'].values.reshape(-1, 1)

# Chronological hold-out: the earliest 80% of rows train the model and the
# remainder is kept for testing.
split_percent = 0.80
split = int(split_percent * len(close_data))

close_train, close_test = close_data[:split], close_data[split:]
date_train, date_test = df['Date'].iloc[:split], df['Date'].iloc[split:]

# Report the two partition sizes, one per line.
print(len(close_train), len(close_test), sep="\n")

2013
504


In [7]:
# Window length: every sample is a run of `look_back` consecutive closes and
# its target is the close that follows the window. 14 is the value used here;
# adjust it to match current gold-market behaviour.
look_back = 14

# Training windows are served in batches of 20.
train_generator = TimeseriesGenerator(
    close_train,
    close_train,
    length=look_back,
    batch_size=20,
)

# Test windows are served one at a time.
test_generator = TimeseriesGenerator(
    close_test,
    close_test,
    length=look_back,
    batch_size=1,
)

In [8]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Single-layer LSTM regressor: a window of `look_back` closes in, one
# predicted close out.
model = Sequential()
model.add(
    LSTM(128,
        activation='relu',
        input_shape=(look_back, 1))
)
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.summary()

num_epochs = 8
# `fit_generator` is deprecated; `fit` consumes generators / Sequence objects
# directly. Keeping the returned History object lets the per-epoch loss be
# inspected afterwards.
history = model.fit(train_generator, epochs=num_epochs, verbose=1)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               66560     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 66,689
Trainable params: 66,689
Non-trainable params: 0
_________________________________________________________________


In [9]:
# `predict` accepts the generator directly; `predict_generator` is deprecated.
prediction = model.predict(test_generator)

# Flatten everything to 1-D for plotting.
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

# The generator needs `look_back` closes before it can emit its first target,
# so there are len(close_test) - look_back predictions and prediction[i]
# belongs to test day i + look_back. Plotting against the full `date_test`
# would draw the predicted curve `look_back` days too early.
prediction_dates = date_test.iloc[look_back:]
assert len(prediction_dates) == len(prediction), "prediction/date length mismatch"

trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = prediction_dates,
    y = prediction,
    mode = 'lines',
    name = 'Test Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    # The data is the GLD series, not Google stock; match the later figures.
    title = "Gold ETF (GLD)",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()

In [10]:
# Flatten the closing prices back to a 1-D array; predict() slices its seed
# window from this array.
close_data = close_data.reshape(-1)

def predict(num_prediction, model, series=None, window=None):
    """Roll the model forward to forecast values beyond the end of a series.

    The forecast is autoregressive: the model is fed the latest ``window``
    values, its output is appended to the history, and the window slides
    forward one step, so later predictions are built on earlier ones.

    Args:
        num_prediction: Number of future steps to forecast.
        model: Object whose ``predict`` takes an array of shape
            ``(1, window, 1)`` and returns a 2-D array whose ``[0][0]`` entry
            is the next value.
        series: Historical values that seed the forecast. Defaults to the
            notebook-level ``close_data``.
        window: Number of trailing values fed to the model at each step.
            Defaults to the notebook-level ``look_back``.

    Returns:
        1-D NumPy array of length ``num_prediction + 1``: the last observed
        value followed by the forecasts.

    Raises:
        ValueError: If ``window`` is smaller than 1 or ``series`` holds fewer
            than ``window`` values.
    """
    # Fall back to the notebook globals only when the caller gave no input,
    # so existing two-argument calls keep working.
    if series is None:
        series = close_data
    if window is None:
        window = look_back
    if window < 1:
        raise ValueError("window must be at least 1")

    # Flatten so that a column vector of shape (n, 1) is accepted as well.
    history = np.asarray(series, dtype=float).reshape(-1)
    if history.size < window:
        raise ValueError(
            "series holds %d values but the window needs %d" % (history.size, window)
        )

    # A plain list avoids re-allocating the whole array on every step.
    rolling = list(history[-window:])
    for _ in range(num_prediction):
        x = np.array(rolling[-window:], dtype=float).reshape((1, window, 1))
        rolling.append(float(model.predict(x)[0][0]))

    # Drop all seed values except the last one, so the result starts at the
    # final observed value and lines up with predict_dates().
    return np.array(rolling[window - 1:])
    
def predict_dates(num_prediction, last_date=None, freq="D"):
    """Build the date axis that goes with a forecast from ``predict``.

    Args:
        num_prediction: Number of future steps that were forecast.
        last_date: Date of the last observed value; the returned range starts
            here. Defaults to the final entry of the notebook-level
            ``df['Date']``.
        freq: pandas frequency string for the spacing between dates. The
            default ``"D"`` (calendar days, weekends included) matches the
            previous behaviour; pass ``"B"`` to step over weekends.

    Returns:
        List of ``num_prediction + 1`` timestamps, starting at ``last_date``.
    """
    # Fall back to the notebook global only when no anchor date is given, so
    # existing one-argument calls keep working.
    if last_date is None:
        last_date = df['Date'].values[-1]
    return pd.date_range(last_date, periods=num_prediction + 1, freq=freq).tolist()

# Forecast horizon: number of steps beyond the last observed close.
num_prediction = 30

# Date axis and forecast values; each holds num_prediction + 1 entries,
# starting at the last observed day.
forecast_dates = predict_dates(num_prediction)
forecast = predict(num_prediction, model)



In [11]:
# Overlay the forward forecast on the held-out test closes.
trace1 = go.Scatter(x=forecast_dates, y=forecast, mode='lines', name='Future Prediction')
trace2 = go.Scatter(x=date_test, y=close_test, mode='lines', name='Ground Truth')

layout = go.Layout(
    title="Gold ETF (GLD)",
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
)
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [12]:
# Show the forward forecast on its own.
trace1 = go.Scatter(x=forecast_dates, y=forecast, mode='lines', name='Prediction')

layout = go.Layout(
    title="Gold ETF (GLD)",
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
)
fig = go.Figure(data=[trace1], layout=layout)
fig.show()