In [2]:
# %pip installs into the environment of the running kernel
%pip install tensorflow==2.0

Collecting tensorflow==2.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 74kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 54.4MB/s 
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 36.8MB/s 
Installing collected packages: tensorflow-estimator, tensorboard, tensorflow
  Found existing installation: tensorflow-estimator 1.15.1
    Uninstalling tensorflow-estimato

In [1]:
# Print the version of the TensorFlow package that the kernel imported.
import tensorflow 
print(tensorflow.__version__)

2.0.0


In [0]:
# pinned to the release this notebook was run with; %pip targets the kernel's environment
%pip install yahoo_fin==0.8.4

Collecting yahoo_fin
  Downloading https://files.pythonhosted.org/packages/fe/bd/27f0066d596c87817b7d8f4a3533fdb666b1649007daee1965751adf07e8/yahoo_fin-0.8.4-py3-none-any.whl
Installing collected packages: yahoo-fin
Successfully installed yahoo-fin-0.8.4


In [0]:
# pinned to the release this notebook was run with; %pip targets the kernel's environment
%pip install requests_html==0.10.0

Collecting requests_html
  Downloading https://files.pythonhosted.org/packages/24/bc/a4380f09bab3a776182578ce6b2771e57259d0d4dbce178205779abdc347/requests_html-0.10.0-py3-none-any.whl
Collecting w3lib
  Downloading https://files.pythonhosted.org/packages/6a/45/1ba17c50a0bb16bd950c9c2b92ec60d40c8ebda9f3371ae4230c437120b6/w3lib-1.21.0-py2.py3-none-any.whl
Collecting fake-useragent
  Downloading https://files.pythonhosted.org/packages/d1/79/af647635d6968e2deb57a208d309f6069d31cb138066d7e821e575112a80/fake-useragent-0.1.11.tar.gz
Collecting parse
  Downloading https://files.pythonhosted.org/packages/4a/ea/9a16ff916752241aa80f1a5ec56dc6c6defc5d0e70af2d16904a9573367f/parse-1.14.0.tar.gz
Collecting pyquery
  Downloading https://files.pythonhosted.org/packages/78/43/95d42e386c61cb639d1a0b94f0c0b9f0b7d6b981ad3c043a836c8b5bc68b/pyquery-1.4.1-py2.py3-none-any.whl
Collecting pyppeteer>=0.0.14
[?25l  Downloading https://files.pythonhosted.org/packages/b0/16/a5e8d617994cac605f972523bb25f12e3ff9c30b

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import time
import os

In [0]:
# Preparing the Dataset
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1, test_size=0.2,
              feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
  """Turn a price history into windowed train/test arrays.

  Args:
    ticker: either a ticker symbol (str), fetched with `si.get_data`, or an
      already loaded `pd.DataFrame`. A DataFrame is copied, never modified.
    n_steps: number of consecutive rows in every input window.
    scale: when True, each feature column is scaled with its own
      `MinMaxScaler` and the fitted scalers are returned.
    shuffle: forwarded to `train_test_split`.
    lookup_step: how many rows ahead of a row its target `adjclose` lies.
    test_size: forwarded to `train_test_split`.
    feature_columns: columns used as model inputs; all must exist in the frame.

  Returns:
    dict with the keys
      'df'            - copy of the frame as loaded (before scaling),
      'column_scaler' - {column: fitted MinMaxScaler}, only when `scale` is True,
      'last_sequence' - the most recent rows, used to predict beyond the data,
      'X_train', 'X_test', 'y_train', 'y_test' - the split arrays. X is
        reshaped to (samples, len(feature_columns), n_steps) with numpy
        `reshape` (element order is kept, the axes are not transposed).

  Raises:
    TypeError: if `ticker` is neither a str nor a DataFrame.
    AssertionError: if a feature column is missing from the frame.
    ValueError: if the frame has too few rows to build a single window.
  """
  if isinstance(ticker, str):
    # load it from yahoo_fin library
    df = si.get_data(ticker)
  elif isinstance(ticker, pd.DataFrame):
    # work on a copy so the caller's frame is not scaled or truncated in place
    df = ticker.copy()
  else:
    raise TypeError(f"ticker must be a str or a pandas DataFrame, got {type(ticker).__name__}")

  # accept any sequence of column names; a list is what DataFrame indexing expects
  feature_columns = list(feature_columns)

  # this will contain all the elements we want to return from this function
  result = {}
  # we will also return the original dataframe from this function
  result['df'] = df.copy()

  # check that every requested column is in the dataframe itself
  for column in feature_columns:
    assert column in df.columns, f"'{column}' does not exist in the dataframe."

  if scale:
    column_scaler = {}
    # scale the data (prices) from 0 to 1, one scaler per column
    for column in feature_columns:
      scaler = preprocessing.MinMaxScaler()
      df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
      column_scaler[column] = scaler

    # add the MinMaxScaler instances to the result returned
    result['column_scaler'] = column_scaler

  # add the target column (label) by shifting by lookup_step
  df['future'] = df['adjclose'].shift(-lookup_step)

  # the last lookup_step rows have NaN in the future column,
  # so grab their features before the NaN rows are dropped
  last_sequence = np.array(df[feature_columns].tail(lookup_step))

  # drop NaNs
  df = df.dropna()

  # slide a window of n_steps rows over the frame; every full window
  # is paired with the target of its last row
  sequence_data = []
  sequences = deque(maxlen=n_steps)
  for entry, target in zip(df[feature_columns].values, df['future'].values):
    sequences.append(entry)
    if len(sequences) == n_steps:
      sequence_data.append([np.array(sequences), target])

  if not sequence_data:
    raise ValueError(f"not enough rows to build a single window of n_steps={n_steps}")

  # get the last sequence by appending the last 'lookup_step' rows to the last 'n_steps' window
  # for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (50+10-1) length
  # this last_sequence will be used to predict future dates that are not available in the dataset
  last_sequence = list(sequences) + list(last_sequence)
  # shift the last sequence by -1
  last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
  # add to result
  result['last_sequence'] = last_sequence

  # construct the X's and y's
  X = np.array([seq for seq, _ in sequence_data])
  y = np.array([target for _, target in sequence_data])

  # reshape X to fit the neural network
  X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))

  # split the dataset
  result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(
      X, y, test_size=test_size, shuffle=shuffle)

  # return the result
  return result


In [0]:
# Model Creation

def create_model(input_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                 loss="mean_absolute_error", optimizer="rmsprop"):
  """Build and compile a stacked recurrent regression model.

  Args:
    input_length: size of the last axis of the input; the first recurrent
      layer is created with input_shape=(None, input_length).
    units: number of units in every recurrent layer.
    cell: recurrent layer class to stack (LSTM by default).
    n_layers: number of recurrent layers, each followed by a Dropout layer.
    dropout: rate passed to every Dropout layer.
    loss: loss passed to `model.compile`.
    optimizer: optimizer passed to `model.compile`.

  Returns:
    The compiled Sequential model, ending in a single linear output unit and
    tracking "mean_absolute_error" as a metric.
  """
  model = Sequential()
  for i in range(n_layers):
    # every recurrent layer but the last one must hand the full sequence to the
    # next recurrent layer; the last one (also when it is the only one) returns
    # just its final state so that Dense produces one value per sample
    return_sequences = i < n_layers - 1
    if i == 0:
      # first layer also declares the input shape
      model.add(cell(units, return_sequences=return_sequences, input_shape=(None, input_length)))
    else:
      model.add(cell(units, return_sequences=return_sequences))

    # add dropout after each layer
    model.add(Dropout(dropout))

  model.add(Dense(1, activation="linear"))
  model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)

  return model

In [0]:
# Training configuration

# --- dataset ---------------------------------------------------------------
N_STEPS = 50        # window size, i.e. the sequence length
LOOKUP_STEP = 1     # how far ahead to predict; 1 is the next day
TEST_SIZE = 0.2     # share of the samples held out for testing
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]

# --- model -----------------------------------------------------------------
CELL = LSTM         # recurrent layer class
N_LAYERS = 3        # number of stacked recurrent layers
UNITS = 256         # units per recurrent layer
DROPOUT = 0.4       # dropout rate applied after every recurrent layer

# --- training --------------------------------------------------------------
LOSS = "mse"        # mean squared error
OPTIMIZER = "rmsprop"
BATCH_SIZE = 64
EPOCHS = 300

# --- run identity ----------------------------------------------------------
# today's date, used to tag both the cached dataset and the saved model
date_now = time.strftime("%Y-%m-%d")
# Apple stock market
ticker = "AAPL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save
model_name = (
    f"{date_now}_{ticker}-{LOSS}-{CELL.__name__}"
    f"-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
)

In [0]:
# create the output folders if they do not exist yet;
# exist_ok=True makes this safe to re-run
for folder in ("results", "logs", "data"):
    os.makedirs(folder, exist_ok=True)

In [0]:
# load the CSV file from disk (dataset) if it already exists (without downloading)
if os.path.isfile(ticker_data_filename):
    # the cache is written by DataFrame.to_csv with the index as its first column;
    # read that column back as the (date) index instead of as an extra data column
    ticker = pd.read_csv(ticker_data_filename, index_col=0, parse_dates=True)

In [0]:
# Build the dataset, train the model and persist it.

data = load_data(
    ticker,
    N_STEPS,
    lookup_step=LOOKUP_STEP,
    test_size=TEST_SIZE,
    feature_columns=FEATURE_COLUMNS,
)

# cache the downloaded dataset as CSV unless today's file is already there
if not os.path.isfile(ticker_data_filename):
    data["df"].to_csv(ticker_data_filename)

# construct the model
model = create_model(
    N_STEPS,
    loss=LOSS,
    units=UNITS,
    cell=CELL,
    n_layers=N_LAYERS,
    dropout=DROPOUT,
    optimizer=OPTIMIZER,
)

# callbacks: keep the best checkpoint and write TensorBoard logs
checkpointer = ModelCheckpoint(
    os.path.join("results", model_name),
    save_best_only=True,
    verbose=1,
)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))

# train, validating on the held-out split after every epoch
history = model.fit(
    data["X_train"],
    data["y_train"],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(data["X_test"], data["y_test"]),
    callbacks=[checkpointer, tensorboard],
    verbose=1,
)

# save the model as it stands after the final epoch
model.save(os.path.join("results", model_name) + ".h5")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 7861 samples, validate on 1966 samples
Epoch 1/300
Epoch 00001: val_loss improved from inf to 0.00051, saving model to results/2020-02-18_AAPL-mse-LSTM-seq-50-step-1-layers-3-units-256
Epoch 2/300
Epoch 00002: val_loss improved from 0.00051 to 0.00034, saving model to results/2020-02-18_AAPL-mse-LSTM-seq-50-step-1-layers-3-units-256
Epoch 3/300
Epoch 00003: val_loss did not improve from 0.00034
Epoch 4/300
Epoch 00004: val_loss did not improve from 0.00034
Epoch 5/300
Epoch 00005: val_loss did not improve from 0.00034
Epoch 6/300
Epoch 00006: val_loss did not improve from 0.00034
Epoch 7/300
Epoch 00007: val_loss did not improve from 0.00034
Epoch 8/300
Epoch 00008: val_loss improved from 0.00034 to 0.00022, saving model to results/2020-02-18_AAPL-mse-LSTM-seq-50-step-1-layers-3-units-256
Epoch 9/300
Epoc

In [0]:
 # Launch TensorBoard from the shell, reading event files from the "logs" folder.
 !tensorboard --logdir="logs" --bind_all

In [0]:
# evaluate the model
mse, mae = model.evaluate(data["X_test"], data["y_test"])
# Convert the mean absolute error from the scaled space back to price units.
# An error is a difference between two scaled values, so it is only rescaled
# (divided by the scaler's per-feature scale_); inverse_transform would also
# add the minimum price, which is an offset and not part of the error.
adjclose_scaler = data["column_scaler"]["adjclose"]
mean_absolute_error = float(mae) / adjclose_scaler.scale_[0]
print("Mean Absolute Error : ", mean_absolute_error)

In [0]:
# summarize what load_data returned (array shapes, or the type name for
# everything else) instead of dumping every array in full
{key: getattr(value, "shape", type(value).__name__) for key, value in data.items()}

In [0]:
# Prediction helper
def predict(model, data, classification=False):
  """Predict the next price from the most recent window stored in `data`.

  Takes the first N_STEPS rows of data["last_sequence"], arranges them into a
  single-sample batch, runs the model and converts the scaled output back to
  a price with the "adjclose" scaler. `classification` is accepted but unused.
  """
  window = data["last_sequence"][:N_STEPS]
  adjclose_scaler = data["column_scaler"]["adjclose"]

  # swap the two dimension sizes via reshape, then prepend the batch axis
  swapped_shape = (window.shape[1], window.shape[0])
  batch = window.reshape(swapped_shape)[np.newaxis, ...]

  # model output is still scaled to the 0..1 range
  scaled_prediction = model.predict(batch)

  # undo the scaling and unwrap the single value
  price_matrix = adjclose_scaler.inverse_transform(scaled_prediction)
  return price_matrix[0][0]

In [0]:
# last_seq = data["last_sequence"][:N_STEPS]
# column_scaler = data["column_scaler"]
# last_seq = last_seq.reshape((last_seq.shape[1], last_seq.shape[0]))
# last_seq = np.expand_dims(last_seq, axis=0)
# pred = model.predict(last_seq)
# print(pred)
# print(column_scaler["adjclose"].inverse_transform(pred))

In [0]:
# predict the future price
future_price = predict(model, data)
# ".2f" without the stray space flag, which printed a doubled space before the price
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")

In [0]:
# plot the graph
def plot_graph(model, data):
  """Plot actual against predicted prices for the last 200 test samples.

  Args:
    model: object whose `predict(X)` returns scaled predictions, one row per sample.
    data: dict providing "X_test", "y_test" and data["column_scaler"]["adjclose"].
  """
  scaler = data["column_scaler"]["adjclose"]
  y_pred = model.predict(data["X_test"])
  # the scaler works on (n_samples, 1) columns, so turn the flat targets into a
  # column (axis=1) rather than a single row of n "features"
  y_test = scaler.inverse_transform(np.expand_dims(data["y_test"], axis=1)).ravel()
  # ravel keeps a 1-D array even when there is only one sample
  y_pred = scaler.inverse_transform(y_pred).ravel()
  plt.plot(y_test[-200:], c='b')
  plt.plot(y_pred[-200:], c='r')
  plt.xlabel("Days")
  plt.ylabel("Price")
  plt.legend(["Actual Price", "Predicted Price"])
  plt.show()

In [0]:
# Draw the actual-vs-predicted price plot for the trained model.
plot_graph(model, data)

In [0]:
def get_accuracy(model, data):
    """Score how often the model predicts the direction of the price move.

    For every test sample i, the actual direction is whether y_test[i + LOOKUP_STEP]
    is above y_test[i]; the predicted direction is whether y_pred[i + LOOKUP_STEP]
    is above y_test[i]. Both are compared with `accuracy_score`.

    NOTE(review): this treats neighbouring test samples as consecutive in time,
    while load_data shuffles the split by default (shuffle=True) — confirm the
    data was built with shuffle=False before trusting the score.

    Args:
        model: object whose `predict(X)` returns scaled predictions, one row per sample.
        data: dict providing "X_test", "y_test" and data["column_scaler"]["adjclose"].

    Returns:
        The value returned by `accuracy_score` for the two direction vectors.
    """
    scaler = data["column_scaler"]["adjclose"]
    y_pred = model.predict(data["X_test"])
    # the scaler works on (n_samples, 1) columns, so turn the flat targets into a
    # column (axis=1) rather than a single row of n "features"
    y_test = scaler.inverse_transform(np.expand_dims(data["y_test"], axis=1)).ravel()
    # ravel keeps a 1-D array even when there is only one sample
    y_pred = scaler.inverse_transform(y_pred).ravel()

    current = y_test[:-LOOKUP_STEP]
    predicted_up = (y_pred[LOOKUP_STEP:] > current).astype(int)
    actual_up = (y_test[LOOKUP_STEP:] > current).astype(int)
    return accuracy_score(actual_up, predicted_up)

In [0]:
# report the directional accuracy, prefixed with the lookup step
print(f"{LOOKUP_STEP}:", "Accuracy Score:", get_accuracy(model, data))