In [14]:
from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
from pandas import datetime
import datetime as dt
import urllib.request, json
import os
import numpy as np
import math, time
import itertools
from sklearn import preprocessing
from operator import itemgetter
from sklearn.metrics import mean_squared_error
from math import sqrt
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM

Using TensorFlow backend.


In [3]:
data_source = 'alphavantage'

if data_source == 'alphavantage':
    # ====================== Loading Data from Alpha Vantage ==================================

    api_key = '7TONQ8CM5PXZ4YEO'

    # American Airlines stock market prices
    ticker = "GOOGL"

    # JSON file with all the stock market data for AAL from the last 20 years
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker,api_key)

    # Save data to this file
    file_to_save = 'stock_market_data-%s.csv'%ticker

    # If you haven't already saved data,
    # Go ahead and grab the data from the url
    # And store date, low, high, volume, close, open values to a Pandas DataFrame
    if not os.path.exists(file_to_save):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
            # extract stock market data
            data = data['Time Series (Daily)']
            df = pd.DataFrame(columns=['Date','Low','High','Close','Open'])
            for k,v in data.items():
                date = dt.datetime.strptime(k, '%Y-%m-%d')
                data_row = [date.date(),float(v['3. low']),float(v['2. high']),
                            float(v['4. close']),float(v['1. open'])]
                df.loc[-1,:] = data_row
                df.index = df.index + 1
        print('Data saved to : %s'%file_to_save)        
        df.to_csv(file_to_save)

    # If the data is already there, just load it from the CSV
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)

Data saved to : stock_market_data-GOOGL.csv


In [4]:
df = pd.read_csv("stock_market_data-GOOGL.csv")

In [9]:
df.drop(df.columns[[0,1,2]], inplace=True, axis=1)
df.head()

Unnamed: 0,High,Close,Open
0,95.1939,91.2818,92.9018
1,205.6599,205.5746,204.3709
2,189.0135,186.2249,188.5771
3,936.3,923.59,935.0
4,195.6038,193.4973,193.7932


In [10]:
df.tail()

Unnamed: 0,High,Close,Open
3549,229.3731,228.9869,227.4772
3550,308.2265,303.552,306.12
3551,294.1129,293.4609,293.3405
3552,289.3883,289.0322,288.3902
3553,148.1874,146.4972,140.5739


In [11]:
df.describe()

Unnamed: 0,High,Close,Open
count,3554.0,3554.0,3554.0
mean,439.990599,436.019538,436.137733
std,290.723186,288.760928,288.723885
min,51.0275,50.1598,49.6984
25%,235.985925,233.57225,233.548475
50%,307.99535,305.4153,305.11435
75%,589.445,584.745,586.60875
max,1291.44,1285.5,1289.12


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3554 entries, 0 to 3553
Data columns (total 3 columns):
High     3554 non-null float64
Close    3554 non-null float64
Open     3554 non-null float64
dtypes: float64(3)
memory usage: 83.4 KB


In [13]:
df['High'] = df['High'] / 1000
df['Open'] = df['Open'] / 1000
df['Close'] = df['Close'] / 1000
df.head(5)

Unnamed: 0,High,Close,Open
0,0.095194,0.091282,0.092902
1,0.20566,0.205575,0.204371
2,0.189014,0.186225,0.188577
3,0.9363,0.92359,0.935
4,0.195604,0.193497,0.193793


In [15]:
def load_data(stock, seq_len):
    amount_of_features = len(stock.columns)
    data = stock.as_matrix() #pd.DataFrame(stock)
    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    result = np.array(result)
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    x_train = train[:, :-1]
    y_train = train[:, -1][:,-1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1][:,-1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], amount_of_features))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], amount_of_features))  

    return [x_train, y_train, x_test, y_test]

In [16]:
def build_model(layers):
    model = Sequential()

    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        output_dim=layers[2]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop",metrics=['accuracy'])
    print("Compilation Time : ", time.time() - start)
    return model

def build_model2(layers):
        d = 0.2
        model = Sequential()
        model.add(LSTM(128, input_shape=(layers[1], layers[0]), return_sequences=True))
        model.add(Dropout(d))
        model.add(LSTM(64, input_shape=(layers[1], layers[0]), return_sequences=False))
        model.add(Dropout(d))
        model.add(Dense(16,init='uniform',activation='relu'))        
        model.add(Dense(1,init='uniform',activation='relu'))
        model.compile(loss='mse',optimizer='adam',metrics=['accuracy'])
        return model

In [17]:
window = 5
X_train, y_train, X_test, y_test = load_data(df[::-1], window)
print("X_train", X_train.shape)
print("y_train", y_train.shape)
print("X_test", X_test.shape)
print("y_test", y_test.shape)

X_train (3193, 5, 3)
y_train (3193,)
X_test (355, 5, 3)
y_test (355,)


In [21]:
# model = build_model([3,lag,1])
model = build_model2([3,window,1])

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x000001D6C18C2E48>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:


TypeError: while_loop() got an unexpected keyword argument 'maximum_iterations'