In [None]:
from datetime import datetime

import numpy as np

filename = '../input/apple201718/AAPL.csv'


def get_date(x):
    """Parse a 'YYYY-MM-DD HH:MM' timestamp field into a datetime.

    np.genfromtxt passes converters either raw bytes or an already decoded
    str depending on its `encoding` setting, so both input types are accepted.
    """
    if isinstance(x, bytes):
        x = x.decode("utf-8")
    return datetime.strptime(x, '%Y-%m-%d %H:%M')


# Column 0 is run through get_date; the remaining columns are type-inferred (dtype=None).
data = np.genfromtxt(filename, names=True, delimiter=',', dtype=None, converters={0: get_date})

In [None]:
# Select the price/volume fields and the timestamp field from the structured array.
prices = data[['Open', 'High', 'Low', 'Close', 'Volume']]
dates = data[['Date']]
# Unpack every record into a plain list so `prices` becomes a regular array with
# one row per record, and flatten the single-field date records into one array.
prices = np.array([list(record) for record in prices])
dates = np.array([value for record in dates for value in record])

In [None]:
# Remove rows with nan values
# np.argwhere returns (row, column) index pairs; only the row component identifies
# the rows to drop, so column indices must not be mixed into the deletion list.
count = np.argwhere(np.isnan(prices))[:, 0]
count = np.unique(count)

# Drop the same rows from both arrays so dates and prices stay aligned.
dates = np.delete(dates, count)
prices = np.delete(prices, count, axis=0)

In [None]:
# Scale train and test data
from sklearn.preprocessing import MinMaxScaler

def scale_data(scaler: "MinMaxScaler", data: np.ndarray, window=250):
    """Scale a 2-D array in consecutive row windows and return the result.

    The scaler is re-fit on every window of `window` rows (the last window may
    be shorter), so each window is scaled independently of the others.

    Args:
        scaler: object exposing `fit_transform(block)`, e.g. a MinMaxScaler.
        data: 2-D array with one row per sample.
        window: number of rows per independently scaled window.

    Returns:
        A new float array with the same shape as `data`; the input array is
        left untouched.
    """
    # Work on a float copy: writing scaled values back into the caller's array
    # would mutate it and silently truncate the results for integer input.
    scaled = np.array(data, dtype=float)
    for start in range(0, len(scaled), window):
        stop = start + window
        scaled[start:stop, :] = scaler.fit_transform(scaled[start:stop, :])
    return scaled

scaler = MinMaxScaler(feature_range=(0, 1))
# Show the first five rows before and after scaling as a sanity check.
print(prices[:5])
prices = scale_data(scaler, prices)
print(prices[:5])

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt


def ewma(data, alpha=0.1, window=10):
    """Smooth a 1-D series with a truncated exponentially weighted average.

    The kernel holds the weights (1 - alpha) ** k for k = 0..window-1,
    normalised to sum to one. The series is convolved with it and the result
    is cut back to the input length, so the first window-1 outputs are built
    from fewer than `window` samples.
    """
    n_samples = len(data)
    kernel = np.power(1 - alpha, np.arange(window))
    kernel /= kernel.sum()
    smoothed = np.convolve(data, kernel, mode='full')
    return smoothed[:n_samples]


# Three smoothing variants of the scaled Open column (column 0).
open_ewma_v1 = ewma(prices[:,0])            # defaults: alpha=0.1, window=10
open_ewma_v2 = ewma(prices[:,0], 0.01, 5)   # alpha=0.01, window=5
open_ewma_v3 = ewma(prices[:,0], 0.1, 5)    # alpha=0.1, window=5


# Plot EWMA and Actual opening price
# Legend labels encode "<window>WAL<alpha>"; each must match the ewma() call above.
plt.figure(figsize=(12,6))
plt.plot(range(100), prices[0:100, 0], color='darkblue', label='Actual Open')
plt.plot(range(100), open_ewma_v1[0:100], color='orange', label='EWMA 10WAL0.1 Open')
plt.plot(range(100), open_ewma_v2[0:100], color='red', label='EWMA 5WAL0.01 Open')
plt.plot(range(100), open_ewma_v3[0:100], color='green', label='EWMA 5WAL0.1 Open')
plt.xlabel('Trading Minute')
plt.ylabel('Scaled Price')
plt.legend(fontsize=14)
plt.savefig('EWMA vs Actual V2')
plt.show()

In [None]:
y = prices[:, 3]  # y = scaled close prices
print(prices[10:20])
# Smooth every column separately with ewma (alpha=0.1, window=5), then put the
# smoothed columns back into a rows-by-columns array.
smoothed_columns = [ewma(prices[:, col], 0.1, 5) for col in range(prices.shape[1])]
prices = np.array(smoothed_columns).transpose()
print(prices[10:20])

In [None]:
# Drop the first 5 rows of both arrays to account for the EWMA delay
# (5 matches the window length passed to ewma for the price columns).
dates = dates[5:]
prices = prices[5:]
print(f"Rows in prices: {len(prices)}")
print(f"Rows in dates: {len(dates)}")

In [None]:
def create_dataset(X: np.array, Y: np.array, window=200):
    """Build sliding-window samples and their targets.

    For every index i from `window` up to len(X) - 2, the sample is the
    `window` rows X[i-window:i] and the target is Y[i], i.e. the Y entry at
    the row right after the window.

    Returns:
        A pair (samples, targets) of arrays with one entry per index i.
    """
    # NOTE(review): the range stops at len(X) - 2, so the pair ending at the
    # final row is never produced - confirm this is intended.
    target_indices = range(window, len(X) - 1)
    samples = [X[end - window:end, :] for end in target_indices]
    targets = [Y[end] for end in target_indices]
    return np.array(samples), np.array(targets)

# create_dataset pairs each window X[i-window:i] with Y[i], the row right after
# the window, so X and y must stay row-aligned here. Shifting y by one more row
# beforehand would push the target two rows ahead instead of the next one.
y = prices[:, 3]  # Close column; the next-row value is picked by create_dataset
X = prices
X, y = create_dataset(X, y, 250)
print(f"Shape of y: {y.shape}")
print(f"Shape of X: {X.shape}")

In [None]:
# Split data
# Chronological 80/20 split: the first 80% of samples train, the rest test.
split = int(0.8*len(X))
X_train, y_train = X[:split, :, :], y[:split]
X_test, y_test = X[split:, :, :], y[split:]
print(f"shape of X_train: {X_train.shape}")
print(f"shape of X_test: {X_test.shape}")
print(f"shape of y_train: {y_train.shape}")
print(f"shape of y_test: {y_test.shape}")

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

def LSTMModel(window: int, n_features: int):
    """Build and compile a network of three stacked LSTM layers.

    Args:
        window: number of time steps in each input sample.
        n_features: number of features per time step.

    Returns:
        A compiled Sequential model (MAE loss, Adam optimizer) ending in a
        single linear output unit.
    """
    layers = [
        LSTM(25, input_shape=(window, n_features), dropout=0.1,
             recurrent_dropout=0.1, return_sequences=True),
        LSTM(50, dropout=0.1, recurrent_dropout=0.1, return_sequences=True),
        # The last LSTM returns only its final state, which feeds the Dense output.
        LSTM(25, dropout=0.1, recurrent_dropout=0.1),
        Dense(1, activation='linear'),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mae')
    return model

# Input dimensions come straight from the training tensor: axis 1 is the window, axis 2 the features.
model = LSTMModel(window=X_train.shape[1], n_features=X_train.shape[2])

In [None]:
# Run with GPU
import tensorflow as tf

# Place training on the first GPU, then print the layer summary.
with tf.device('/GPU:0'):
    model.fit(x=X_train, y=y_train, batch_size=64, epochs=10)
    model.summary()


In [None]:
# Write the 3-layer model's architecture to a JSON file
model_json = model.to_json()
with open("LSTM3L10E.json", mode="w") as json_file:
    json_file.write(model_json)
# Write the same model's weights to an HDF5 file
model.save_weights("LSTM3L10E.h5")

In [None]:
from keras.models import model_from_json
# Imported here so this cell runs on a fresh kernel without relying on a later cell.
from sklearn.metrics import mean_squared_error

# Rebuild the architecture from the saved JSON; `with` closes the file even on error.
with open('LSTM3L10E.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights('LSTM3L10E.h5')

# Score the reloaded model on its own predictions.
predictions = loaded_model.predict(X_test)
scaled_MSE = mean_squared_error(y_test, predictions)
print(f"Scaled MSE for 3-layer LSTM = {scaled_MSE}")

In [None]:
from sklearn.metrics import mean_squared_error

# Score the in-memory 3-layer model on the held-out samples.
prediction = model.predict(X_test)
scaled_MSE = mean_squared_error(y_true=y_test, y_pred=prediction)
print(f"Scaled MSE for 3-layer LSTM = {scaled_MSE}")

In [None]:
def LSTMModelV2(window: int, n_features: int):
    """Build and compile a network of two stacked LSTM layers.

    Args:
        window: number of time steps in each input sample.
        n_features: number of features per time step.

    Returns:
        A compiled Sequential model (MAE loss, Adam optimizer) ending in a
        single linear output unit.
    """
    model = Sequential()
    stack = (
        LSTM(15, input_shape=(window, n_features), dropout=0.1,
             recurrent_dropout=0.2, return_sequences=True),
        LSTM(25, dropout=0.2, recurrent_dropout=0.2),
        Dense(1, activation='linear'),
    )
    for layer in stack:
        model.add(layer)
    model.compile(optimizer='adam', loss='mae')
    return model

def LSTMModelV3(window: int, n_features: int):
    """Create LSTM model with 4 layers.

    Args:
        window: number of time steps in each input sample.
        n_features: number of features per time step.

    Returns:
        A compiled Sequential model (MAE loss, Adam optimizer) ending in a
        single linear output unit.
    """
    model = Sequential()
    model.add(LSTM(10, input_shape=(window, n_features), dropout=0.1,
                   recurrent_dropout=0.1, return_sequences=True))
    model.add(LSTM(20, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    model.add(LSTM(15, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    # The last LSTM returns only its final state, which feeds the Dense output.
    model.add(LSTM(10, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mae', optimizer='adam')
    # Hand the compiled model back; without this the caller receives None.
    return model


model_v2 = LSTMModelV2(X_train.shape[1], X_train.shape[2])
with tf.device('/GPU:0'):
    # Train and summarise the 2-layer model itself, not the 3-layer `model`.
    model_v2.fit(X_train, y_train, epochs=10, batch_size=64)
    model_v2.summary()

In [None]:
# Score the 2-layer model on the held-out samples.
prediction_v2 = model_v2.predict(X_test)
scaled_MSE = mean_squared_error(y_true=y_test, y_pred=prediction_v2)
print(f"Scaled MSE for 2-layer LSTM = {scaled_MSE}")

In [None]:
# serialize model to JSON
model_json = model_v2.to_json()
with open("LSTM2L10E.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
# The weights must come from model_v2 so they match the architecture saved above.
model_v2.save_weights("LSTM2L10E.h5")

In [None]:
model_v3 = LSTMModelV3(X_train.shape[1], X_train.shape[2])
with tf.device('/GPU:0'):
    # Train and summarise the 4-layer model itself, not the 3-layer `model`.
    model_v3.fit(X_train, y_train, epochs=10, batch_size=64)
    model_v3.summary()

In [None]:
# Score the 4-layer model on the held-out samples.
prediction_v3 = model_v3.predict(X_test)
scaled_MSE = mean_squared_error(y_true=y_test, y_pred=prediction_v3)
print(f"Scaled MSE for 4-layer LSTM = {scaled_MSE}")

In [None]:
# serialize model to JSON
model_json = model_v3.to_json()
with open("LSTM4L10E.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
# The weights must come from model_v3 so they match the architecture saved above.
model_v3.save_weights("LSTM4L10E.h5")