In [59]:
import csv
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import torch
import copy
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from alpha_vantage.timeseries import TimeSeries

In [2]:
API_KEY = 'PZZ6AOYX66Q8H83Q'
ts = TimeSeries(key=API_KEY, output_format='pandas', indexing_type='date')
data, meta_data = ts.get_intraday('FB')
meta_data

{'1. Information': 'Intraday (15min) open, high, low, close prices and volume',
 '2. Symbol': 'FB',
 '3. Last Refreshed': '2019-03-29 16:00:00',
 '4. Interval': '15min',
 '5. Output Size': 'Compact',
 '6. Time Zone': 'US/Eastern'}

In [3]:
df = pd.read_csv('./data/FAANG_13_18_CSV.csv')
df.columns = ['PermNo', 'Date', 'Ticker', 'Low', 'High', 'Close', 'Volume', 'Open']
df['Date'] = pd.to_datetime(df.Date, format='%m/%d/%Y')
df = df.sort_values(['Date'])
fb_df = df[df.Ticker == 'FB']
aapl_df = df[df.Ticker == 'AAPL']
amzn_df = df[df.Ticker == 'AMZN']
nflx_df = df[df.Ticker == 'NFLX']
googl_df = df[df.Ticker == 'GOOGL']

In [4]:
def load_data(df, training_window=5, prediction_window=3):
    data = df['High'].values
    training_size = math.floor(0.9 * len(data))
    test_size = len(data) - training_size
    train_data = data[0:training_size]
    test_data = data[training_size:]
    
    train_data = train_data.reshape(-1, 1)
    test_data = test_data.reshape(-1, 1)
    scaler = MinMaxScaler()
    scaler.fit(train_data)
    
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)
    
    train_data = train_data.reshape(-1)
    test_data = test_data.reshape(-1)
    
    x_train, y_train, x_val, y_val = [], [], [], []

    for i in range(0, training_size - prediction_window - training_window + 1):
        x_train.append(train_data[i:i+training_window])
        y_train.append(train_data[i+training_window:i+training_window+prediction_window])
    
    for i in range(0, test_size - prediction_window - training_window + 1):
        x_val.append(test_data[i:i+training_window])
        y_val.append(test_data[i+training_window:i+training_window+prediction_window])
        
    return np.asarray(x_train), np.asarray(y_train), np.asarray(x_val), np.asarray(y_val)

In [5]:
class StockPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, device, prediction_window, batch_size=1, p=0):
        super(StockPredictor, self).__init__()
        
        ################### Model Properties ####################
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.p = p
        self.batch_size = batch_size
        self.prediction_window = prediction_window
        self.device = device
        #########################################################
        
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, dropout=self.p)
        self.hidden_states = self.initialize_hidden_states()
        self.output = nn.Linear(self.hidden_size, self.prediction_window)
        
    def initialize_hidden_states(self):
        return (torch.zeros((self.num_layers, self.batch_size, self.hidden_size), device=self.device),
                torch.zeros((self.num_layers, self.batch_size, self.hidden_size), device=self.device))
    
    def forward(self, x):
        # x is of shape torch.size([batch_size, training_window])
        model_in = torch.tensor(x, dtype=torch.float, device=self.device).view(x.shape[1], self.batch_size, self.input_size)
        lstm_out, self.hidden_states = self.lstm(model_in, self.hidden_states)
        
        # Need the output of the last timestep of the LSTM only 
        prediction = self.output(lstm_out[-1].view(self.batch_size, -1))
        
        return prediction
        
        

In [6]:
class BatchIterator(object):
    def __init__(self, data, batch_size):
        self.x = data[0]
        self.y = data[1]
        self.batch_size = batch_size
        self.low = 0
        self.high = batch_size
    
    def __getitem__(self, i):
        return (self.x[i], self.y[i])

    def __len__(self):
        return len(self.x)

    def __iter__(self):
        return self

    def __next__(self):
        if self.low >= len(self):
            raise StopIteration
        else:
            batch_x = self.x[self.low : self.high] if self.high < len(self.x) else self.x[self.low:]
            batch_y = self.y[self.low : self.high] if self.high < len(self.y) else self.y[self.low:]
            self.low += self.batch_size
            self.high += self.batch_size
            return batch_x, batch_y
    
    def reset(self):
        self.low = 0
        self.high = self.batch_size
        
        

In [63]:
batch_size = 1
input_size = 1
hidden_size = 50
num_layers = 2
prediction_window = 3
training_window = 10
dropout=0.2
epochs = 40

x_train, y_train, x_val, y_val = load_data(fb_df, training_window=training_window, prediction_window=prediction_window)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dev_cpu = torch.device("cpu")
net = StockPredictor(input_size, hidden_size, num_layers, device, prediction_window, batch_size=batch_size, p=0.2)
train_itr = BatchIterator((x_train, y_train), batch_size)
val_itr = BatchIterator((x_val, y_val), batch_size)

loss_function = nn.MSELoss(reduction='sum')
optimizer = optim.Adam(net.parameters(), lr=1e-3)

In [64]:
losses = []
min_test_mse = 200 # Arbitrary value larger than the largest error
for epoch in range(epochs):
    i = 0
    train_itr.reset()
    val_itr.reset()
    avg_loss = 0
    test_mse = 0
    net.train()
    for x, y in train_itr:
        net.zero_grad()
        net.hidden_states = net.initialize_hidden_states()
        
        y_pred = net(x)
        y_true = torch.tensor(y, dtype=torch.float, device=device)
        
        loss = loss_function(y_pred, y_true)
        loss.backward()
        optimizer.step()
        
        avg_loss += loss
        i += 1

    print('Epoch:{}, Average loss:{}'.format(epoch, avg_loss/i))
    losses.append(avg_loss)
    
    num_val = 0
    net.eval()
    for x, y_true in val_itr:
        y_pred = net(x).cpu().detach().numpy()
        test_mse += (np.square(y_true - y_pred)).mean(axis=1).item()
        num_val += 1
    
    test_mse /= num_val
    print('Average test MSE = {}'.format(test_mse))
    
    if test_mse < min_test_mse:
        del best_model_params
        min_test_mse = test_mse
        best_model_params = copy.deepcopy(net.state_dict())

print('************ Finished Training **************')
print('Minimum test MSE = {}'.format(min_test_mse))
print('Saving best model..')
torch.save(best_model_params, './models/best_model.pt')

Epoch:0, Average loss:0.0018392401980236173
Average test MSE = 0.03420280585943912
Epoch:1, Average loss:0.010904445312917233
Average test MSE = 0.034379753994572444
Epoch:2, Average loss:0.013457505963742733
Average test MSE = 0.034185560574914675
Epoch:3, Average loss:0.012979981489479542
Average test MSE = 0.03256920246029041
Epoch:4, Average loss:0.014227927662432194
Average test MSE = 0.03083106111451062
Epoch:5, Average loss:0.01500963419675827
Average test MSE = 0.03134741359539339
Epoch:6, Average loss:0.013699657283723354
Average test MSE = 0.030432446186764103
Epoch:7, Average loss:0.015362556092441082
Average test MSE = 0.033107143878541294
Epoch:8, Average loss:0.018753657117486
Average test MSE = 0.03075031483160859
Epoch:9, Average loss:0.01699778623878956
Average test MSE = 0.029459110320427105
Epoch:10, Average loss:0.01687926985323429
Average test MSE = 0.03278120037677338
Epoch:11, Average loss:0.027460768818855286
Average test MSE = 0.023753502727915172
Epoch:12, Ave

In [65]:
net.load_state_dict(torch.load('./models/best_model.pt'))
net.eval()
print('Sucessfully loaded model {}'.format('./models/best_model.pt'))

Sucessfully loaded model ./models/best_model.pt


In [66]:
num_val = 0
val_itr.reset()
test_mse = 0
for x, y_true in val_itr:
    y_pred = net(x).cpu().detach().numpy()
    test_mse += (np.square(y_true - y_pred)).mean(axis=1).item()
    num_val += 1

test_mse /= num_val
print('Average test MSE = {}'.format(test_mse))

Average test MSE = 0.019728801922865944


In [68]:
x = x_val[115].reshape(1, -1)
print(x_val[115])
print(x)
y_pred = net(x)
print(y_pred)
print(y_val[115])

[0.75434002 0.74232127 0.72385753 0.7080647  0.70713577 0.7112001
 0.70690352 0.68907852 0.66573764 0.64488872]
[[0.75434002 0.74232127 0.72385753 0.7080647  0.70713577 0.7112001
  0.70690352 0.68907852 0.66573764 0.64488872]]
tensor([[0.8814, 0.8869, 0.8915]], grad_fn=<AddmmBackward>)
[0.66248617 0.64686752 0.661383  ]


In [61]:
a = {1 : True, 2 : False}

if True:
    del b
    b = copy.deepcopy(a)

print(b)
a[1] = False
    
if False:
    print('Entered')
    b = a

print(b)

{1: True, 2: False}
{1: True, 2: False}
