In [None]:
import math
import sys
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import torch
%matplotlib inline

# Retrieve data

In [None]:
def get_ticker_data(stock: str, start: datetime = None, end: datetime = None):
    """Download daily price history for ``stock`` from Yahoo Finance.

    Args:
        stock: Ticker symbol interpolated into the download URL (e.g. ``'MSFT'``).
        start: Naive datetime marking the start of the range. It is measured
            against a naive 1970-01-01 epoch, i.e. treated as UTC.
            Defaults to the epoch itself.
        end: Naive datetime marking the end of the range, treated the same
            way as ``start``. Defaults to the current UTC time.

    Returns:
        The ``DataFrame`` produced by ``pd.read_csv`` on the download URL.
    """
    from datetime import timezone  # local import: only the default `end` needs it

    beg = datetime(1970, 1, 1)
    if start is None:
        start = beg
    if end is None:
        # `beg` is a naive UTC epoch, so "now" has to be expressed in UTC too.
        # datetime.now() returns local time and would shift period2 by the
        # machine's UTC offset.
        end = datetime.now(timezone.utc).replace(tzinfo=None)

    # Whole seconds elapsed since the epoch for both ends of the range.
    sdate = int((start - beg).total_seconds())
    edate = int((end - beg).total_seconds())

    url = (
        f'https://query1.finance.yahoo.com/v7/finance/download/{stock}'
        f'?period1={sdate}&period2={edate}'
        '&interval=1d&events=history&includeAdjustedClose=true'
    )
    return pd.read_csv(url, parse_dates=True)

In [None]:
msft = get_ticker_data('MSFT')
# Convert the whole column with one vectorised call instead of invoking
# pd.to_datetime once per row through .apply.
msft['Date'] = pd.to_datetime(msft['Date'])
msft.head()

In [None]:
# Line chart of the 'Close' column against 'Date'.
fig = px.line(
    msft,
    title='MSFT Stock Price',
    x='Date',
    y=['Close'],
)
fig.show()

In [None]:
# Pull the 'Close' column out of the frame as its underlying array of values.
msft_all = msft.loc[:, 'Close'].values
msft_all

# Split
Create train / test split

In [None]:
# Size of the full series and of the 10% (rounded down) reserved for testing.
sz = len(msft_all)
test_sz = math.floor(sz * .1)
(sz, test_sz)

In [None]:
# Split on an explicit index rather than negative slices: with test_sz == 0,
# `msft_all[:-0]` would be empty and `msft_all[-0:]` would be the whole series.
split_at = len(msft_all) - test_sz
train_data = msft_all[:split_at]
test_data = msft_all[split_at:]
len(train_data)+len(test_data), len(test_data)

In [None]:
# Scaling bounds are taken from the training slice only.
msmin = train_data.min()
msmax = train_data.max()
print(f'Min: {msmin}, Max: {msmax}')

In [None]:
# Min-max scale the training data using the training min and max,
# then plot the scaled series.
price_range = msmax - msmin
train_data_scaled = (train_data - msmin) / price_range
fig = px.line(
    train_data_scaled,
    title='MSFT Stock Price (Scaled)',
)
fig.show()

# Create Series Generator

In [None]:
def gen_dataset(seq_data: np.ndarray, sz: int = 3):
    """Yield sliding ``(window, target)`` pairs from a sequence.

    Args:
        seq_data: Sliceable sequence with a length.
        sz: Number of consecutive elements in each window; must be at least 1.

    Yields:
        ``(seq_data[i:i+sz], seq_data[i+sz:i+sz+1])`` for every ``i`` in
        ``range(len(seq_data) - sz)``. The target is a length-1 slice, not a
        scalar. Nothing is yielded when ``len(seq_data) <= sz``.

    Raises:
        ValueError: If ``sz`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if sz < 1:
        raise ValueError(f'sz must be at least 1, got {sz}')
    for i in range(len(seq_data) - sz):
        end = i + sz
        yield seq_data[i:end], seq_data[end:end + 1]

In [None]:
# Scratch check: the index range a 100-element window would iterate over,
# shown next to the length of the scaled series.
(range(len(train_data_scaled) - 100), len(train_data_scaled))

In [None]:
# Print the first five (window, target) pairs produced with the default window size.
for shown, (x, y) in enumerate(gen_dataset(train_data_scaled), start=1):
    print(x, y)
    if shown == 5:
        break
# NOTE(review): this prints elements 0-9 and then element 11, skipping index 10;
# confirm whether [10:11] was intended.
print(train_data_scaled[0:10], train_data_scaled[11:12])

In [None]:
# np.fromiter cannot build a float array from the generator's (window, target)
# tuples, so collect the pairs and stack windows and targets separately.
seq_len = 100
x_windows, y_targets = zip(*gen_dataset(train_data_scaled, seq_len))
# A batch_first GRU takes (batch, seq_len, features) input, so add a trailing
# feature axis; float32 matches the default dtype of torch module parameters.
x_train = torch.from_numpy(np.stack(x_windows)).float().unsqueeze(-1)
y_train = torch.from_numpy(np.stack(y_targets)).float()
x_train.shape, y_train.shape

# Model

In [None]:
# Model params
num_epochs = 25   # iterations of the training loop
num_layers = 2    # stacked GRU layers
hidden_dim = 32   # GRU hidden state size
input_dim = 1     # features per time step fed to the GRU
output_dim = 1    # size of the linear head's output

In [None]:
import torch
import torch.nn as nn

class GRU(nn.Module):
    """Multi-layer GRU followed by a linear head on the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_dim: Size of the linear head's output.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch, output_dim)``."""
        # Create the zero initial state on the input's device and dtype so the
        # model also works off-CPU or in non-float32 precision. A fresh zeros
        # tensor carries no gradient history, so no requires_grad_/detach
        # round trip is needed.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_dim,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Only the final time step's hidden output feeds the linear head.
        out = self.fc(out[:, -1, :])
        return out

In [None]:
# Instantiate the network from the parameter values set earlier in the notebook.
model = GRU(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=output_dim,
)

In [None]:
# Full-batch training: each epoch runs one forward/backward pass over all of
# x_train with a mean-squared-error loss and the Adam optimiser.
# Assumes x_train is (batch, seq_len, input_dim) and y_train is
# (batch, output_dim), matching the model's input and output — TODO confirm.
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before the new pass.
    optimiser.zero_grad()
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train)
    print("Epoch ", epoch, "MSE: ", loss.item())
    loss.backward()
    optimiser.step()

In [None]:
# Toy tensors for experimenting with shapes: a 1-D float ramp 0..4, and a
# 5x3 integer grid whose rows are all [0, 1, 2].
seq = torch.from_numpy(np.array(list(map(float, range(5)))))
seq1 = torch.from_numpy(np.array([list(range(3)) for _ in range(5)]))

In [None]:
# Number of dimensions of the toy grid tensor.
len(seq1.shape)

In [None]:
# Put the parent of the current working directory (as an absolute path) at the
# front of sys.path so the `tlaloc` import below can resolve — presumably the
# package lives one level up; verify. The membership check keeps re-runs of
# this cell from prepending duplicate entries.
parent_dir = str(Path('..').resolve())
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
from tlaloc.data import SequenceDataset