In [631]:
import json
import math
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterator, List

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorboardX
import torch

# import locals
sys.path.insert(0, str(Path('..').resolve()))
from tlaloc import StockDataModule, SeqDataset
from tlaloc import StockGRUModel

In [2]:
# Absolute locations of the saved JSON metadata and the trained model weights.
trnsf_path = (Path('..') / 'output' / 'models' / 'params.json').resolve()
model_path = (Path('..') / 'output' / 'models' / 'model.pth').resolve()

# Step counts handed to the prediction call further down (look-behind, look-ahead).
behind, forward = 100, 5

In [3]:
# Parse the saved metadata JSON and echo it, pretty-printed, for inspection.
meta = json.loads(trnsf_path.read_text())
print(json.dumps(meta, indent=4))

{
    "stock": "MSFT",
    "model": {
        "input_dim": 1,
        "hidden_dim": 32,
        "num_layers": 2,
        "output_dim": 1,
        "lr": 0.01
    },
    "data": {
        "min": 48.43,
        "max": 231.649994,
        "window": 100
    }
}


In [4]:
# Rebuild the network from the hyper-parameters stored under meta['model'].
model = StockGRUModel(**meta['model'])
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load on a
# CPU-only machine; the weights are copied into the freshly built model either way.
state_dict = torch.load(model_path, map_location='cpu')
# Last expression of the cell, so the key-matching report is displayed.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [72]:
# Fetch data for the stock named in the metadata over a fixed five-year date range.
start_date = datetime(2016, 2, 20)
end_date = datetime(2021, 2, 20)
msft = StockDataModule.get_stock_data(
    '../data', meta['stock'], start=start_date, end=end_date
)

# 'Close' column as a float tensor, then scaled with the min/max stored in the metadata.
close_values = msft['Close'].values
seq = torch.FloatTensor(close_values)
data_meta = meta['data']
seq_adj = SeqDataset.scale(seq, data_meta['min'], data_meta['max'])

In [6]:
#import torch, gc
#gc.collect()
#torch.cuda.empty_cache()

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [8]:
import numpy as np
# Toy walk-through of the sliding-window indexing, using the integers 0..9.
seq_test = torch.FloatTensor(np.array(list(range(10))))
print(seq_test)
win = 3
lookahead = 4

# Look-behind pass: print each window that ends `back` positions before the end.
overlap = []
for back in range(lookahead, 0, -1):
    window_end = -back
    print(back, seq_test[window_end - win:window_end])
    overlap.append(float(back))

print('overlap:', overlap)

# Look-ahead pass: start from the last `win` values and keep appending a stand-in
# value (the step number) so the trailing window slides forward.
test_inputs = [value for value in seq_test[-win:].numpy()]
print('prediction:', test_inputs)
for step in range(lookahead):
    print(step, test_inputs[-win:])
    test_inputs += [float(step)]

print(test_inputs)
print(test_inputs[-lookahead:])


tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
4 tensor([3., 4., 5.])
3 tensor([4., 5., 6.])
2 tensor([5., 6., 7.])
1 tensor([6., 7., 8.])
overlap: [4.0, 3.0, 2.0, 1.0]
prediction: [7.0, 8.0, 9.0]
0 [7.0, 8.0, 9.0]
1 [8.0, 9.0, 0.0]
2 [9.0, 0.0, 1.0]
3 [0.0, 1.0, 2.0]
[7.0, 8.0, 9.0, 0.0, 1.0, 2.0, 3.0]
[0.0, 1.0, 2.0, 3.0]


In [9]:

def predict(seq: torch.Tensor, window: int, lookbehind: int, lookahead: int) -> List:
    """Run the model over the tail of ``seq`` and then past its end.

    Relies on the notebook-level ``model`` and ``device`` globals. As a side
    effect the model is moved to ``device`` and switched to eval mode.

    Args:
        seq: Tensor of input values (expected to be 1-D).
        window: Number of consecutive values fed to the model per call; each
            input is reshaped to ``(1, window)``.
        lookbehind: Number of trailing positions of ``seq`` to re-predict,
            each from the ``window`` real values that precede it.
        lookahead: Number of steps to forecast beyond the end of ``seq``; each
            forecast is appended to the history and fed back in.

    Returns:
        A list of ``lookbehind + lookahead`` floats: the look-behind outputs
        (oldest first) followed by the look-ahead forecasts.
    """
    # Plain Python floats; detach()/cpu() keep this working for tensors that
    # live on the GPU or track gradients, where a bare .numpy() would raise.
    history = seq[-window:].detach().cpu().tolist()
    overlap = []
    forecast = []
    with torch.no_grad():
        model.to(device)
        model.eval()
        # Previous period (overlap): predict each of the last `lookbehind`
        # positions from the real values that precede it.
        for i in range(lookbehind, 0, -1):
            x = torch.as_tensor(seq[-window - i:-i], dtype=torch.float32).reshape(1, -1)
            overlap.append(model(x.to(device)).item())

        # Future period (predictions): slide the window over the model's own outputs.
        for _ in range(lookahead):
            x = torch.tensor(history[-window:], dtype=torch.float32).reshape(1, -1)
            y = model(x.to(device)).item()
            history.append(y)
            forecast.append(y)

    # Forecasts are collected separately rather than sliced off the history:
    # a `[-lookahead:]` slice returns the WHOLE list when lookahead == 0.
    return overlap + forecast

In [73]:
# Forecast with the model's own predict method; the commented line below is the
# equivalent call through the notebook-level predict() helper.
#p_seq = predict(seq_adj, meta['data']['window'], behind, forward)
scaled_history = list(seq_adj.numpy())
p_seq = model.predict(scaled_history, meta['data']['window'], behind, forward)

In [74]:
# Integer x-axis positions: one per observed value, and for the prediction trace
# the last `behind` observed positions plus `forward` positions past the end.
seq_size = seq_adj.size(0)
pred_start = seq_size - behind
pred_stop = seq_size + forward
actual_range = list(range(seq_size))
pred_range = list(range(pred_start, pred_stop))
print(seq_size, pred_start, pred_stop)

1259 1159 1264


In [75]:
dmin, dmax = meta['data']['min'], meta['data']['max']
# Both series go through SeqDataset.inverse_scale with the stored min/max before
# plotting (presumably undoing SeqDataset.scale - confirm), so the y-axis shows
# prices rather than scaled values.
actual = SeqDataset.inverse_scale(seq_adj, dmin, dmax)
predicted = SeqDataset.inverse_scale(torch.FloatTensor(p_seq), dmin, dmax)
fig = go.Figure()
fig.add_trace(go.Scatter(x=actual_range, y=actual.numpy(), mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=pred_range, y=predicted.numpy(), mode='lines', name='Prediction'))
# The axis label used to read 'Scaled Price', which contradicted the inverse-scaled data.
fig.update_layout(title=f'Stock Prediction ({meta["stock"]})',
                   xaxis_title='Day',
                   yaxis_title='Price')
fig.show()

In [35]:
import math
from typing import Generator
def gen_sequence(size: int, periods: int = 12) -> Generator[float, None, None]:
    """Lazily yield ``size`` samples of a cosine wave shifted up by one.

    Sample ``i`` is ``cos(2*pi / (size / periods) * i) + 1``, so the wave
    completes ``periods`` cycles over ``size`` samples and every value lies
    between 0 and 2.
    """
    def sample(step: int) -> float:
        # Computed per sample (not hoisted) so an empty request never divides.
        cycle_length = size / periods
        return math.cos(((2 * math.pi) / cycle_length) * step) + 1

    yield from map(sample, range(size))

In [643]:
def gen_gbm(total: int, period: float, start_amount: float, drift: float, volatility: float,
            rng=None) -> Iterator[float]:
    """Yield ``total`` values of a random walk with a cosine term added.

    Each step adds to the running amount the sum of:
      * a drift term, ``amount * drift * period``;
      * a random shock, ``amount * volatility * N(0, sqrt(period))``;
      * ``cos(2*pi*i*period) + 0.5``, where ``i`` is the step index (one full
        cosine cycle every ``1/period`` steps).

    Args:
        total: Number of values to generate.
        period: Time step size (e.g. ``1/365.``).
        start_amount: Amount before the first step; it is not itself yielded.
        drift: Drift coefficient.
        volatility: Volatility coefficient.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) used for
            the shocks, for reproducible sequences. When omitted, the global
            ``np.random`` state is used, as before.

    Yields:
        The running amount after each step.
    """
    draw_normal = np.random.normal if rng is None else rng.normal
    # Standard deviation of the shock is the same for every step.
    shock_scale = math.sqrt(period)
    current_amt = start_amount
    for i in range(total):
        c = (current_amt * drift * period) + \
            (current_amt * volatility * draw_normal(0, shock_scale)) + \
            math.cos(2 * math.pi * i * period) + .5
        current_amt += c
        yield current_amt

In [644]:
def gen_df(seq: Iterator[float], start_date: datetime, store_dist=(10, 7, 4, 2, 1)):
    """Split each value of ``seq`` across stores, one calendar day per value.

    A random share is drawn once per store (uniform on ``[0, weight]`` for each
    entry of ``store_dist``) and normalised so the shares sum to 1. Every value
    from ``seq`` is then divided among the stores using those fixed shares.

    Args:
        seq: Iterable of per-day totals.
        start_date: Date assigned to the first value; each following value is
            one day later.
        store_dist: Upper bound of the uniform draw for each store; its length
            sets the number of stores.

    Returns:
        DataFrame with columns ``date`` (``'YYYY-MM-DD'`` string), ``store_id``
        (1-based) and ``earnings``. The index restarts at 0 for every day,
        matching what row-wise appending produced.
    """
    dst = np.array([np.random.uniform(0, i) for i in store_dist])
    dst = dst / dst.sum()
    cols = ['date', 'store_id', 'earnings']
    n_stores = len(dst)

    # Collect plain records and build the frame once: DataFrame.append in a loop
    # re-copies the frame every iteration and no longer exists in pandas 2.x.
    records = []
    row_index = []
    for day, p in enumerate(seq):
        date_str = (start_date + timedelta(days=day)).strftime('%Y-%m-%d')
        records.extend((date_str, i + 1, dst[i] * p) for i in range(n_stores))
        row_index.extend(range(n_stores))

    if not records:
        return pd.DataFrame(columns=cols)
    return pd.DataFrame(records, columns=cols, index=row_index)

In [None]:
import math
import numpy as np
import pandas as pd
def generate_sales(total: int, period: int,
                   start_amount: float, drift: float,
                   volatility: float, start_date: datetime,
                   store_dist=(10, 7, 4, 2, 1), output_dir=None):
    """Unimplemented placeholder: accepts its arguments and returns ``None``.

    TODO: implement. The parameters mirror those of ``gen_gbm`` and ``gen_df``
    plus an ``output_dir``, so this is presumably meant to combine the two and
    optionally write the result out - confirm the intent before relying on it.

    ``store_dist`` defaults to a tuple rather than a list so the default cannot
    be mutated between calls.
    """
    return None

In [645]:
# Simulate `days` totals and split them across stores; dates start `days` days
# before now, so the last row is dated yesterday.
days = 23
gbm_seq = gen_gbm(total=days, period=(1/365.), start_amount=1500, drift=.01, volatility=.6)
first_day = datetime.now() - timedelta(days=days)
df = gen_df(gbm_seq, first_day)
print(df)

          date store_id    earnings
0   2021-03-31        1  818.237703
1   2021-03-31        2  340.985796
2   2021-03-31        3  214.169224
3   2021-03-31        4   85.356899
4   2021-03-31        5   75.057017
..         ...      ...         ...
0   2021-04-22        1  964.877622
1   2021-04-22        2  402.095336
2   2021-04-22        3  252.551417
3   2021-04-22        4  100.654079
4   2021-04-22        5   88.508310

[115 rows x 3 columns]


In [628]:
# Scratch check of the random per-store split: draw one weight per store,
# normalise the weights to sum to 1, then apportion a sample amount of 123.2.
store_dist = np.array([14, 7, 4, 3, .1])
dst = np.fromiter((np.random.uniform(0, upper) for upper in store_dist), dtype=float)
dst2 = dst / dst.sum()
print(dst2)
da = datetime.now()
[(da, store_id, share * 123.2) for store_id, share in enumerate(dst2, start=1)]


[0.50620223 0.28345804 0.11301877 0.09346942 0.00385154]


[(datetime.datetime(2021, 4, 23, 12, 9, 52, 630099), 1, 62.36411471373844),
 (datetime.datetime(2021, 4, 23, 12, 9, 52, 630099), 2, 34.92203071841387),
 (datetime.datetime(2021, 4, 23, 12, 9, 52, 630099), 3, 13.923912251237965),
 (datetime.datetime(2021, 4, 23, 12, 9, 52, 630099), 4, 11.515432144898497),
 (datetime.datetime(2021, 4, 23, 12, 9, 52, 630099), 5, 0.474510171711221)]

In [556]:
from datetime import timedelta
# Scratch check: `sz` consecutive daily timestamps, the last one a day before now.
sz = 10
one_day = timedelta(days=1)
start = datetime.now() - timedelta(days=sz)
[start + one_day * offset for offset in range(sz)]


[datetime.datetime(2021, 4, 13, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 14, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 15, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 16, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 17, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 18, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 19, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 20, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 21, 11, 51, 53, 40887),
 datetime.datetime(2021, 4, 22, 11, 51, 53, 40887)]

In [557]:
# Plot one simulated path of 365*5 steps from gen_gbm.
gbm_path = list(gen_gbm(365*5, (1/365.), 0.09, .01, .6))
fig = go.Figure()
fig.add_trace(go.Scatter(y=gbm_path, mode='lines', name='Actual'))
# Last expression of the cell: update_layout's return value is what gets displayed.
fig.update_layout(
    title='Geometric Brownian Motion (with superimposed period based scaled cosine wave)',
    xaxis_title='index',
    yaxis_title='value',
)