In [48]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

In [49]:
def get_ticker_data(stock: str, start: datetime = None, end: datetime = None) -> pd.DataFrame:
    """Download daily price history for one ticker as a DataFrame.

    The requested range is sent to the endpoint as whole seconds since the
    Unix epoch (``period1`` / ``period2``).

    Parameters
    ----------
    stock : str
        Ticker symbol; inserted into the download URL as-is.
    start : datetime, optional
        Beginning of the requested range. Defaults to 1970-01-01.
    end : datetime, optional
        End of the requested range. Defaults to the current moment.

    Naive datetimes are read as UTC (they are measured against a naive
    1970-01-01 epoch). Timezone-aware datetimes are converted using their
    own offset.

    Returns
    -------
    pandas.DataFrame
        Whatever CSV the endpoint returns, as parsed by ``pandas.read_csv``.
    """
    epoch = datetime(1970, 1, 1)
    if start is None:
        start = epoch
    if end is None:
        # Use an *aware* "now": a naive local datetime.now() measured against
        # the naive epoch would be wrong by the machine's UTC offset.
        end = datetime.now().astimezone()

    def to_unix_seconds(moment: datetime) -> int:
        # Aware values carry their own offset, so timestamp() is exact.
        # Subtracting them from the naive epoch would raise TypeError.
        if moment.utcoffset() is not None:
            return int(moment.timestamp())
        return int((moment - epoch).total_seconds())

    sdate = to_unix_seconds(start)
    edate = to_unix_seconds(end)

    url = f'https://query1.finance.yahoo.com/v7/finance/download/{stock}?period1={sdate}&period2={edate}&interval=1d&events=history&includeAdjustedClose=true'
    # NOTE(review): parse_dates=True only asks pandas to parse the index; no
    # index_col is given, so any date column is presumably left as text.
    # Confirm whether that is intended.
    return pd.read_csv(url, parse_dates=True)

In [51]:
# Download MSFT with the default date range and preview the closing prices.
msft = get_ticker_data(stock='MSFT')
msft.loc[:, 'Close']

0         0.097222
1         0.100694
2         0.102431
3         0.099826
4         0.098090
           ...    
8836    249.070007
8837    247.860001
8838    249.899994
8839    253.250000
8840    255.850006
Name: Close, Length: 8841, dtype: float64

In [None]:
# Column-wise minimum and maximum of Close across every downloaded row; each
# is a one-element Series labelled 'Close'.
# NOTE(review): these statistics include the most recent rows, which
# split_data later holds out as the test set. Confirm that is acceptable.
smin = msft[['Close']].min()
smax = msft[['Close']].max()

In [None]:
# Keep Close as a one-column DataFrame, then min-max scale it with the
# extremes computed above: (value - min) / (max - min).
mclose = msft.loc[:, ['Close']]
scaled = mclose.sub(smin).div(smax.sub(smin))

In [None]:
def split_data(stock_val, lookback, test_size=0.2):
    """Cut a time-ordered table into overlapping windows and split them.

    Every run of ``lookback`` consecutive rows becomes one sample: all rows
    but the last are the input sequence, and the last row is the target.
    Samples keep their time order. The earliest ones form the training set
    and the final ``test_size`` fraction forms the test set.

    Parameters
    ----------
    stock_val : pandas.DataFrame, pandas.Series or array-like
        Observations, one row per time step. One-dimensional input is
        treated as a single feature column.
    lookback : int
        Window length, counting the target row. Must be at least 1.
    test_size : float, default 0.2
        Fraction of the windows reserved for testing; must lie in [0, 1].

    Returns
    -------
    list of torch.Tensor
        ``[x_train, y_train, x_test, y_test]``. The ``x`` tensors have shape
        ``(n_samples, lookback - 1, n_features)`` and the ``y`` tensors have
        shape ``(n_samples, n_features)``.

    Raises
    ------
    ValueError
        If ``lookback`` is below 1, ``test_size`` is outside [0, 1], or the
        input has fewer than ``lookback`` rows.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be at least 1, got {lookback}")
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be within [0, 1], got {test_size}")

    # Accept pandas objects (via .values) as well as plain arrays.
    data_raw = np.asarray(getattr(stock_val, 'values', stock_val))
    if data_raw.ndim == 1:
        # A Series has no feature axis; add one so the slicing below works.
        data_raw = data_raw.reshape(-1, 1)

    # n rows contain n - lookback + 1 windows. The "+ 1" keeps the final
    # window, so the most recent row can be a target too.
    n_windows = len(data_raw) - lookback + 1
    if n_windows < 1:
        raise ValueError(
            f"need at least {lookback} rows to build a window, got {len(data_raw)}"
        )

    data = np.stack([data_raw[first:first + lookback] for first in range(n_windows)])

    test_set_size = int(np.round(test_size * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size

    # Chronological split: no shuffling, the newest windows go to the test set.
    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]
    x_test = data[train_set_size:, :-1, :]
    y_test = data[train_set_size:, -1, :]

    return [
        torch.from_numpy(part).type(torch.Tensor)
        for part in (x_train, y_train, x_test, y_test)
    ]

In [None]:
# Window the min-max scaled closes, not the raw prices: in the cells shown,
# `scaled` was computed but never passed on. Each window is 5 rows, so every
# sample has 4 input steps and 1 target step.
xt, yt, xe, ye = split_data(scaled, 5)