In [1]:
import sys
sys.path.append('..')
from data.process import StockDataProcessor
import data.config as dataconf

In [2]:
import numpy as np

In [3]:
# Single processor instance; the cells below use it to load, clean, scale and split the data.
data_processor = StockDataProcessor()

In [4]:
# Relative path to the raw CSV (resolved against the kernel's working directory).
storage_path = '../data/raw.csv'
# Load the raw file into the processor, passing the ticker list from data.config.
# The loaded frame is then available as data_processor.df (inspected in the next cell).
data_processor.load_raw_to_df(storage_path, dataconf.tickers)

In [5]:
# Inspect the raw frame right after loading (the notebook truncates the middle rows).
data_processor.df

Unnamed: 0,Open,High,Low,Close,Volume,Ticker,Collect Date
0,16.100000,16.396667,15.942000,16.312668,92439000.0,TSLA,2019-10-01
1,16.219334,16.309999,15.962000,16.208668,84471000.0,TSLA,2019-10-02
2,15.457333,15.632000,14.952000,15.535333,226267504.0,TSLA,2019-10-03
3,15.440667,15.652000,15.204667,15.428667,119925000.0,TSLA,2019-10-04
4,15.320000,15.904000,15.236667,15.848000,120963000.0,TSLA,2019-10-07
...,...,...,...,...,...,...,...
65560,2894.000000,2931.500000,2863.000000,2882.500000,24980800.0,7203.T,2025-01-24
65561,2913.500000,2941.500000,2910.500000,2922.000000,18257200.0,7203.T,2025-01-27
65562,2900.000000,2936.000000,2889.500000,2889.500000,18314000.0,7203.T,2025-01-28
65563,2917.000000,2936.500000,2898.500000,2930.000000,17997800.0,7203.T,2025-01-29


In [6]:
# Columns kept as model inputs; 'Close' is also used as the target column in the split cell.
features = ['Close', 'Volume']
# The three calls below return nothing that is captured here: they change data_processor's
# own state (data_processor.df looks different before and after this cell).
# NOTE(review): re-running this cell without reloading may apply scaling twice — confirm.
data_processor.handle_missing_data(dataconf.start_date, dataconf.end_date, features=features)
data_processor.scale(features=features)
data_processor.select_feature(features=features)

In [7]:
# Inspect the frame again after missing-data handling, scaling and feature selection.
data_processor.df

Unnamed: 0,Close,Volume,Collect Date,Ticker
67,-0.361416,7.191827,2020-01-01,TSLA
68,-0.358438,6.637111,2020-01-02,TSLA
69,-0.355254,12.707194,2020-01-03,TSLA
70,-0.353125,7.079427,2020-01-06,TSLA
71,-0.348751,12.783453,2020-01-07,TSLA
...,...,...,...,...
65538,10.636154,2.325211,2024-12-25,7203.T
65539,11.300889,2.760062,2024-12-26,7203.T
65540,11.473159,2.040668,2024-12-27,7203.T
65541,11.315869,0.809383,2024-12-30,7203.T


In [8]:
# Build windowed samples and split them into train / validation / test sets.
# With these settings each X sample has shape (40, 2) and each y sample has 5 values
# (see the shapes shown by the next cell).
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = data_processor.split_train_val_test(
    window_for_x=40,
    window_for_y=5,
    val_size=0.1,
    test_size=0.1,
    features=features,
    target_col='Close',
)

In [9]:
# Sanity check: shapes of the inputs and targets for every split.
X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((47094, 40, 2),
 (47094, 5),
 (4042, 40, 2),
 (4042, 5),
 (4042, 40, 2),
 (4042, 5))

## Baseline

### Last Days

In [10]:
from baseline import BaselineLastDayModel

In [11]:
# Last-day baseline: produce a 5-day forecast for each split with the same model object.
model = BaselineLastDayModel()
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(split, ndays=5) for split in (X_train, X_val, X_test)
)
# Shapes of the three prediction arrays (expected to match the corresponding targets).
y_train_pred.shape, y_val_pred.shape, y_test_pred.shape

((47094, 5), (4042, 5), (4042, 5))

In [12]:
from common import mse, mae

In [13]:
# Training-split MSE of the current baseline: per forecast day, plus the mean over the days.
# Targets and predictions are still in scaled units here.
model.evaluate_model(y_true=y_train, y_pred=y_train_pred, metric=mse)

(array([0.00857969, 0.01713958, 0.02712367, 0.03552309, 0.04417002],
       dtype=float32),
 0.02650721)

In [14]:
# Validation-split MSE of the current baseline (per forecast day and mean), in scaled units.
model.evaluate_model(y_true=y_val, y_pred=y_val_pred, metric=mse)

(array([0.01045789, 0.02028568, 0.03077288, 0.04323151, 0.05812694],
       dtype=float32),
 0.032574978)

In [15]:
# Peek at the test targets; the values are still in scaled units at this point.
y_test

array([[ 9.905915  ,  9.836011  ,  9.98318   ,  9.898558  ,  9.964784  ],
       [ 9.836011  ,  9.98318   ,  9.898558  ,  9.964784  ,  9.652048  ],
       [ 9.98318   ,  9.898558  ,  9.964784  ,  9.652048  ,  9.49752   ],
       ...,
       [-0.1274993 , -0.11877347, -0.11877347, -0.11836152, -0.12259334],
       [-0.11877347, -0.11877347, -0.11836152, -0.12259334, -0.12667538],
       [-0.11877347, -0.11836152, -0.12259334, -0.12667538, -0.1274993 ]],
      dtype=float32)

In [16]:
def inverse_y(y, column='Close', processor=None):
    """Undo the processor's scaling on a 2-D array, one column at a time.

    Every column of ``y`` (one forecast day per column) is passed separately to
    ``processor.inverse_transform(values, column)``; the first column of each
    result is written into the matching column of the output array.

    Args:
        y: 2-D array of shape (n_samples, ndays) holding scaled values.
        column: Name of the column whose scaling is undone. Defaults to
            'Close', the value that was previously hard-coded.
        processor: Object exposing ``inverse_transform(values, column)``.
            Defaults to the notebook-level ``data_processor``.

    Returns:
        A float64 ``np.ndarray`` with the same shape as ``y``.
    """
    if processor is None:
        # Looked up at call time so the helper keeps following whatever
        # processor the notebook currently holds when none is passed.
        processor = data_processor
    ndays = y.shape[1]
    y_new = np.zeros(y.shape)
    for i in range(ndays):
        # inverse_transform returns a 2-D result; only its first column is kept.
        y_new[:, i] = processor.inverse_transform(y[:, i], column)[:, 0]
    return y_new

In [17]:
# Despite the "_scaled" suffix, these arrays hold the inverse-transformed targets and
# predictions, so the MSE below is no longer in scaled units.
y_test_scaled, y_test_pred_scaled = inverse_y(y_test), inverse_y(y_test_pred)
model.evaluate_model(y_test_scaled, y_test_pred_scaled, mse)

(array([1302.00112662, 2284.88861635, 3279.70038298, 3923.86798261,
        4452.61393196]),
 3048.614408103344)

### Moving Average

In [18]:
from baseline import BaselineMAModel

In [19]:
# Moving-average baseline: produce a 5-day forecast for each split.
# Note that `model` now refers to the MA baseline instead of the last-day baseline.
model = BaselineMAModel()
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(split, ndays=5) for split in (X_train, X_val, X_test)
)
# Shapes of the three prediction arrays.
y_train_pred.shape, y_val_pred.shape, y_test_pred.shape

((47094, 5), (4042, 5), (4042, 5))

In [20]:
# Training-split MSE of the current baseline (per forecast day and mean), in scaled units.
model.evaluate_model(y_true=y_train, y_pred=y_train_pred, metric=mse)

(array([0.03336056, 0.0394291 , 0.04570048, 0.05191565, 0.05828492]),
 0.04573814135792863)

In [21]:
# Validation-split MSE of the current baseline (per forecast day and mean), in scaled units.
model.evaluate_model(y_true=y_val, y_pred=y_val_pred, metric=mse)

(array([0.04430977, 0.05216096, 0.06133201, 0.07132456, 0.08231013]),
 0.06228748496916357)

In [22]:
# Despite the "_scaled" suffix, these arrays hold the inverse-transformed targets and
# predictions, so the MSE below is no longer in scaled units.
y_test_scaled, y_test_pred_scaled = inverse_y(y_test), inverse_y(y_test_pred)
model.evaluate_model(y_test_scaled, y_test_pred_scaled, mse)

(array([2688.92233386, 3142.65854376, 3560.8380593 , 3928.23160023,
        4266.91285109]),
 3517.5126776461357)

## Deep Learning Models

In [23]:
import torch.nn as nn
import torch.optim as optim

### Transformer

In [24]:
from transformer import LocalTransformerModel

In [28]:
# Fix the RNG seeds before building the model so the random weight initialisation
# is repeatable after a kernel restart.
# NOTE(review): only torch and numpy are seeded; if train_model draws randomness from
# another source, results may still vary between runs — confirm.
import torch
torch.manual_seed(42)
np.random.seed(42)
# Local-attention transformer: 2 input features per time step, 5 forecast days out.
model = LocalTransformerModel(optimizer_class=optim.Adam, optimizer_params={"betas": (0.9, 0.999), "eps": 1e-8},
                              input_dim=2, d_model=64, nhead=8, window=5,
                              num_encoder_layers=4, dim_ff=256, dropout=0.1,
                              max_len=70, output_dim=1, ndays=5)

In [29]:
# Show the module tree of the freshly built model.
model

LocalTransformerModel(
  (input_linear): Linear(in_features=2, out_features=64, bias=True)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x LocalTransformerEncoderLayer(
        (self_attention): LocalAttention(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
        )
        (linear1): Linear(in_features=64, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
        (activation): ReLU()
      )
    )
    (norm): LayerNorm((64,), eps=1e-05, e

In [30]:
# First training run: 10 epochs, warmup enabled for 3 epochs, cosine scheduler requested.
# NOTE(review): T_max=25 is larger than num_epochs=10 — confirm this is intended.
model.train_model(
    X=X_train,
    y=y_train,
    loss_fn=nn.MSELoss(),
    num_epochs=10,
    lr=5e-4,
    batch_size=32,
    X_val=X_val,
    y_val=y_val,
    use_warmup=True,
    warmup_epochs=3,
    scheduler_type="cosine",
    scheduler_params={"T_max": 25},
)

Epoch 1/10, Train Loss: 9.4137, Val Loss: 17.0801
Epoch 2/10, Train Loss: 3.2006, Val Loss: 3.8153
Epoch 3/10, Train Loss: 0.8778, Val Loss: 1.6462
Epoch 4/10, Train Loss: 0.4942, Val Loss: 1.2012
Epoch 5/10, Train Loss: 0.4101, Val Loss: 1.8593
Epoch 6/10, Train Loss: 0.3632, Val Loss: 0.9399
Epoch 7/10, Train Loss: 0.2976, Val Loss: 1.1735
Epoch 8/10, Train Loss: 0.2959, Val Loss: 1.0406
Epoch 9/10, Train Loss: 0.2199, Val Loss: 0.3302
Epoch 10/10, Train Loss: 0.2127, Val Loss: 0.3171


In [28]:
# Rebuild the model from scratch (this replaces any previously trained `model`).
# Fix the RNG seeds first so the random weight initialisation is repeatable after a
# kernel restart.
# NOTE(review): only torch and numpy are seeded; if train_model draws randomness from
# another source, results may still vary between runs — confirm.
import torch
torch.manual_seed(42)
np.random.seed(42)
model = LocalTransformerModel(optimizer_class=optim.Adam, optimizer_params={"betas": (0.9, 0.999), "eps": 1e-8},
                              input_dim=2, d_model=64, nhead=8, window=5,
                              num_encoder_layers=4, dim_ff=256, dropout=0.1,
                              max_len=70, output_dim=1, ndays=5)

In [29]:
# Show the module tree of the rebuilt model.
model

LocalTransformerModel(
  (input_linear): Linear(in_features=2, out_features=64, bias=True)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x LocalTransformerEncoderLayer(
        (self_attention): LocalAttention(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
        )
        (linear1): Linear(in_features=64, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
        (activation): ReLU()
      )
    )
    (norm): LayerNorm((64,), eps=1e-05, e

In [30]:
# Main training run: 20 epochs, warmup enabled for 5 epochs, cosine scheduler requested.
# NOTE(review): validation loss in the recorded log fluctuates strongly between epochs,
# and T_max=25 is larger than num_epochs=20 — confirm both are acceptable.
model.train_model(
    X=X_train,
    y=y_train,
    loss_fn=nn.MSELoss(),
    num_epochs=20,
    lr=5e-4,
    batch_size=32,
    X_val=X_val,
    y_val=y_val,
    use_warmup=True,
    warmup_epochs=5,
    scheduler_type="cosine",
    scheduler_params={"T_max": 25},
)

Epoch 1/20, Train Loss: 9.0862, Val Loss: 16.6313
Epoch 2/20, Train Loss: 3.8974, Val Loss: 5.7445
Epoch 3/20, Train Loss: 0.7506, Val Loss: 1.7251
Epoch 4/20, Train Loss: 0.4560, Val Loss: 1.4745
Epoch 5/20, Train Loss: 0.3708, Val Loss: 0.4082
Epoch 6/20, Train Loss: 0.3640, Val Loss: 0.2743
Epoch 7/20, Train Loss: 0.3741, Val Loss: 0.3060
Epoch 8/20, Train Loss: 0.2235, Val Loss: 0.4058
Epoch 9/20, Train Loss: 0.2035, Val Loss: 0.2150
Epoch 10/20, Train Loss: 0.1951, Val Loss: 0.1289
Epoch 11/20, Train Loss: 0.1947, Val Loss: 0.1144
Epoch 12/20, Train Loss: 0.1608, Val Loss: 0.3876
Epoch 13/20, Train Loss: 0.1650, Val Loss: 0.1078
Epoch 14/20, Train Loss: 0.1377, Val Loss: 0.1949
Epoch 15/20, Train Loss: 0.1285, Val Loss: 0.3619
Epoch 16/20, Train Loss: 0.1097, Val Loss: 0.6132
Epoch 17/20, Train Loss: 0.1130, Val Loss: 0.1346
Epoch 18/20, Train Loss: 0.0987, Val Loss: 0.2452
Epoch 19/20, Train Loss: 0.1004, Val Loss: 0.2346
Epoch 20/20, Train Loss: 0.0886, Val Loss: 0.3711


In [34]:
# Continue training the existing model for 10 more epochs at a much lower learning
# rate and without warmup. The recorded log keeps counting from epoch 21, so the
# "21/10" style labels reflect the continued epoch counter, not a fresh run.
model.train_model(
    X=X_train,
    y=y_train,
    loss_fn=nn.MSELoss(),
    num_epochs=10,
    lr=1e-5,
    batch_size=32,
    X_val=X_val,
    y_val=y_val,
    use_warmup=False,
    scheduler_type="cosine",
    scheduler_params={"T_max": 25},
)

Epoch 21/10, Train Loss: 0.0701, Val Loss: 0.1636
Epoch 22/10, Train Loss: 0.0696, Val Loss: 0.2079
Epoch 23/10, Train Loss: 0.0679, Val Loss: 0.1941
Epoch 24/10, Train Loss: 0.0694, Val Loss: 0.1768
Epoch 25/10, Train Loss: 0.0694, Val Loss: 0.1619
Epoch 26/10, Train Loss: 0.0663, Val Loss: 0.1999
Epoch 27/10, Train Loss: 0.0683, Val Loss: 0.1738
Epoch 28/10, Train Loss: 0.0619, Val Loss: 0.2024
Epoch 29/10, Train Loss: 0.0657, Val Loss: 0.1821
Epoch 30/10, Train Loss: 0.0663, Val Loss: 0.1264


In [35]:
# Write the current model to a checkpoint file (relative path, so it lands in the
# kernel's working directory).
model.save_model(file_path='local_transformer_model_checkpoint2.pth')

In [36]:
# Run the trained transformer on every split; these overwrite the baseline predictions.
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(split) for split in (X_train, X_val, X_test)
)
# Shapes of the three prediction arrays.
y_train_pred.shape, y_val_pred.shape, y_test_pred.shape

((47094, 5), (4042, 5), (4042, 5))

In [37]:
def evaluate_model(y_true, y_pred, metric):
    """Score predictions separately for each forecast day.

    Args:
        y_true: 2-D array of targets, one column per forecast day.
        y_pred: 2-D array of predictions, indexed the same way as ``y_true``.
        metric: Callable ``metric(true_column, pred_column)`` returning a score.

    Returns:
        Tuple ``(per_day, mean)``: an array with one score per column of
        ``y_true`` and the mean of that array.
    """
    horizon = y_true.shape[1]
    scores = []
    for day in range(horizon):
        scores.append(metric(y_true[:, day], y_pred[:, day]))
    per_day = np.array(scores)
    return per_day, per_day.mean()


In [38]:
# Training-split MSE of the transformer (per forecast day and mean), in scaled units.
# Uses the notebook-level evaluate_model helper rather than a model method.
evaluate_model(y_true=y_train, y_pred=y_train_pred, metric=mse)

(array([0.03489774, 0.04213201, 0.04968512, 0.0563923 , 0.0639586 ],
       dtype=float32),
 0.049413152)

In [39]:
# Validation-split MSE of the transformer (per forecast day and mean), in scaled units.
evaluate_model(y_true=y_val, y_pred=y_val_pred, metric=mse)

(array([0.10379081, 0.11444955, 0.12340838, 0.1393262 , 0.15085594],
       dtype=float32),
 0.12636617)

In [40]:
# Despite the "_scaled" suffix, these arrays hold the inverse-transformed targets and
# predictions, so the MSE below is no longer in scaled units.
# NOTE(review): the recorded test MSE (~9882) is higher than both baselines
# (~3049 last-day, ~3518 moving-average) — the transformer does not beat them here.
y_test_scaled, y_test_pred_scaled = inverse_y(y_test), inverse_y(y_test_pred)
evaluate_model(y_true=y_test_scaled, y_pred=y_test_pred_scaled, metric=mse)

(array([ 9738.89443676,  9871.06211777,  9923.38415015,  9848.7558813 ,
        10028.53088546]),
 9882.125494288204)

In [41]:
# Inverse-transformed test targets, for eyeballing against the predictions below.
y_test_scaled

array([[2769.5090332 , 2750.84277344, 2790.14038086, 2767.54418945,
        2785.22827148],
       [2750.84277344, 2790.14038086, 2767.54418945, 2785.22827148,
        2701.72045898],
       [2790.14038086, 2767.54418945, 2785.22827148, 2701.72045898,
        2660.45776367],
       ...,
       [  90.34999847,   92.68000031,   92.68000031,   92.79000092,
          91.66000366],
       [  92.68000031,   92.68000031,   92.79000092,   91.66000366,
          90.56999969],
       [  92.68000031,   92.79000092,   91.66000366,   90.56999969,
          90.34999847]])

In [42]:
# Inverse-transformed transformer predictions for the test split.
y_test_pred_scaled

array([[2705.38232422, 2710.86254883, 2707.14355469, 2713.21118164,
        2712.32861328],
       [2739.77490234, 2745.0546875 , 2741.20898438, 2747.37060547,
        2746.10839844],
       [2755.71679688, 2760.89892578, 2757.42919922, 2763.30371094,
        2762.64648438],
       ...,
       [  59.43575287,   59.54277802,   60.88783646,   62.11122513,
          57.31735229],
       [  58.12192535,   59.48018646,   59.85539246,   60.28598022,
          58.67137909],
       [  57.44210052,   59.71182251,   59.30269623,   59.55162048,
          59.93556213]])