In [26]:
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Load Data 

In [30]:

from typing import Callable
from pandas import DataFrame

from analysis.analyzers.data_segmenter import DataSegmenter
from analysis.featurizers.ohlcv_to_ohlcv_and_deltas import ohlcv_to_ohlcv_and_deltas
from analysis.segmenters.striding_segmenter import striding_segmenter_builder

# --- Experimental parameters -------------------------------------------------
# Callable handed to the DataSegmenter as its `transformation`
# (presumably maps a raw OHLCV frame to OHLCV plus delta columns; confirm in
# analysis.featurizers).
transformation: Callable[..., DataFrame] = ohlcv_to_ohlcv_and_deltas

# Window geometry for the striding segmenter. NOTE(review): with stride < length,
# consecutive windows presumably overlap; confirm against striding_segmenter_builder.
length, stride = 10, 8

# --- Pipeline object ---------------------------------------------------------
segmenter = striding_segmenter_builder(length=length, stride=stride)

# First positional argument is 'SPY' (presumably the ticker symbol to analyze).
m: DataSegmenter = DataSegmenter('SPY', transformation=transformation, segmenter=segmenter)

In [33]:
# Calls DataSegmenter.load_historical() on `m`; presumably this loads the
# historical data the later transform/segment steps operate on (confirm in
# DataSegmenter; the source and return value are not visible from this notebook).
m.load_historical()

### Featurize segments

In [34]:
# Run the transformation step on `m` (the saved output prints "Transforming data...").
# Presumably this applies the `transformation` callable passed to the constructor;
# confirm in DataSegmenter.transform_data.
m.transform_data()

Transforming data...
CALLING
Index(['date', 'open', 'high', 'low', 'close', 'volume'], dtype='object')


#### Segmentation

In [35]:
# Import DataFrame from the public `pandas` namespace (the same class object);
# `pandas.core.*` is a private module path whose layout is not guaranteed stable.
from pandas import DataFrame

# Ask `m` for its segments and keep them in `segments`, then call
# `save_segmented_data()`. NOTE(review): where and in what format the segments
# are saved is not visible from this notebook; confirm in DataSegmenter.
segments: list[DataFrame] = m.segment_data()
m.save_segmented_data()

In [36]:
# Display the first segment as this cell's output (last expression in the cell).
m.segments[0]
# m.df  # commented-out alternative: display `m.df` instead (presumably the full frame; confirm)

Unnamed: 0,date,open,high,low,close,volume,open_delta,high_delta,low_delta,close_delta
1,2023-08-01 13:31:00+00:00,456.225,456.39,456.14,456.285,6271,-0.000186,-0.000197,0.000197,0.000164
2,2023-08-01 13:32:00+00:00,456.265,456.535,456.135,456.515,13528,8.8e-05,0.000318,-1.1e-05,0.000504
3,2023-08-01 13:33:00+00:00,456.49,456.52,456.265,456.365,3271,0.000493,-3.3e-05,0.000285,-0.000329
4,2023-08-01 13:34:00+00:00,456.38,456.645,456.315,456.615,3839,-0.000241,0.000274,0.00011,0.000548
5,2023-08-01 13:35:00+00:00,456.625,456.77,456.545,456.74,6076,0.000537,0.000274,0.000504,0.000274
6,2023-08-01 13:36:00+00:00,456.74,456.8,456.68,456.77,10815,0.000252,6.6e-05,0.000296,6.6e-05
7,2023-08-01 13:37:00+00:00,456.77,456.955,456.665,456.75,22868,6.6e-05,0.000339,-3.3e-05,-4.4e-05
8,2023-08-01 13:38:00+00:00,456.74,456.81,456.645,456.68,6844,-6.6e-05,-0.000317,-4.4e-05,-0.000153
9,2023-08-01 13:39:00+00:00,456.67,456.74,456.505,456.74,1279,-0.000153,-0.000153,-0.000307,0.000131
10,2023-08-01 13:40:00+00:00,456.745,456.88,456.635,456.77,3904,0.000164,0.000307,0.000285,6.6e-05


# Training

We create a `model`, specify a `loss_function`, and build a `trainer` to train the model on our `data`.

In [28]:
from numpy import ndarray
import torch.nn as nn
from analysis.trainers.stock_transformer import StockTransformer
from analysis.trainers.trainer import Trainer

# NOTE(review): the saved execution counts are out of order (this cell is In [28],
# while `m` is built in In [30]); confirm the notebook passes Restart & Run All.

# --- Data ---
# Vectorized segments exposed by the DataSegmenter instance `m`.
data: list[ndarray] = m.vectorized_segments

# --- Experimental parameters ---
# Model width is read from the first axis of the first vectorized segment.
data_dimension: int = m.vectorized_segments[0].shape[0]
number_of_layers = 2
feedforward_dimension = 256
# Must divide data_dimension; the original note also asks for an even value.
number_of_attention_heads = 6
number_of_epochs = 17

# --- Loss function ---
# Mean-squared error is active; nn.L1Loss() was left commented out as an alternative.
loss_function = nn.MSELoss()

# --- Model ---
model: nn.Module = StockTransformer(
    d_model=data_dimension,
    nhead=number_of_attention_heads,
    num_layers=number_of_layers,
    dim_feedforward=feedforward_dimension,
)

# --- Trainer ---
# Hand the data, model, loss, and epoch count to the trainer.
trainer: Trainer = Trainer(
    data=data,
    model=model,
    criterion=loss_function,
    number_of_epochs=number_of_epochs,
)

In [29]:
# Run the training loop configured above. The saved output reports one line per
# epoch with train and validation loss (17 epochs, matching `number_of_epochs`).
trainer.train()

Epoch 1/17, Train Loss: 0.623952, Val Loss: 2.267972
Epoch 2/17, Train Loss: 0.518680, Val Loss: 2.164746
Epoch 3/17, Train Loss: 0.486208, Val Loss: 2.112165
Epoch 4/17, Train Loss: 0.471497, Val Loss: 2.089574
Epoch 5/17, Train Loss: 0.462668, Val Loss: 2.071944
Epoch 6/17, Train Loss: 0.458997, Val Loss: 2.059543
Epoch 7/17, Train Loss: 0.449323, Val Loss: 2.050164
Epoch 8/17, Train Loss: 0.444632, Val Loss: 2.042409
Epoch 9/17, Train Loss: 0.439994, Val Loss: 2.033816
Epoch 10/17, Train Loss: 0.436633, Val Loss: 2.028841
Epoch 11/17, Train Loss: 0.431880, Val Loss: 2.020144
Epoch 12/17, Train Loss: 0.428982, Val Loss: 2.016061
Epoch 13/17, Train Loss: 0.425830, Val Loss: 2.011138
Epoch 14/17, Train Loss: 0.423309, Val Loss: 2.006299
Epoch 15/17, Train Loss: 0.420951, Val Loss: 2.004784
Epoch 16/17, Train Loss: 0.419081, Val Loss: 2.002901
Epoch 17/17, Train Loss: 0.416345, Val Loss: 2.003996


In [30]:
# Ask the trainer for the model's loss on its test data (per the method name).
# NOTE(review): the saved output of this cell is an IndexError ("only integers,
# slices ... are valid indices") and no "Test Loss" line was printed. The traceback
# is not shown, so the failure is presumably raised inside
# `Trainer.evaluate_model_on_test`; confirm and fix it there before relying on this cell.
test_loss = trainer.evaluate_model_on_test()

print(f'Test Loss: {test_loss}')

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices