Import relevant libraries

In [1]:
import pandas as pd
from Preprocessing.preprocessing import preprocessing
import time
import torch
from tsai.all import *

print('GPU support:', torch.cuda.is_available())
computer_setup()

GPU support: True
os              : Windows-10-10.0.19044-SP0
python          : 3.9.16
tsai            : 0.3.5
fastai          : 2.7.11
fastcore        : 1.5.28
torch           : 1.13.1
device          : 1 gpu (['NVIDIA GeForce GTX 970'])
cpu cores       : 4
threads per cpu : 1
RAM             : 15.95 GB
GPU memory      : [4.0] GB


In [2]:
from Optimizer.optimizer import optimize_model

df = pd.read_csv('Data\Stock\StockBars\MSFT_Minute')
 
preprocessing_params = {
    'df': df[:1000],
    'lag': 1,
    'dif_all': True,
    'train_size': 0.8,
    'TSAI': True,
    'CLF': True,
    'index': None,
    'data': "alpacca",
    'buckets': 1
}

model_type = 'tst_class'

opti = True
if opti:
    optimize_model(model_type=model_type, preprocessing_params=preprocessing_params, n_trials=2)

results_df = pd.read_csv(f"models/{model_type}/{model_type}_hyperparameters_results.csv")
results_df

[32m[I 2023-03-17 12:03:08,205][0m A new study created in memory with name: no-name-be9f8baa-1e4e-45e3-92a4-438578ba9b55[0m


300
300
torch.Size([699, 9, 300])


[33m[W 2023-03-17 12:03:59,382][0m Trial 0 failed with parameters: {'seq_length': 300, 'batch_size': 16, 'learning_rate': 1.3336466469356106e-06, 'd_model': 256, 'n_layers': 8, 'n_heads': 32, 'd_ff': 1024, 'dropout': 0.2} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "c:\Users\lukas\anaconda3\envs\ai\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "c:\Users\lukas\Programmering\Investment AI\Master-Thesis\Optimizer\optimizer.py", line 110, in objective
    learn.fit_one_cycle(15, lr_max=learning_rate)
  File "c:\Users\lukas\anaconda3\envs\ai\lib\site-packages\fastai\callback\schedule.py", line 119, in fit_one_cycle
    self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)
  File "c:\Users\lukas\anaconda3\envs\ai\lib\site-packages\fastai\learner.py", line 264, in fit
    self._with_events(self._do_fit, 'fit', CancelFitEx

KeyboardInterrupt: 

In [None]:
model_type = 'lstm_class'

with open(f"models/{model_type}/{model_type}_best_params.json", "r") as f:
    best_params = json.load(f)

best_params

{'seq_length': 200,
 'batch_size': 16,
 'hidden_size': 25,
 'n_layers': 1,
 'rnn_dropout': 0.5,
 'fc_dropout': 0.4,
 'learning_rate': 0.0019302736528178678}

Select a stock for Machine Learning Model Training and preprocesses it

In [None]:
# Preprocess the data using a custom function and split it into training and testing sets
# Only the training and testing sets are used, so the third variable (a scalar) is discarded using an underscore

seq_length = best_params.pop('seq_length', None)
data_train, data_test, _ = preprocessing(**preprocessing_params, sequence_length=seq_length, print_info=True)

# Changes the data into features and labels with the split used later in TSAI for modelling
X, y, splits = combine_split_data([data_train[0], data_test[0]],[data_train[1], data_test[1]])

# Utilizes the GPU if possible
if torch.cuda.is_available(): X, y = X.cuda(), y.cuda()

batch_size = best_params.pop('batch_size', None)
dsets = TSDatasets(X, y, splits=splits)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=batch_size)

# Note this tabel is before sequenceing so the actuall values is total - sequence then times train_size

   Label  Count  Train count  Test count  Bucket min  Bucket max
0    0.0    432          338          93     -1.0488     -0.0001
1    1.0    566          460         106      0.0000      0.9588
2  Total    998          798         199     -1.0488      0.9588


Initializes the models and learners

In [None]:
nr_features = X.shape[1] # Number of features
nr_labels = torch.unique(y).numel() # Number of labels

model_params = {key: value for key, value in best_params.items() if key not in ('seq_length', 'batch_size', 'learning_rate')}

# Initiates the models
model_lstm_fcn = LSTM_FCNPlus(c_in=nr_features, c_out=nr_labels, **model_params, shuffle=False)
#model_lstm = LSTMPlus(c_in=nr_features, c_out=nr_labels, seq_len=sequence_length)
#model_tst = TST(c_in=nr_features, c_out=nr_labels, seq_len=sequence_length)
#model_xcm = XCMPlus(c_in=nr_features, c_out=nr_labels, seq_len=sequence_length)

models = {'LSTM_FCNPlus': model_lstm_fcn}#, 'LSTMPlus': model_lstm, 'TST': model_tst, 'XCMPlus': model_xcm}

# Create Learner objects
binary_classification_metrics = [accuracy]

learn_lstm_fcn = Learner(dls, model_lstm_fcn, loss_func=LabelSmoothingCrossEntropyFlat(), metrics=accuracy)
#learn_lstm = Learner(dls, model_lstm, loss_func=LabelSmoothingCrossEntropyFlat())
#learn_tst = Learner(dls, model_tst, loss_func=LabelSmoothingCrossEntropyFlat())
#learn_xcm = Learner(dls, model_xcm, loss_func=LabelSmoothingCrossEntropyFlat())

learners = {'LSTM_FCNPlus': learn_lstm_fcn}#, 'LSTMPlus': learn_lstm, 'TST': learn_tst, 'XCMPlus': learn_xcm}


TypeError: __init__() got an unexpected keyword argument 'n_layers'

Find optimal learning rates

In [None]:
results = pd.DataFrame(columns=['Model Name', 'Rate', 'Accuracy', 'Training Time'])

epochs = 3

learning_rate = best_params.pop('learning_rate', None)


for name, learner in learners.items():
    start_time = time.time()
    learner.fit_one_cycle(n_epoch=epochs, lr_max=learning_rate)
    end_time = time.time()
    training_time = round(end_time - start_time, 2)

    loss = learner.validate()

loss

(#2) [0.7115873694419861,0.5]