In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

from datasets.SP100Stocks import SP100Stocks
from notebooks.models import TGCN, A3TGCN, DCGNN, train, evaluate_classification

# Buy or Sell? Stocks classifier
The goal of this task is to classify stocks into buy or sell categories based on past performance. Given the past 5 weeks of data (25 daily timesteps), the model predicts whether the next day's close price will be higher or lower than the current day's. Stock movements show no obvious patterns, so neural networks have a hard time forecasting the exact values of the next timestamps. The intuition is that they can, however, capture up/down trends.

## Loading the data
The data from the custom PyG forecasting dataset is loaded into PyG `DataLoader`s.
A "transform" is applied to replace the targets `y` of the dataset with a binary buy/sell class instead of the close price.

In [2]:
def future_close_price_to_buy_sell_class(sample: Data):
	"""
	Replaces the target y of a sample with a binary buy (1.0) / sell (0.0) label per row.

	A row is labelled "buy" when the last value of `close_price_y` is strictly greater than the last value of `close_price`; equal prices are labelled "sell".
	:param sample: Data sample exposing `close_price` and `close_price_y`; it is modified in place
	:return: The same sample, with `y` overwritten by a single float column of labels
	"""
	last_known_close = sample.close_price[:, -1]
	target_close = sample.close_price_y[:, -1]
	is_higher = target_close > last_known_close
	# Float labels with a trailing dimension, i.e. one label column per row
	sample.y = is_higher.float().unsqueeze(1)
	return sample

In [3]:
# Build the dataset with the buy/sell transform, so that `y` holds the binary class instead of the close price
dataset = SP100Stocks(transform=future_close_price_to_buy_sell_class)
# Display the dataset summary together with its first sample
dataset, dataset[0]

(SP100Stocks(1209),
 Data(x=[100, 5, 25], edge_index=[2, 524], y=[100, 1], edge_weight=[524], close_price=[100, 25], close_price_y=[100, 1]))

In [4]:
# Fetch the first sample once: indexing the dataset inside the loop would rebuild the sample (and re-run the transform) for every value printed
first_sample = dataset[0]
for i in range(5):
	last_close = first_sample.close_price[i, -1:].item()
	next_close = first_sample.close_price_y[i, -1:].item()
	# 1.0 means buy, 0.0 means sell
	decision = first_sample.y[i].item()
	print(f"Close price a time t: {last_close:.2f}$ vs. Close price at time t+1: {next_close:.2f}$, Target decision: {decision} ({['Sell', 'Buy'][int(decision)]})")

Close price a time t: 48.95$ vs. Close price at time t+1: 50.06$, Target decision: 1.0 (Buy)
Close price a time t: 55.46$ vs. Close price at time t+1: 55.02$, Target decision: 0.0 (Sell)
Close price a time t: 80.67$ vs. Close price at time t+1: 81.20$, Target decision: 1.0 (Buy)
Close price a time t: 180.94$ vs. Close price at time t+1: 181.42$, Target decision: 1.0 (Buy)
Close price a time t: 306.87$ vs. Close price at time t+1: 308.55$, Target decision: 1.0 (Buy)


In [5]:
train_part = .9
batch_size = 64

# The first `train_part` of the samples is used for training, the remainder for testing
split_idx = int(train_part * len(dataset))
train_dataset, test_dataset = dataset[:split_idx], dataset[split_idx:]
print(f"Train dataset: {len(train_dataset)}, Test dataset: {len(test_dataset)}")

# Training batches are reshuffled every epoch
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The whole test split is served as a single batch; shuffling it would only reorder that batch, so it is kept in a fixed order
test_dataloader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

Train dataset: 1088, Test dataset: 121


## Training
The previously implemented models are used, trained on the training dataset with the Adam optimizer. The `weight_decay` parameter is used for L2 regularization, following the T-GCN paper's methodology. The loss is the Binary Cross Entropy (BCE), computed directly on the model's raw logits with `BCEWithLogitsLoss`.

In [6]:
# Seed PyTorch's global RNG so that weight initialisation and the shuffling of the training batches are repeatable between runs
seed = 42
torch.manual_seed(seed)

# Input size is the number of features per timestep (second-to-last dimension of x); a single output logit per stock
in_channels, out_channels, hidden_size, layers_nb = dataset[0].x.shape[-2], 1, 16, 2
model = A3TGCN(in_channels, out_channels, hidden_size, layers_nb)

lr, weight_decay, num_epochs = 0.005, 1e-5, 64

# BCEWithLogitsLoss applies the sigmoid itself, so the model is expected to output raw logits
criterion = nn.BCEWithLogitsLoss()
# weight_decay adds the L2 regularization
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
model

A3TGCN(
  (cells): ModuleList(
    (0): TGCNCell(
      (gcn): GCN(
        (convs): ModuleList(
          (0): GCNConv(5, 16)
          (1): GCNConv(16, 16)
        )
      )
      (lin_u): Linear(in_features=37, out_features=16, bias=True)
      (lin_r): Linear(in_features=37, out_features=16, bias=True)
      (lin_c): Linear(in_features=37, out_features=16, bias=True)
    )
    (1): TGCNCell(
      (gcn): GCN(
        (convs): ModuleList(
          (0-1): 2 x GCNConv(16, 16)
        )
      )
      (lin_u): Linear(in_features=48, out_features=16, bias=True)
      (lin_r): Linear(in_features=48, out_features=16, bias=True)
      (lin_c): Linear(in_features=48, out_features=16, bias=True)
    )
  )
  (attention): Sequential(
    (0): Linear(in_features=16, out_features=1, bias=True)
    (1): Softmax(dim=1)
  )
  (out): Linear(in_features=16, out_features=1, bias=True)
)

In [None]:
# Fit the model on the training split; the test split is passed as well (presumably for evaluation during training — confirm in notebooks.models.train).
# NOTE(review): "BuyOrSell" looks like a task/run name and measure_acc=True presumably makes `train` report accuracy too — confirm against `train`.
train(model, optimizer, criterion, train_dataloader, test_dataloader, num_epochs, "BuyOrSell", measure_acc=True)

Epochs:  53%|█████▎    | 34/64 [32:02<28:07, 56.25s/it, Batch=11.8%] 

## Results

### Results on train data

In [None]:
# Accuracy and confusion counts of the model on the training split
acc, tp, tn, fp, fn = evaluate_classification(model, train_dataloader)

# Matrix layout: rows are the actual class (sell, buy), columns the predicted class (sell, buy)
print(
	f"Train accuracy: {acc * 100:.1f}%",
	"Train confusion matrix:",
	np.array([[tn, fp], [fn, tp]]),
	sep="\n",
)

### Results on test data

In [None]:
# Accuracy and confusion counts of the model on the held-out test split
acc, tp, tn, fp, fn = evaluate_classification(model, test_dataloader)

# Matrix layout: rows are the actual class (sell, buy), columns the predicted class (sell, buy)
print(
	f"Test accuracy: {acc * 100:.1f}%",
	"Test confusion matrix:",
	np.array([[tn, fp], [fn, tp]]),
	sep="\n",
)