In [1]:
import torch
from torch import nn
from torch.optim import Adam
import pandas as pd
import numpy as np
import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

from sklearn.datasets import make_regression

In [2]:
samples = 5000

X, y = make_regression(n_samples = samples, n_features = 24, n_informative = 2)

split = 0.9

X_train, X_test = X[:int(samples * split)], X[-int(samples * (1-split)):]
y_train, y_test = y[:int(samples * split)], y[-int(samples * (1-split)):]

In [3]:
import yfinance as yf

In [4]:
prices = yf.download(['PETR4.SA', 'OIBR3.SA'])['Adj Close']

[*********************100%***********************]  2 of 2 completed


In [5]:
dataframe = {}
for asset in prices.columns:
    
    features= pd.DataFrame()
    for window in range(1, 25):
        features[window] = np.log(prices[asset]) - np.log(prices[asset].shift(window * 20))
        
    target = np.log(prices[asset].shift(-20)) - np.log(prices[asset])
    feature_n_target = pd.concat([features, target], axis=1).reset_index(drop=True)
    feature_n_target = feature_n_target.rename({asset: 'TARGET'}, axis=1)
                       
    dataframe[asset] = feature_n_target
    
train_data = pd.DataFrame()
for asset in prices.columns:
    
    train_data = train_data.append(dataframe[asset])
    
train_data.dropna(inplace=True)

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X = np.array(train_data.drop(['TARGET'], axis=1))
y = np.array(train_data['TARGET'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [8]:
X_train.shape

(6289, 24)

In [9]:
N = X_train.shape[0]
E = X_train.shape[1]

query = torch.tensor(X_train.reshape(1, N, E).astype(np.float32)).to(device)
key = torch.tensor(X_train.reshape(1, N, E).astype(np.float32)).to(device)
value = torch.tensor(X_train.reshape(1, N, E).astype(np.float32)).to(device)
y_train_torch = torch.tensor(y_train.astype(np.float32)).to(device)

In [10]:
class NnAttention(nn.Module):
    def __init__(self, embedding_dim, attention_heads = 1):
        super(NnAttention, self).__init__()
        
        self.embedding_dim = embedding_dim
        
        self.multihead_attn = nn.MultiheadAttention(embedding_dim, attention_heads)
        
        self.linear_relu = nn.Sequential(
            nn.Linear(embedding_dim, 12),
            nn.ReLU(),
            nn.Linear(12, 12),
            nn.ReLU(),
            nn.Linear(12, 1),
            nn.Tanh()
        )
        
        self.criterion = nn.MSELoss()
        self.optmizer = Adam(self.parameters())

    def forward(self, query, key, value):
        
        batch_size = query.shape[1]

        attn_output, attn_output_weights = self.multihead_attn(query, key, value)
        
        output = self.linear_relu(attn_output.reshape(batch_size, self.embedding_dim))
        
        return output.squeeze()
    
    def fit(self, query, key, value, y, epochs =1):
        
        t = tqdm.trange(epochs, desc='Loss: ')
        for _ in t:
            self.zero_grad()
            output = self.forward(query, key, value)
            loss = self.criterion(output,y)
            loss.backward()
            self.optmizer.step() 
            
            t.set_description(f"Loss: {loss}", refresh=True)

In [11]:
model = NnAttention(E, 6).to(device)

In [12]:
y_train_norm = (y_train_torch - torch.mean(y_train_torch)) / (torch.std(y_train_torch))

model.fit(query, key, value, y_train_norm, epochs = 2500)

Loss: 0.47330453991889954: 100%|███████████████████████████████████████████████████| 2500/2500 [00:19<00:00, 127.42it/s]


In [13]:
N = X_test.shape[0]
E = X_test.shape[1]


query = torch.tensor(X_test.reshape(1, N, E).astype(np.float32)).to(device)
key = torch.tensor(X_test.reshape(1, N, E).astype(np.float32)).to(device)
value = torch.tensor(X_test.reshape(1, N, E).astype(np.float32)).to(device)

y_hat = model(query, key, value)
y_hat = np.sign(y_hat.cpu().detach())

In [14]:
from sklearn.metrics import classification_report

In [15]:
print(classification_report(y_hat, np.sign(y_test)))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        -1.0       0.77      0.80      0.78       297
         0.0       0.00      0.00      0.00         0
         1.0       0.85      0.82      0.83       402

    accuracy                           0.81       699
   macro avg       0.54      0.54      0.54       699
weighted avg       0.81      0.81      0.81       699



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
print(classification_report(np.sign(X_test[:, 12]), np.sign(y_test)))

              precision    recall  f1-score   support

        -1.0       0.53      0.46      0.49       357
         0.0       0.00      0.00      0.00         0
         1.0       0.50      0.57      0.54       342

    accuracy                           0.51       699
   macro avg       0.34      0.34      0.34       699
weighted avg       0.52      0.51      0.51       699



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [128]:
# from torch.utils.data import Dataset
# from torch.utils.data import DataLoader

# class AttentionTimeSeries(Dataset):
#     def __init__(self, X, y, embedding_dim):
#         super(AttentionTimeSeries, self).__init__()
        
#         self.time_series_size = len(dataframe)
#         self.embedding_dim = X.shape[1]
        
#         self.attention_input = torch.tensor(X_test.reshape(1, time_series_size, self.embedding_dim).astype(np.float32)).to(device)

#     def __len__(self):
#         return self.time_series_size

#     def __getitem__(self, index):
#         return self.attention_input