### **Imports**

In [788]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [789]:
import torch
import torch.nn as nn

In [790]:
import importlib

import utils
import model

# Re-execute the project modules so that edits made to them on disk are picked up
# by the running kernel without a restart.
importlib.reload(utils)
importlib.reload(model)

<module 'model' from '/Users/pawelgrzeszczyk/Documents/02_studies/master/portfolio-optimization-dl/main/modeling/model.py'>

### **Reading the dataset**

In [791]:
# # Reading data, renaming columns and reordering
# data = pd.read_csv('../data/data_files/w20_stock.csv').filter(regex='^Close_')
# data.columns = [x.split('_')[1].split('.')[0] for x in data.columns]
# data['Date'] = pd.read_csv('../data/data_files/w20_stock.csv')['Date']
# data.insert(0, 'Date', data.pop('Date'))

# # Date as index
# data = data.set_index('Date')

# # Getting the first row with no NaN values
# first_full_row = data.dropna().first_valid_index()
# print(f'First row with no NaN values is from: ({first_full_row})')

# # Removing rows with NaN values
# data = data.loc[first_full_row:]
# print(f'Number of rows with NaN values removed: {first_full_row}')
# print(f'Number of rows with no NaN values: {data.shape[0]}')
# data.head()

In [792]:
from utils import generate_data

# Number of "next" days used to build the true label for each input window
Y_SEQ_LEN = 1

# Generate the price frame; the second return value is used further down as the
# input window length (X_seq_len).
data, seq_len_data = generate_data(
    end_date=datetime(2023, 1, 30),
    days=12,
    num_ascending_start=2,
    num_descending_start=2,
    swap_count=3,
)

data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-18,100.0,100.0,100.0,100.0
2023-01-19,100.0,100.0,100.0,100.0
2023-01-20,101.27,107.67,94.7,95.14
2023-01-21,107.78,114.3,94.11,90.6
2023-01-22,110.88,122.87,93.1,82.36
2023-01-23,115.83,124.81,88.85,80.29
2023-01-24,115.76,115.14,94.81,84.16
2023-01-25,106.03,104.35,99.62,86.57
2023-01-26,100.53,102.34,101.02,95.16
2023-01-27,93.7,94.37,104.82,95.63


### **Prepare data for the model**

**Calculating percentage changes**

In [793]:
# Daily percentage returns per asset. dropna() removes rows that contain NaN
# (pct_change leaves the first row empty because it has no previous price).
# The extra 'SAVE' column is a constant-zero return: "don't invest in anything".
data_returns = data.pct_change().dropna().assign(SAVE=0)

print(f'Data shape: {data_returns.shape}')
data_returns.head()

Data shape: (12, 5)


Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2,SAVE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-19,0.0,0.0,0.0,0.0,0
2023-01-20,0.0127,0.0767,-0.053,-0.0486,0
2023-01-21,0.064284,0.061577,-0.00623,-0.047719,0
2023-01-22,0.028762,0.074978,-0.010732,-0.090949,0
2023-01-23,0.044643,0.015789,-0.04565,-0.025134,0


**Convert to target shape**

In [794]:
# Parameters
Y_seq_len = Y_SEQ_LEN      # number of rows after each window that form the target
X_seq_len = seq_len_data   # number of consecutive rows that form one input window

# Number of (X, Y) samples that fit into the return series
batch_size = len(data_returns) - X_seq_len - Y_seq_len + 1
input_size = len(data_returns.columns)

# Fail early with a clear message: with too few rows the loop below would produce
# empty tensors and a later cell would break on X.shape[2] instead.
if batch_size < 1:
    raise ValueError(
        f'Not enough rows ({len(data_returns)}) to build a single sample with '
        f'X_seq_len={X_seq_len} and Y_seq_len={Y_seq_len}'
    )

# Convert DataFrame to NumPy for easier slicing
data_returns_np = data_returns.values

# Create sequences: sample i uses rows [i, i + X_seq_len) as input and the
# Y_seq_len rows that follow as target
X = []
Y = []
for i in range(batch_size):
    first_y_index = i + X_seq_len

    X.append(data_returns_np[i:first_y_index])

    # Raw returns of the rows directly after the input window (no argmax is taken here)
    next_day_returns = data_returns_np[first_y_index:first_y_index + Y_seq_len]
    Y.append(next_day_returns)

# Convert to PyTorch tensors
X = torch.tensor(np.array(X), dtype=torch.float32)  # Shape: (batch_size, X_seq_len, input_size)
Y = torch.tensor(np.array(Y), dtype=torch.float32)  # Shape: (batch_size, Y_seq_len, input_size)

In [795]:
# Sanity check: dimensions of the input tensor and its first window
print(f'X shape: \n\t{X.shape}')
print(f'X sample: \n\t{X[0]}')

X shape: 
	torch.Size([8, 4, 5])
X sample: 
	tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0127,  0.0767, -0.0530, -0.0486,  0.0000],
        [ 0.0643,  0.0616, -0.0062, -0.0477,  0.0000],
        [ 0.0288,  0.0750, -0.0107, -0.0909,  0.0000]])


In [796]:
# Sanity check: dimensions of the target tensor and its first entry
print(f'Y shape: \n\t{Y.shape}')
print(f'Y sample: \n\t{Y[0]}')

Y shape: 
	torch.Size([8, 1, 5])
Y sample: 
	tensor([[ 0.0446,  0.0158, -0.0456, -0.0251,  0.0000]])


**True labels**

| Function                  | Arguments      | Description                                                                  |
|---------------------------|----------------|------------------------------------------------------------------------------|
| `get_Y_max_one`            | `Y`            | Pick one asset with the highest returns                                       |
| `get_Y_max_light`          | `Y`            | Distribute weights among assets with positive returns, according to their return values |
| `get_Y_sharpe_one`         | `X, Y`         | Pick one asset with the highest Sharpe ratio                                  |
| `get_Y_sharpe_light`       | `X, Y`         | Distribute weights among assets with positive returns, according to their Sharpe ratio values |

In [797]:
# Build the training targets from the inputs and the following-day returns.
# NOTE(review): the allocation rule lives in utils.get_Y_sharpe_light and is not
# visible here; the table above describes the intended behaviour — confirm against utils.
Y_true_labels = utils.get_Y_sharpe_light(X, Y)

# Show the label dimensions and the labels of the last sample
print(f'Y_true_labels shape: \n\t{Y_true_labels.shape}')
print(f'Y_true_labels sample: \n\t{Y_true_labels[-1]}')

Y_true_labels shape: 
	torch.Size([8, 5])
Y_true_labels sample: 
	tensor([0.0324, 0.9676, 0.0000, 0.0000, 0.0000])


### **Model**

In [798]:
# Seed PyTorch's RNG before the model is built so that weight initialisation (and
# with it the training run) can be reproduced on a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

# Layer sizes are derived from the prepared tensors
input_size = X.shape[2]                 # features per time step
hidden_size = 256
output_size = Y_true_labels.shape[1]    # one output per label column

# Create the model
lstm_model = model.LSTMModel(input_size=input_size,
                             hidden_size=hidden_size,
                             output_size=output_size,
                             num_layers=2)

# Loss and optimizer
criterion = nn.MSELoss()
# Alternative loss, currently disabled:
# criterion = lstm_model.SharpeRatioLoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-4)

### **Training**

In [799]:
num_epochs = 1000

# Training mode is set once up front; nothing inside the loop switches it off.
lstm_model.train()

# Full-batch training: each epoch performs a single optimisation step on all of X.
for epoch in range(num_epochs):
    # Forward pass and loss against the target labels
    outputs = lstm_model(X)
    loss = criterion(outputs, Y_true_labels)

    # Clear stale gradients, back-propagate, then update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th epoch
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.0801
Epoch [200/1000], Loss: 0.0788
Epoch [300/1000], Loss: 0.0626
Epoch [400/1000], Loss: 0.0373
Epoch [500/1000], Loss: 0.0269
Epoch [600/1000], Loss: 0.0266
Epoch [700/1000], Loss: 0.0126
Epoch [800/1000], Loss: 0.0106
Epoch [900/1000], Loss: 0.0127
Epoch [1000/1000], Loss: 0.0026


In [800]:
# Switch to evaluation mode before inference: torch.no_grad() only disables gradient
# tracking, it does not turn off train-time behaviour such as dropout.
lstm_model.eval()
with torch.no_grad():
    output_weights = lstm_model(X)

In [801]:
# Re-display the price frame for reference next to the tensors shown below
data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-18,100.0,100.0,100.0,100.0
2023-01-19,100.0,100.0,100.0,100.0
2023-01-20,101.27,107.67,94.7,95.14
2023-01-21,107.78,114.3,94.11,90.6
2023-01-22,110.88,122.87,93.1,82.36
2023-01-23,115.83,124.81,88.85,80.29
2023-01-24,115.76,115.14,94.81,84.16
2023-01-25,106.03,104.35,99.62,86.57
2023-01-26,100.53,102.34,101.02,95.16
2023-01-27,93.7,94.37,104.82,95.63


In [802]:
# Input window of the last sample
X[-1]

tensor([[-0.0519, -0.0193,  0.0141,  0.0992,  0.0000],
        [-0.0679, -0.0779,  0.0376,  0.0049,  0.0000],
        [ 0.0587,  0.0527, -0.0638, -0.0680,  0.0000],
        [ 0.0264,  0.0340, -0.0399, -0.0698,  0.0000]])

In [803]:
# Returns of the rows that follow the last input window
Y[-1]

tensor([[ 0.0024,  0.0854, -0.0968, -0.0215,  0.0000]])

In [804]:
# Target labels of every sample, for comparison with the model output below
Y_true_labels

tensor([[0.7979, 0.2021, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.5992, 0.4008, 0.0000],
        [0.0000, 0.0000, 0.6825, 0.3175, 0.0000],
        [0.0000, 0.0000, 0.1842, 0.8158, 0.0000],
        [0.0000, 0.0000, 0.8905, 0.1095, 0.0000],
        [0.5370, 0.4630, 0.0000, 0.0000, 0.0000],
        [0.4467, 0.5533, 0.0000, 0.0000, 0.0000],
        [0.0324, 0.9676, 0.0000, 0.0000, 0.0000]])

In [805]:
print(f'Output weights shape: {output_weights.shape}')

# Scale the whole tensor to percentages once, then render each value with two decimals
[[f"{pct:.2f}" for pct in row] for row in (output_weights * 100).tolist()]

Output weights shape: torch.Size([8, 5])


[['78.03', '12.02', '9.91', '0.04', '0.00'],
 ['0.66', '0.01', '40.49', '58.83', '0.01'],
 ['0.38', '0.00', '66.57', '33.03', '0.02'],
 ['0.31', '0.00', '25.54', '74.14', '0.01'],
 ['6.59', '0.11', '87.72', '5.58', '0.00'],
 ['57.82', '41.82', '0.36', '0.00', '0.00'],
 ['35.06', '64.88', '0.05', '0.00', '0.00'],
 ['27.13', '72.84', '0.02', '0.00', '0.00']]