### **Imports**

In [146]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [147]:
import torch
import torch.nn as nn

In [148]:
import importlib
import utils, models
importlib.reload(utils)
importlib.reload(models)

<module 'models' from '/Users/pawelgrzeszczyk/Documents/02_studies/master/portfolio-optimization-dl/main/modeling/models.py'>

### **Reading the dataset**

In [149]:
# # Reading data, renaming columns and reordering
# data = pd.read_csv('../data/data_files/w20_stock.csv').filter(regex='^Close_')
# data.columns = [x.split('_')[1].split('.')[0] for x in data.columns]
# data['Date'] = pd.read_csv('../data/data_files/w20_stock.csv')['Date']
# data.insert(0, 'Date', data.pop('Date'))

# # Date as index
# data = data.set_index('Date')

# # Getting the first row with no NaN values
# first_full_row = data.dropna().first_valid_index()
# print(f'First row with no NaN values is from: ({first_full_row})')

# # Removing rows with NaN values
# data = data.loc[first_full_row:]
# print(f'Number of rows with NaN values removed: {first_full_row}')
# print(f'Number of rows with no NaN values: {data.shape[0]}')
# data.head()

In [150]:
from utils import generate_data
data, seq_len_data = generate_data(end_date=datetime(2023, 1, 30),
                                   days=12,
                                   num_ascending_start=2,
                                   num_descending_start=2,
                                   swap_count=3)
# How many "next" days should be used to set the true label
Y_SEQ_LEN = 2

data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-18,100.0,100.0,100.0,100.0
2023-01-19,100.0,100.0,100.0,100.0
2023-01-20,102.21,109.76,95.07,93.75
2023-01-21,112.39,118.96,88.47,88.9
2023-01-22,113.66,120.51,81.4,87.51
2023-01-23,122.1,127.36,76.33,79.91
2023-01-24,112.89,121.64,83.95,81.98
2023-01-25,107.48,118.67,90.51,83.6
2023-01-26,105.18,108.0,98.22,87.75
2023-01-27,101.79,100.42,98.63,96.51


### **Prepare data for the model**

**Calculating percentage changes**

In [151]:
# Calculate daily returns
data_returns = data.pct_change().dropna()

# Add new row equal to 0 - don't invest in anything
data_returns['SAVE'] = 0

print(f'Data shape: {data_returns.shape}')
data_returns.head()

Data shape: (12, 5)


Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2,SAVE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-19,0.0,0.0,0.0,0.0,0
2023-01-20,0.0221,0.0976,-0.0493,-0.0625,0
2023-01-21,0.099599,0.083819,-0.069423,-0.051733,0
2023-01-22,0.0113,0.01303,-0.079914,-0.015636,0
2023-01-23,0.074257,0.056842,-0.062285,-0.086847,0


**Convert to target shape**

In [152]:
# Parameters
Y_seq_len = Y_SEQ_LEN

X_seq_len = seq_len_data
batch_size = len(data_returns) - X_seq_len - Y_seq_len + 1
input_size = len(data_returns.columns)

# Convert DataFrame to NumPy for easier slicing
data_returns_np = data_returns.values

# Create sequences
X = []
Y = []
for i in range(batch_size):
    first_y_index = i + X_seq_len
    
    X.append(data_returns_np[i:first_y_index])

    # Get the index of the highest return for the next day
    next_day_returns = data_returns_np[first_y_index:first_y_index + Y_seq_len]
    Y.append(next_day_returns)

# Convert to PyTorch tensors
X = torch.tensor(np.array(X), dtype=torch.float32) # Shape: (batch_size, X_seq_len, input_size)
Y = torch.tensor(np.array(Y), dtype=torch.float32) # Shape: (batch_size, Y_seq_len, input_size)
next_day_Y = Y[:, 0, :] # Shape: (batch_size, input_size)

In [153]:
print(f'X shape: \n\t{X.shape}')
print(f'X sample: \n\t{X[0]}')

X shape: 
	torch.Size([7, 4, 5])
X sample: 
	tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0221,  0.0976, -0.0493, -0.0625,  0.0000],
        [ 0.0996,  0.0838, -0.0694, -0.0517,  0.0000],
        [ 0.0113,  0.0130, -0.0799, -0.0156,  0.0000]])


In [154]:
print(f'Y shape: \n\t{Y.shape}')
print(f'Y sample: \n\t{Y[0]}')

Y shape: 
	torch.Size([7, 2, 5])
Y sample: 
	tensor([[ 0.0743,  0.0568, -0.0623, -0.0868,  0.0000],
        [-0.0754, -0.0449,  0.0998,  0.0259,  0.0000]])


**True labels**

| Function                  | Arguments      | Description                                                                  |
|---------------------------|----------------|------------------------------------------------------------------------------|
| `get_Y_max_one`            | `Y`            | Pick one asset with the highest returns                                       |
| `get_Y_max_light`          | `Y`            | Distribute weights among assets with positive returns depending on the returns value |
| `get_Y_sharpe_one`         | `X, Y`         | Pick one asset with the highest Sharpe ratio                                  |
| `get_Y_sharpe_light`       | `X, Y`         | Distribute weights among assets with positive returns depending on the Sharpe ratio value |

In [155]:
Y_true_labels = utils.get_Y_sharpe_light(X, Y)

print(f'Y_true_labels shape: \n\t{Y_true_labels.shape}')
print(f'Y_true_labels sample: \n\t{Y_true_labels[-1]}')

Y_true_labels shape: 
	torch.Size([7, 5])
Y_true_labels sample: 
	tensor([0.6389, 0.3611, 0.0000, 0.0000, 0.0000])


### **Model**

In [156]:
input_size = X.shape[2]
hidden_size = 256
output_size = Y_true_labels.shape[1]

# Create the model
lstm_model = models.LSTMModel(input_size=input_size,
                  hidden_size=hidden_size,
                  output_size=output_size,
                  num_layers=2)

# Loss and optimizer
criterion = nn.MSELoss()
# criterion = lstm_model.SharpeRatioLoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-4)

### **Training**

In [157]:
num_epochs = 1000

for epoch in range(num_epochs):
    lstm_model.train()
    optimizer.zero_grad()

    # Forward pass
    outputs = lstm_model(X)

    # Calculate loss
    loss = criterion(outputs, Y_true_labels)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.0583
Epoch [200/1000], Loss: 0.0537
Epoch [300/1000], Loss: 0.0275
Epoch [400/1000], Loss: 0.0162
Epoch [500/1000], Loss: 0.0094
Epoch [600/1000], Loss: 0.0070
Epoch [700/1000], Loss: 0.0047
Epoch [800/1000], Loss: 0.0050
Epoch [900/1000], Loss: 0.0053
Epoch [1000/1000], Loss: 0.0046


### **Evaluation**

In [158]:
with torch.no_grad():
    output_weights = lstm_model(X)

In [159]:
data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-18,100.0,100.0,100.0,100.0
2023-01-19,100.0,100.0,100.0,100.0
2023-01-20,102.21,109.76,95.07,93.75
2023-01-21,112.39,118.96,88.47,88.9
2023-01-22,113.66,120.51,81.4,87.51
2023-01-23,122.1,127.36,76.33,79.91
2023-01-24,112.89,121.64,83.95,81.98
2023-01-25,107.48,118.67,90.51,83.6
2023-01-26,105.18,108.0,98.22,87.75
2023-01-27,101.79,100.42,98.63,96.51


In [160]:
X[-1]

tensor([[-0.0479, -0.0244,  0.0781,  0.0198,  0.0000],
        [-0.0214, -0.0899,  0.0852,  0.0496,  0.0000],
        [-0.0322, -0.0702,  0.0042,  0.0998,  0.0000],
        [ 0.0575,  0.0450, -0.0292, -0.0333,  0.0000]])

In [161]:
Y[-1]

tensor([[ 0.0109,  0.0435, -0.0489, -0.0445,  0.0000],
        [ 0.0984,  0.0195, -0.0656, -0.0062,  0.0000]])

In [162]:
Y_true_labels

tensor([[0.0000, 0.2719, 0.7281, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.6965, 0.3035, 0.0000],
        [0.0000, 0.0000, 0.5894, 0.4106, 0.0000],
        [0.0000, 0.0000, 0.3173, 0.6827, 0.0000],
        [0.2866, 0.0000, 0.0000, 0.7134, 0.0000],
        [0.4816, 0.5184, 0.0000, 0.0000, 0.0000],
        [0.6389, 0.3611, 0.0000, 0.0000, 0.0000]])

In [163]:
print(f'Output weights shape: {output_weights.shape}')
[[f"{elem * 100 :.2f}" for elem in row] for row in output_weights]

Output weights shape: torch.Size([7, 5])


[['1.24', '0.05', '76.43', '22.28', '0.00'],
 ['0.83', '0.02', '60.37', '38.78', '0.00'],
 ['0.83', '0.02', '63.20', '35.95', '0.00'],
 ['8.99', '0.28', '14.18', '76.55', '0.00'],
 ['23.77', '0.89', '5.22', '70.12', '0.01'],
 ['57.00', '40.05', '0.77', '1.96', '0.22'],
 ['63.36', '33.37', '0.92', '2.19', '0.17']]

In [164]:
output_weights

tensor([[1.2402e-02, 4.9209e-04, 7.6425e-01, 2.2284e-01, 1.2347e-05],
        [8.2905e-03, 2.2100e-04, 6.0371e-01, 3.8777e-01, 5.4534e-06],
        [8.3099e-03, 2.3446e-04, 6.3199e-01, 3.5946e-01, 5.8000e-06],
        [8.9936e-02, 2.7723e-03, 1.4179e-01, 7.6547e-01, 2.8171e-05],
        [2.3772e-01, 8.8701e-03, 5.2156e-02, 7.0120e-01, 5.8132e-05],
        [5.6997e-01, 4.0051e-01, 7.7320e-03, 1.9614e-02, 2.1745e-03],
        [6.3357e-01, 3.3368e-01, 9.1514e-03, 2.1864e-02, 1.7412e-03]])

### **Profit calculation**

In [165]:
next_day_Y

tensor([[ 0.0743,  0.0568, -0.0623, -0.0868,  0.0000],
        [-0.0754, -0.0449,  0.0998,  0.0259,  0.0000],
        [-0.0479, -0.0244,  0.0781,  0.0198,  0.0000],
        [-0.0214, -0.0899,  0.0852,  0.0496,  0.0000],
        [-0.0322, -0.0702,  0.0042,  0.0998,  0.0000],
        [ 0.0575,  0.0450, -0.0292, -0.0333,  0.0000],
        [ 0.0109,  0.0435, -0.0489, -0.0445,  0.0000]])

In [166]:
max_daily_returns = torch.max(next_day_Y, dim=1).values + 1
max_total_returns = torch.prod(max_daily_returns) - 1

print(f'Total max returns: {max_total_returns}')
print(f'Daily max returns: {max_daily_returns}')

Total max returns: 0.6775656938552856
Daily max returns: tensor([1.0743, 1.0998, 1.0781, 1.0852, 1.0998, 1.0575, 1.0435])


In [167]:
portfolio_daily_returns = (next_day_Y * output_weights).sum(dim=1) + 1
portfolio_total_returns = torch.prod(portfolio_daily_returns) - 1

print(f'Total portfolio returns: {portfolio_total_returns}')
print(f'Daily portfolio returns: {portfolio_daily_returns}')

Total portfolio returns: 0.2573341131210327
Daily portfolio returns: tensor([0.9340, 1.0697, 1.0561, 1.0479, 1.0619, 1.0499, 1.0200])


In [168]:
lost_profit = 1 - (total_portfolio_returns / max_total_returns)
lost_profit

tensor(0.3043)