### **Imports**

In [251]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [252]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [253]:
import importlib
import utils, model
importlib.reload(utils)
importlib.reload(model)

<module 'model' from '/Users/pawelgrzeszczyk/Documents/02_studies/master/portfolio-optimization-dl/main/modeling/model.py'>

### **Reading the dataset**

In [254]:
# NOTE(review): this entire cell is commented out. It is a CSV-based loading
# path (../data/data_files/w20_stock.csv, 'Close_*' columns); the notebook
# currently gets `data` from utils.generate_data in the next cell instead.
# # Reading data, renaming columns and reordering
# data = pd.read_csv('../data/data_files/w20_stock.csv').filter(regex='^Close_')
# data.columns = [x.split('_')[1].split('.')[0] for x in data.columns]
# data['Date'] = pd.read_csv('../data/data_files/w20_stock.csv')['Date']
# data.insert(0, 'Date', data.pop('Date'))

# # Date as index
# data = data.set_index('Date')

# # Getting the first row with no NaN values
# first_full_row = data.dropna().first_valid_index()
# print(f'First row with no NaN values is from: ({first_full_row})')

# # Removing rows with NaN values
# data = data.loc[first_full_row:]
# NOTE(review): first_full_row is an index label (a Date), not a count of
# removed rows, so the message below would be misleading if re-enabled.
# print(f'Number of rows with NaN values removed: {first_full_row}')
# print(f'Number of rows with no NaN values: {data.shape[0]}')
# data.head()

In [255]:
# Build the price table used by the rest of the notebook with the project
# helper utils.generate_data. The call returns the price frame (`data`) and a
# sequence length (`seq_len_data`) that is later reused as the window size.
# NOTE(review): the meaning of days / num_*_start / swap_count is defined in
# utils.py and is not visible here — confirm there before changing values.
# NOTE(review): the two saved displays of `data` in this notebook show
# different values, so generation appears to be unseeded — confirm, and seed
# it if reproducible outputs are required.
from utils import generate_data
data, seq_len_data = generate_data(end_date=datetime(2023, 1, 30),
                                   days=10,
                                   num_ascending_start=1,
                                   num_descending_start=1,
                                   swap_count=2)
data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-10-17,100.00,100.00,100.00,100.00
2022-10-18,100.00,100.00,100.00,100.00
2022-10-19,109.28,102.43,93.92,92.83
2022-10-20,112.34,108.82,93.25,86.44
2022-10-21,116.19,113.09,93.09,80.63
...,...,...,...,...
2023-01-26,109.98,178.11,52.80,84.51
2023-01-27,114.06,186.02,51.10,76.66
2023-01-28,121.66,192.91,50.70,73.63
2023-01-29,123.29,208.53,45.70,67.14


### **Prepare data for the model**

**Calculating percentage changes**

In [256]:
# Row-over-row percentage change of every price column; rows containing NaN
# (at least the first one, which has no predecessor) are dropped.
# A constant zero-return 'SAVE' column is appended as the last column so that
# "don't invest in anything" is one of the available choices.
data_returns = data.pct_change().dropna().assign(SAVE=0)

print(f'Data shape: {data_returns.shape}')
data_returns.head()

Data shape: (105, 5)


Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2,SAVE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-18,0.0,0.0,0.0,0.0,0
2022-10-19,0.0928,0.0243,-0.0608,-0.0717,0
2022-10-20,0.028001,0.062384,-0.007134,-0.068836,0
2022-10-21,0.034271,0.039239,-0.001716,-0.067214,0
2022-10-22,0.025045,0.083473,-0.032334,-0.068337,0


**Convert to target shape**

In [257]:
# Windowing parameters
seq_len = seq_len_data                     # rows of history per sample
batch_size = len(data_returns) - seq_len   # number of (window, next row) pairs
input_size = len(data_returns.columns)     # values per row (includes 'SAVE')

# NumPy array of the returns for positional slicing
data_returns_np = data_returns.values

# Sample i pairs the window of rows [i, i + seq_len) with the full return
# vector of the row that immediately follows it (not an index).
X = np.array([data_returns_np[i:i + seq_len] for i in range(batch_size)])  # (batch_size, seq_len, input_size)
Y = np.array([data_returns_np[i + seq_len] for i in range(batch_size)])    # (batch_size, input_size)

# float32 tensors for the model
X = torch.tensor(X, dtype=torch.float32)
Y = torch.tensor(Y, dtype=torch.float32)

In [258]:
# Sanity check: print the shape of X and its second window (index 1).
print(f'X shape: \n\t{X.shape}')
print(f'X sample: \n\t{X[1]}')

X shape: 
	torch.Size([98, 7, 5])
X sample: 
	tensor([[ 0.0928,  0.0243, -0.0608, -0.0717,  0.0000],
        [ 0.0280,  0.0624, -0.0071, -0.0688,  0.0000],
        [ 0.0343,  0.0392, -0.0017, -0.0672,  0.0000],
        [ 0.0250,  0.0835, -0.0323, -0.0683,  0.0000],
        [ 0.0483,  0.0945, -0.0821, -0.0543,  0.0000],
        [ 0.0931,  0.0455, -0.0448, -0.0698,  0.0000],
        [ 0.0792,  0.0968, -0.0048, -0.0371,  0.0000]])


In [259]:
# Sanity check: print the shape of Y and the target row paired with X[1].
print(f'Y shape: \n\t{Y.shape}')
print(f'Y sample: \n\t{Y[1]}')

Y shape: 
	torch.Size([98, 5])
Y sample: 
	tensor([-0.0636, -0.0200,  0.0059,  0.0930,  0.0000])


### **Loss**

In [260]:
# One-hot training targets: for every row of Y, a 1 at the position of the
# largest next-row return and 0 everywhere else (same shape and dtype as Y).
Y_max_id = Y.argmax(dim=1)
Y_max = nn.functional.one_hot(Y_max_id, num_classes=Y.shape[1]).to(Y.dtype)

print(f'Y_max shape: \n\t{Y_max.shape}')
print(f'Y_max sample: \n\t{Y_max[1]}')

Y_max shape: 
	torch.Size([98, 5])
Y_max sample: 
	tensor([0., 0., 0., 1., 0.])


### **Model**

In [261]:
class LSTMModel(nn.Module):
    """Stacked LSTM with a two-layer dense head that outputs softmax weights.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state and of the first dense layer.
        output_size: number of output weights produced per sample.
        num_layers: number of stacked LSTM layers.
        dropout_rate: dropout applied between stacked LSTM layers. It is only
            passed to nn.LSTM when ``num_layers > 1``; with a single layer
            nn.LSTM has no place to apply it and would only emit a warning.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout_rate=0.5):
        super().__init__()
        self.input_size = input_size      # features per time step
        self.hidden_size = hidden_size    # LSTM hidden state width
        self.output_size = output_size    # number of output weights
        self.num_layers = num_layers      # stacked LSTM layers

        # Inter-layer dropout is meaningless (and warned about) for one layer.
        lstm_dropout = dropout_rate if num_layers > 1 else 0.0

        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_size, hidden_size)       # dense layer 1
        self.fc_out = nn.Linear(hidden_size, output_size)   # dense layer 2
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of sequences to per-sample weights that sum to 1.

        Args:
            x: tensor of shape (batch, seq_len, input_size) (batch_first=True).

        Returns:
            Tensor of shape (batch, output_size); each row is a softmax output.
        """
        # Zero initial states created with x's device and dtype, so the model
        # also works after .to(device) / .double() instead of assuming the
        # default CPU float32 tensor type.
        h_0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # hidden state
        c_0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # cell state

        lstm_out, _ = self.lstm(x, (h_0, c_0))
        x = lstm_out[:, -1, :]  # output of the last time step: (batch, hidden_size)
        x = self.relu(x)
        x = self.fc(x)
        x = self.relu(x)
        x = self.fc_out(x)

        # Softmax over the output dimension so each row sums to 1
        out = self.softmax(x)

        return out

### **Training**

In [250]:
# Model dimensions are read off the prepared tensors
input_size = X.size(2)
hidden_size = 256
output_size = Y.size(1)

# Two stacked LSTM layers feeding the dense head
lstm_model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=2,
)

# Mean squared error between the softmax output and the one-hot target
criterion = nn.MSELoss()
# criterion = lstm_model.SharpeRatioLoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-4)

# Full-batch training: every epoch is a single step over all of X
num_epochs = 1000

lstm_model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()

    outputs = lstm_model(X)
    loss = criterion(outputs, Y_max)

    loss.backward()
    optimizer.step()

    # Report the loss every 100 epochs
    if not (epoch + 1) % 100:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.1492
Epoch [200/1000], Loss: 0.1471
Epoch [300/1000], Loss: 0.1031
Epoch [400/1000], Loss: 0.0954
Epoch [500/1000], Loss: 0.0886
Epoch [600/1000], Loss: 0.0849
Epoch [700/1000], Loss: 0.0803
Epoch [800/1000], Loss: 0.0802
Epoch [900/1000], Loss: 0.0798
Epoch [1000/1000], Loss: 0.0756


In [233]:
# Inference with the trained network.
# `model` is the project module imported in the imports cell, not the network,
# so calling it fails on a fresh kernel; the trained instance is `lstm_model`.
lstm_model.eval()  # turn off the LSTM's inter-layer dropout for deterministic predictions
with torch.no_grad():
    output_weights = lstm_model(X)

In [234]:
# Display the generated price table again.
# NOTE(review): the values saved below differ from the earlier display of
# `data`, so the stored outputs come from different kernel runs.
data

Unnamed: 0_level_0,ascending_1,ascending_2,descending_1,descending_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-10-17,100.00,100.00,100.00,100.00
2022-10-18,100.00,100.00,100.00,100.00
2022-10-19,106.17,103.90,94.65,91.34
2022-10-20,109.76,106.87,93.96,90.48
2022-10-21,119.36,114.81,92.32,87.24
...,...,...,...,...
2023-01-26,74.63,93.70,91.25,58.94
2023-01-27,75.53,100.25,91.23,56.71
2023-01-28,78.91,106.66,91.11,51.77
2023-01-29,82.33,116.78,82.20,51.51


In [235]:
# Print the shape, then list every row of predicted weights as percentage
# strings with two decimals (all rows are shown, not just a sample).
print(f'Output weights shape: {output_weights.shape}')
[[f"{elem * 100 :.2f}" for elem in row] for row in output_weights]

Output weights shape: torch.Size([98, 5])


[['13.76', '36.04', '7.88', '42.28', '0.04'],
 ['0.11', '0.21', '37.61', '62.06', '0.01'],
 ['0.61', '1.19', '53.15', '45.03', '0.02'],
 ['0.10', '0.22', '34.61', '65.07', '0.01'],
 ['0.12', '0.24', '34.96', '64.67', '0.01'],
 ['0.10', '0.19', '41.95', '57.76', '0.01'],
 ['0.11', '0.22', '42.48', '57.18', '0.01'],
 ['5.82', '16.57', '10.31', '67.27', '0.03'],
 ['39.74', '59.79', '0.01', '0.46', '0.00'],
 ['37.10', '62.19', '0.01', '0.69', '0.00'],
 ['33.19', '65.65', '0.02', '1.14', '0.00'],
 ['35.97', '63.30', '0.01', '0.71', '0.00'],
 ['37.62', '61.70', '0.01', '0.66', '0.00'],
 ['45.66', '54.06', '0.00', '0.28', '0.00'],
 ['32.49', '66.56', '0.02', '0.92', '0.00'],
 ['0.15', '0.29', '48.45', '51.10', '0.01'],
 ['0.02', '0.03', '88.52', '11.42', '0.00'],
 ['0.00', '0.00', '0.21', '99.78', '0.00'],
 ['0.00', '0.00', '0.45', '99.54', '0.00'],
 ['0.00', '0.01', '2.46', '97.52', '0.00'],
 ['0.37', '0.80', '53.74', '45.09', '0.01'],
 ['0.55', '1.41', '36.23', '61.79', '0.02'],
 ['26.65', 