### This notebook loads the datasets and ties together the different parts of the project

In [None]:
from DataLoader import DataLoader
from models.CNN import *
from models.FFN import *
import torch
import torch.nn as nn
from models.CointegrationResidualGenerator import CointegrationResidualGenerator
from models.BacktestSharpeEvaluator import BacktestSharpeEvaluator
from PortfolioOptimizer import PortfolioOptimizer



## Sharpe ratio

In [16]:
def sharpe_ratio_loss(returns, risk_free_rate=0.0):
    """
    Negative Sharpe ratio of a return series, for use as a loss to minimise.

    Args:
        returns (torch.Tensor): Predicted returns to score.
        risk_free_rate (float): Value subtracted from every return before scoring.
    Returns:
        torch.Tensor: Minus the ratio of the mean excess return to its
            standard deviation, so that minimising it maximises the Sharpe ratio.
    """
    surplus = returns - risk_free_rate
    # Population standard deviation (unbiased=False); the 1e-6 offset keeps the
    # division finite when all excess returns are identical.
    volatility = torch.std(surplus, unbiased=False) + 1e-6
    return -(torch.mean(surplus) / volatility)

In [17]:

# MAIN FUNCTION
### -------------- PARSING DATA -------------- ###
# Load the daily European wholesale electricity price CSV through the project's DataLoader.
parser = DataLoader(file_path="../data/european_wholesale_electricity_price_data_daily.csv")

# Get the list of countries from the loader and print it
print("\n--- List of Countries ---")
all_countries_list = parser.get_country_list()
print(all_countries_list)

# Get the daily price matrix for all countries for the entire year 2021.
# fill_method="ffill" is forwarded to the loader -- presumably gaps are forward-filled;
# confirm in DataLoader.get_price_matrix.
price_matrix = parser.get_price_matrix(
    time_range="2021-01-01,2021-12-31",
    countries=all_countries_list,
    fill_method="ffill"
)

# Get the raw daily returns: the percentage change between consecutive rows of the price matrix.
# dropna() then discards every row that still contains a NaN (the first row always does,
# because it has no previous price to compare against).
# NOTE(review): dropna() does not remove +/-inf. A price of exactly 0 on the previous day
# would yield an infinite return -- confirm the price data never hits 0.
returns = price_matrix.pct_change().dropna()



### -------------- COINTEGRATION RESIDUALS -------------- ###
# Create an instance of the CointegrationResidualGenerator from the price matrix
residual_generator = CointegrationResidualGenerator(price_matrix)

# Compute the residuals for every asset; the results are kept inside the generator
# and read back through the getter below.
residual_generator.compute_all_asset_residuals()

# Get residuals
asset_residuals = residual_generator.get_asset_residuals()

# Get the input for the CNN
# cnn_input contains a set of 329 data samples; each sample represents the 30-day cumulative residuals for the 31 countries
cumulative_residual_window = 30
cnn_input = residual_generator.prepare_cnn_input_from_residuals(window=cumulative_residual_window)

# Locate the first residual date inside the returns DataFrame, then build one positional
# index per rolling window: window i spans residual rows i .. i + window - 1, so its
# "next day" sits at position start_idx_in_returns + i + window.
# NOTE(review): assumes the residual dates map onto consecutive rows of `returns` -- confirm.
# NOTE(review): the largest position produced is start_idx_in_returns + len(asset_residuals),
# so `returns` must contain at least one row after the last residual date, otherwise the
# .iloc lookup below goes out of bounds.
start_idx_in_returns = returns.index.get_loc(asset_residuals.index[0])
num_samples = len(asset_residuals) - cumulative_residual_window + 1
next_day_indices = [start_idx_in_returns + i + cumulative_residual_window for i in range(num_samples)]

# Get the next-day returns for the corresponding indices
# The next-day returns are the returns for the day after the last day of each 30-day window
# For example, if the first 30-day window ends on index 0, the next-day return is at index 1
# If the second 30-day window ends on index 1, the next-day return is at index 2, and so on.
next_day_returns = returns.iloc[next_day_indices]




### -------------- FEED THE 30-DAY CUMULATIVE RESIDUALS OF EVERY COUNTRY TO CNN+FFN -------------- ###
# Transform cnn_input to be compatible with the CNN input shape by swapping its last two axes
cnn_input_array = cnn_input.transpose(0, 2, 1) # [samples, features, window]

# FILIP'S SECTION: CNN+FFN
# Hey Filip, this is the section where you can add your code to train the CNN+FFN model.
# cnn_input_array essentially contains 329 training data points; each data point holds the 30-day cumulative residuals for the 31 countries.
# So, for one "data point" you would feed the set of 30-day cumulative residuals for every country to the CNN+FFN model.
# One data point should result in a set of weights, one for each of the 31 countries.
# Each set of weights is used to calculate one next-day portfolio return.
# Repeat this for all 329 data points to get a set of 329 portfolio returns.
# You can then use these portfolio returns to calculate the Sharpe ratio.
# Optimize the CNN+FFN to maximize the Sharpe ratio.

# Set the device to GPU if available, otherwise MPS (for Mac silicon) or CPU.
# NOTE(review): `device` is not passed to PortfolioOptimizer further down in this cell --
# confirm the optimizer selects its own device, or wire this one in.
if torch.cuda.is_available():
    device = torch.device("cuda")
# `torch.backends.mps` only exists from PyTorch 1.12 onwards. Look it up defensively so that
# older installs fall through to the CPU branch instead of raising AttributeError.
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

torch.manual_seed(1)  # For reproducibility (seeds torch's default random number generator)

# Create the PortfolioOptimizer instance and train the model on the windowed residuals
# (inputs) and the matching next-day returns (targets).
# NOTE: We need to have some validation data to evaluate the model performance,
# otherwise we will be overfitting the model to the training data.
# NOTE(review): batch_size=1000 is larger than the 329 samples mentioned above, so this
# presumably trains on the full dataset in a single batch -- confirm in PortfolioOptimizer.
# NOTE(review): the `device` chosen earlier in this cell is not forwarded here.
optimizer = PortfolioOptimizer(cnn_input_array, next_day_returns, batch_size=1000, num_epochs=1000)
portfolio_returns = optimizer.train()

### -------------- GET THE WEIGHTS OUTPUTTED FROM CNN+FFN -------------- ###
# Initializing the Sharpe ratio evaluator
#evaluator = BacktestSharpeEvaluator()

# Get the weight outputted from the CNN+FFN model
# One weight for each country
#weights = np.array([]) # CHANGE THIS TO THE ACTUAL WEIGHTS OUTPUTTED FROM THE CNN+FFN MODEL

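# Normalize the weights using L1 normalization
# This is intended to keep the portfolio dollar-neutral
# NOTE(review): L1 scaling by itself only fixes the gross exposure (sum of |weights| = 1);
# confirm that normalize_weights_l1 also enforces a zero net position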
#normalized_weights = evaluator.normalize_weights_l1(weights)

# Multiply the normalized weights (vector) with the next-day returns (vector) to get the portfolio return
#next_day_portfolio_return = evaluator.compute_portfolio_return(normalized_weights, next_day_returns.iloc[0].values)

# Store the next day portfolio return 
#evaluator.add_return(next_day_portfolio_return)

# Repeat the above step for all 329 data points, adding the portfolio returns to the evaluator for each data point

# Once all portfolio returns are calculated, you can calculate the Sharpe ratio

# Train the model to optimize the Sharpe Ratio


Data loaded successfully from ../data/european_wholesale_electricity_price_data_daily.csv

--- List of Countries ---
['Austria', 'Belgium', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom', 'Bulgaria', 'Serbia', 'Croatia', 'Montenegro', 'North Macedonia', 'Ireland']


  result = func(self.values, **kwargs)


Epoch 0, Sharpe Ratio: 0.0005
Epoch 10, Sharpe Ratio: 0.0348
Epoch 20, Sharpe Ratio: 0.0520
Epoch 30, Sharpe Ratio: 0.0615
Epoch 40, Sharpe Ratio: 0.0727
Epoch 50, Sharpe Ratio: 0.0938
Epoch 60, Sharpe Ratio: 0.1118
Epoch 70, Sharpe Ratio: 0.1329
Epoch 80, Sharpe Ratio: 0.1586
Epoch 90, Sharpe Ratio: 0.1756
Epoch 100, Sharpe Ratio: 0.1660
Epoch 110, Sharpe Ratio: 0.1843
Epoch 120, Sharpe Ratio: 0.2078
Epoch 130, Sharpe Ratio: 0.2034
Epoch 140, Sharpe Ratio: 0.1963
Epoch 150, Sharpe Ratio: 0.1954
Epoch 160, Sharpe Ratio: 0.2044
Epoch 170, Sharpe Ratio: 0.2069
Epoch 180, Sharpe Ratio: 0.2121
Epoch 190, Sharpe Ratio: 0.2252
Epoch 200, Sharpe Ratio: 0.2287
Epoch 210, Sharpe Ratio: 0.2315
Epoch 220, Sharpe Ratio: 0.2265
Epoch 230, Sharpe Ratio: 0.2255
Epoch 240, Sharpe Ratio: 0.2279
Epoch 250, Sharpe Ratio: 0.2394
Epoch 260, Sharpe Ratio: 0.2399
Epoch 270, Sharpe Ratio: 0.2407
Epoch 280, Sharpe Ratio: 0.2453
Epoch 290, Sharpe Ratio: 0.2475
Epoch 300, Sharpe Ratio: 0.2509
Epoch 310, Sharpe R