Import all the classes we need.

In [1]:
from PriceDataLoader import PriceDataLoader
from WeatherDataLoader import WeatherDataLoader
from models.CNN import *
from models.FFN import *
import torch
from models.CointegrationResidualGenerator import CointegrationResidualGenerator
from PortfolioOptimizer import PortfolioOptimizer
from Trainer import Trainer
from PortfolioOptimizer import PortfolioOptimizer
from DataPreparation import DataPreparation


Set the device

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else
                        "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

Using device: mps


Load the data

In [None]:
price_parser = PriceDataLoader(file_path="../data/european_wholesale_electricity_price_data_daily.csv")
weather_parser = WeatherDataLoader(file_path="../data/aggregated_weather.csv")

# Get countries with complete data (some countries do not have data for the entire time period)
time_range = "2015-01-01,2024-12-31"
countries_list = list(set(price_parser.get_countries_with_complete_data(time_range)) & 
                     set(weather_parser.get_country_list()))
print(f"Number of countries with complete data: {len(countries_list)}")
print(f"Countries: {countries_list}")

# Generate price matrix
price_matrix = price_parser.get_price_matrix(
    time_range=time_range, 
    countries=countries_list, 
    fill_method="ffill"
)
print(f"Shape of price_matrix: {price_matrix.shape}")
#print(f"Missing values in price_matrix:\n{price_matrix.isna().sum()}")

# Compute returns
returns = price_matrix.pct_change().dropna()
print(f"Shape of returns: {returns.shape}")

# Define weather features
weather_features = [
    'temperature_2m_mean', 'temperature_2m_min', 'temperature_2m_max',
    'precipitation_mean', 'precipitation_min', 'precipitation_max',
    'wind_speed_mean', 'wind_speed_min', 'wind_speed_max'
]
print(f"Number of total features (price + weather): {len(weather_features) * len(countries_list) + 1}")

# Generate weather matrix
weather_matrix = weather_parser.get_weather_matrix(
    time_range=time_range,
    countries=countries_list,
    fill_method="ffill",
    features=weather_features
)
print(f"Shape of weather_matrix: {weather_matrix.shape}")

Data loaded successfully from ../data/european_wholesale_electricity_price_data_daily.csv
Weather data loaded successfully from ../data/aggregated_weather.csv
Number of countries with complete data: 24
Countries: ['Slovenia', 'Slovakia', 'Italy', 'Denmark', 'Latvia', 'Czechia', 'Norway', 'Romania', 'Lithuania', 'Spain', 'Sweden', 'Greece', 'Portugal', 'Hungary', 'Netherlands', 'Germany', 'Austria', 'Belgium', 'Finland', 'Poland', 'France', 'Luxembourg', 'Estonia', 'Switzerland']
Shape of price_matrix: (3653, 24)
Missing values in price_matrix:
Country
Austria        0
Belgium        0
Czechia        0
Denmark        0
Estonia        0
Finland        0
France         0
Germany        0
Greece         0
Hungary        0
Italy          0
Latvia         0
Lithuania      0
Luxembourg     0
Netherlands    0
Norway         0
Poland         0
Portugal       0
Romania        0
Slovakia       0
Slovenia       0
Spain          0
Sweden         0
Switzerland    0
dtype: int64
Shape of returns: (36

Generate cointegration residuals

In [4]:
residual_generator = CointegrationResidualGenerator(price_matrix)
residual_generator.compute_all_asset_residuals()
asset_residuals = residual_generator.get_asset_residuals() # [num_days, num_assets (= countries)]
print(asset_residuals.shape)

(3537, 24)


  result = func(self.values, **kwargs)


Prepare the data as input to our model

In [5]:
data_prep = DataPreparation(
    price_residuals=asset_residuals,
    weather_data=weather_matrix,
    countries=countries_list,
    weather_features=weather_features
)
window_size = 30
stride = 1
combined_data = data_prep.prepare_rolling_windows(window_size=window_size, stride=stride)
# [num_samples, num_features, window_size]

next_day_returns = data_prep.prepare_next_day_returns(returns=returns, window_size=window_size, stride=stride)
# [num_samples, num_countries]

# Split the data in time-series
(train_data, train_returns), (val_data, val_returns), (test_data, test_returns) = \
    data_prep.create_train_val_test_split(combined_data=combined_data, next_day_returns=next_day_returns)

# Convert to tensors
train_data_tensor = torch.FloatTensor(train_data).to(device)
train_returns_tensor = torch.FloatTensor(train_returns).to(device)
val_data_tensor = torch.FloatTensor(val_data).to(device)
val_returns_tensor = torch.FloatTensor(val_returns).to(device)
test_data_tensor = torch.FloatTensor(test_data).to(device)
test_returns_tensor = torch.FloatTensor(test_returns).to(device)

print(f"Train data shape: {train_data_tensor.shape}") # [samples, num_features, window size]
print(f"Train returns shape: {train_returns_tensor.shape}") # [samples, num_countries (returns)]

Train data shape: torch.Size([2455, 240, 30])
Train returns shape: torch.Size([2455, 24])


We initialize the portfolio optimizer and set up our trainer, which allows grid search for finding the best hyperparameters.

In [6]:
portfolio_optimizer = PortfolioOptimizer(
    window_size=window_size,
    num_countries=len(countries_list),
    num_weather_features=len(weather_features),
    num_filters=8,  # Default value
    filter_size=3,
    hidden_dim=64,
    num_heads=4,
    use_transformer=True,
    device=device
)

trainer = Trainer(
    optimizer=portfolio_optimizer,
    train_data=train_data_tensor,
    train_returns=train_returns_tensor,
    val_data=val_data_tensor,
    val_returns=val_returns_tensor,
    lr=0.001,
    num_epochs=100,
    batch_size=32,
    patience=50,
    device=device,
)

# Can add more parameters to test
param_grid = {
    'num_filters': [8, 16, 32],
    'filter_size': [3, 5, 7],
    'hidden_dim': [64, 128]
}
best_params, best_score, best_returns = trainer.grid_search(param_grid, verbose=False)
print(f"Best parameters: {best_params} with Sharpe Ratio: {best_score:.4f}")

Testing: num_filters=8, filter_size=3, hidden_dim=64
Testing: num_filters=8, filter_size=3, hidden_dim=128
Testing: num_filters=8, filter_size=5, hidden_dim=64
Testing: num_filters=8, filter_size=5, hidden_dim=128
Testing: num_filters=8, filter_size=7, hidden_dim=64
Testing: num_filters=8, filter_size=7, hidden_dim=128
Testing: num_filters=16, filter_size=3, hidden_dim=64
Testing: num_filters=16, filter_size=3, hidden_dim=128
Testing: num_filters=16, filter_size=5, hidden_dim=64
Testing: num_filters=16, filter_size=5, hidden_dim=128
Testing: num_filters=16, filter_size=7, hidden_dim=64
Testing: num_filters=16, filter_size=7, hidden_dim=128
Testing: num_filters=32, filter_size=3, hidden_dim=64
Testing: num_filters=32, filter_size=3, hidden_dim=128
Testing: num_filters=32, filter_size=5, hidden_dim=64
Testing: num_filters=32, filter_size=5, hidden_dim=128
Testing: num_filters=32, filter_size=7, hidden_dim=64
Testing: num_filters=32, filter_size=7, hidden_dim=128
Best parameters: {'num_fi

Reinitialize with best parameters and train on train+val

In [7]:
portfolio_optimizer = PortfolioOptimizer(
    window_size=window_size,
    num_countries=len(countries_list),
    num_weather_features=len(weather_features),
    num_filters=best_params['num_filters'],
    filter_size=best_params['filter_size'],
    hidden_dim=best_params['hidden_dim'],
    num_heads=4,
    use_transformer=True,
    device=device
)
trainer = Trainer(
    optimizer=portfolio_optimizer,
    train_data=torch.cat([train_data_tensor, val_data_tensor]),  # Combine train and val
    train_returns=torch.cat([train_returns_tensor, val_returns_tensor]),
    val_data=val_data_tensor,  # Still use val for early stopping
    val_returns=val_returns_tensor,
    lr=0.001,
    num_epochs=100,
    batch_size=32,
    patience=50,
    device=device
)
final_sharpe, _ = trainer.train(verbose=True)

Epoch 0, Validation Sharpe: 0.2172
Epoch 10, Validation Sharpe: 0.2221
Epoch 20, Validation Sharpe: 0.2251
Epoch 30, Validation Sharpe: 0.2248
Epoch 40, Validation Sharpe: 0.2253
Epoch 50, Validation Sharpe: 0.2265
Epoch 60, Validation Sharpe: 0.2256
Epoch 70, Validation Sharpe: 0.2260
Early stopping at epoch 72


Evaluate on the test set

In [8]:
test_sharpe, test_returns = trainer.test(test_data_tensor, test_returns_tensor)
print(f"Final Sharpe Ratio on Test Set: {test_sharpe:.4f}")

Final Sharpe Ratio on Test Set: 0.0595
