In [1]:
import lstm_functions as lf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np
import data_prep as dp

import torch
import model as m

In [None]:
da_price_data = pd.read_csv("../models_santi/data_santi/data_da_23.csv")

# Cross-validation parameters
n_splits = 5
gammas = [0.1, 0.5, 1.0]
model_params = {
    "input_size": da_price_data.shape[1],
    "hidden_size": 16,
    "num_layers": 3,
    "dropout": 0.2,
    "past_horizon": 10,
    "forecast_horizon": 5,
    "batch_size": 16
}

class MockLSTMForecaster:
    def __init__(self, *args, **kwargs):
        pass

    def forecast(self, length):
        # Generate random forecasts
        return np.random.uniform(50, 150, length)


def cross_validation_test():
    result = lf.cross_validation(
        model_type="da",
        n_splits=n_splits,
        price_data=da_price_data,
        gammas=gammas,
        model_params=model_params
    )
    print(result)

# Replace LSTM class with the mock for testing
cross_validation = lf.cross_validation.__globals__.copy()
cross_validation['m.LSTM_multivariate_input_multi_step_forecaster'] = MockLSTMForecaster

cross_validation_test()


AttributeError: 'LSTM_multivariate_input_multi_step_forecaster' object has no attribute 'shape'

In [3]:
class MockLSTMForecaster:
    def __init__(self, *args, **kwargs):
        pass

    def forecast(self, length):
        # Generate random forecasts
        return np.random.uniform(50, 150, length)


In [2]:
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, data, past_horizon, forecast_horizon, target_col):
        """
        Args:
            data (pd.DataFrame): DataFrame containing the time series data.
            past_horizon (int): Number of past timesteps to use as input.
            forecast_horizon (int): Number of future timesteps to predict.
            target_col (str): Column name for the target variable.
        """
        self.data = data
        self.past_horizon = past_horizon
        self.forecast_horizon = forecast_horizon
        self.target_col = target_col

    def __len__(self):
        return len(self.data) - self.past_horizon - self.forecast_horizon

    def __getitem__(self, idx):
        # Input sequence
        x = self.data.iloc[idx:idx + self.past_horizon].values
        # Target sequence
        y = self.data.iloc[idx + self.past_horizon:idx + self.past_horizon + self.forecast_horizon][self.target_col].values
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_model(model, train_loader, valid_loader, epochs, lr, device):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        valid_loss = 0.0
        model.eval()
        with torch.no_grad():
            for x_batch, y_batch in valid_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                outputs = model(x_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                valid_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss / len(train_loader):.4f}, Validation Loss: {valid_loss / len(valid_loader):.4f}")

    return model


In [4]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_percentage_error

def cross_validation_lstm(data, target_col, past_horizon, forecast_horizon, n_splits, model_params, training_params, device):
    """
    Args:
        data (pd.DataFrame): DataFrame with the time series data.
        target_col (str): Name of the target variable column.
        past_horizon (int): Number of past timesteps for input.
        forecast_horizon (int): Number of future timesteps to predict.
        n_splits (int): Number of splits for TimeSeriesSplit.
        model_params (dict): Parameters for the LSTM model.
        training_params (dict): Parameters for training (epochs, learning rate, batch size).
        device (str): 'cuda' or 'cpu'.
    Returns:
        pd.DataFrame: Cross-validation results.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits)
    results = []

    for fold, (train_index, valid_index) in enumerate(tscv.split(data)):
        train_data = data.iloc[train_index]
        valid_data = data.iloc[valid_index]

        # Create datasets and dataloaders
        train_dataset = TimeSeriesDataset(train_data, past_horizon, forecast_horizon, target_col)
        valid_dataset = TimeSeriesDataset(valid_data, past_horizon, forecast_horizon, target_col)
        train_loader = DataLoader(train_dataset, batch_size=training_params['batch_size'], shuffle=False)
        valid_loader = DataLoader(valid_dataset, batch_size=training_params['batch_size'], shuffle=False)

        # Initialize the model
        model = m.LSTM_multivariate_input_multi_step_forecaster(
            input_size=model_params['input_size'],
            hidden_size=model_params['hidden_size'],
            num_layers=model_params['num_layers'],
            dropout=model_params['dropout'],
            past_horizon=past_horizon,
            forecast_horizon=forecast_horizon
        )

        # Train the model
        model = train_model(model, train_loader, valid_loader, training_params['epochs'], training_params['lr'], device)

        # Evaluate on validation set
        model.eval()
        predictions, targets = [], []
        with torch.no_grad():
            for x_batch, y_batch in valid_loader:
                x_batch = x_batch.to(device)
                y_pred = model(x_batch).squeeze(-1).cpu().numpy()
                predictions.append(y_pred)
                targets.append(y_batch.numpy())
        
        predictions = np.concatenate(predictions)
        targets = np.concatenate(targets)

        mape = mean_absolute_percentage_error(targets, predictions)
        results.append({"fold": fold, "MAPE": mape})
        print(f"Fold {fold}, MAPE: {mape:.4f}")

    return pd.DataFrame(results)


In [None]:
import pandas as pd
import numpy as np

# data preprocessing
scaler = MinMaxScaler()

data = dp.create_dataframe()

# input_data = data.drop(columns=["DA"]).values  # Extract features
# target_data = data["DA"].values.reshape(-1, 1)  # Extract target

# scaled_input = scaler.fit_transform(input_data)
# scaled_target = scaler.fit_transform(target_data)


# Model and training parameters
model_params = {
    "input_size": data.shape[1],  # Number of features
    "hidden_size": 100,
    "num_layers": 3,
    "dropout": 0.2
}

training_params = {
    "epochs": 3,
    "lr": 0.001,
    "batch_size": 32
}

# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Run cross-validation
cv_results = cross_validation_lstm(
    data=data,
    target_col="Price1",
    past_horizon=24,
    forecast_horizon=24,
    n_splits=10,
    model_params=model_params,
    training_params=training_params,
    device=device
)

print(cv_results)


Found 365 marginalpdbc files
Found 365 precious files
Processed 8760 rows from marginalpdbc files
Processed 8760 rows from precious files
Epoch 1/10, Train Loss: 8875.0340, Validation Loss: 9564.0058
Epoch 2/10, Train Loss: 8502.5448, Validation Loss: 9123.1392
Epoch 3/10, Train Loss: 8072.4607, Validation Loss: 8620.0548
Epoch 4/10, Train Loss: 7600.0397, Validation Loss: 8070.4527
Epoch 5/10, Train Loss: 7089.9131, Validation Loss: 7489.5931
Epoch 6/10, Train Loss: 6569.1469, Validation Loss: 6893.1417
Epoch 7/10, Train Loss: 6045.8464, Validation Loss: 6298.1665
Epoch 8/10, Train Loss: 5509.1334, Validation Loss: 5713.4609
Epoch 9/10, Train Loss: 5018.0882, Validation Loss: 5152.6961
Epoch 10/10, Train Loss: 4557.7175, Validation Loss: 4624.2878
Fold 0, MAPE: 0.7945
Epoch 1/10, Train Loss: 9121.8849, Validation Loss: 7860.3775
Epoch 2/10, Train Loss: 8163.7244, Validation Loss: 6856.1284
Epoch 3/10, Train Loss: 7022.5864, Validation Loss: 5753.9744
Epoch 4/10, Train Loss: 5846.0243,

KeyboardInterrupt: 