In [8]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# Directory that is scanned for the per-trial prediction CSV files
base_dir = "seed42/batch_1_loss_mse_epoch_100"
# Destination of the aggregated summary; a bare filename, so it is resolved
# against the current working directory when the CSV is written.
output_file = "aggregated_results.csv"

# Function to compute prediction intervals
def calculate_prediction_intervals(actual, predictions, alpha=0.05):
    """Summarise a symmetric residual-based interval around the predictions.

    The half-width of the interval is ``z * std(actual - predictions)``,
    where ``z`` is the two-sided standard-normal quantile for ``alpha``
    (about 1.96 for the default ``alpha=0.05``). The standard deviation is
    taken along axis 0, so 1-D inputs yield a single half-width and 2-D
    inputs yield one half-width per column.

    Args:
        actual: Observed values (array-like of numbers).
        predictions: Predicted values, broadcast-compatible with ``actual``.
        alpha: Significance level; the interval targets ``1 - alpha``
            nominal coverage. Must lie strictly between 0 and 1.

    Returns:
        A 3-tuple ``(mean_lower, mean_upper, mean_std)``: the mean of the
        lower bounds, the mean of the upper bounds, and the mean of the
        residual standard deviation(s).

    Raises:
        ValueError: If ``alpha`` is not strictly between 0 and 1.
    """
    # Imported locally so the function does not depend on a file-level import.
    from statistics import NormalDist

    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be in the open interval (0, 1), got {alpha!r}")

    # Accept lists/tuples as well as arrays.
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    residuals = actual - predictions
    std_residual = np.std(residuals, axis=0)

    # Two-sided quantile: alpha is split evenly between both tails.
    z_score = NormalDist().inv_cdf(1 - alpha / 2)
    margin_of_error = z_score * std_residual

    lower_bound = predictions - margin_of_error
    upper_bound = predictions + margin_of_error
    return lower_bound.mean(), upper_bound.mean(), std_residual.mean()

# Function to calculate percent overlap
def calculate_overlap(lower1, upper1, lower2, upper2):
    """Return the percentage of interval pairs that overlap.

    Two intervals ``[lower1, upper1]`` and ``[lower2, upper2]`` are counted
    as overlapping when ``upper1 >= lower2`` and ``lower1 <= upper2``, so
    intervals that merely touch at an endpoint count as overlapping.

    Inputs may be scalars, 1-D or N-D array-likes and are broadcast against
    each other. For 2-D inputs the result equals the mean of the per-row
    overlap percentages, because every row has the same number of columns.

    Args:
        lower1: Lower bound(s) of the first set of intervals.
        upper1: Upper bound(s) of the first set of intervals.
        lower2: Lower bound(s) of the second set of intervals.
        upper2: Upper bound(s) of the second set of intervals.

    Returns:
        The share of overlapping interval pairs as a percentage in
        ``[0, 100]``, or NaN when the inputs contain no elements.
    """
    lower1, upper1, lower2, upper2 = (
        np.asarray(bound, dtype=float)
        for bound in (lower1, upper1, lower2, upper2)
    )
    overlaps = (upper1 >= lower2) & (lower1 <= upper2)

    # The mean of an empty mask is undefined; return NaN without the warning.
    if overlaps.size == 0:
        return np.float64("nan")

    # Mean of the boolean mask is the overlapping fraction, for any rank.
    return np.mean(overlaps) * 100

# Accumulator keyed by lookback period. Each entry keeps the run settings
# plus one list per metric; every processed CSV appends one value per list.
results_dict = {}

# Traverse the directory
for file in os.listdir(base_dir):
    if not file.endswith(".csv"):
        continue

    # Run settings are read positionally from the underscore-separated name.
    # NOTE(review): the filename layout is not visible here; the indices and
    # suffix lengths below are taken as-is from the original code - confirm
    # them against the actual files.
    file_parts = file.split("_")
    trial_num = int(file_parts[1])  # parsed (and validated) but not used below
    lookback = int(file_parts[2][:-5])  # drop a 5-character suffix, then parse
    batch_size = int(file_parts[5])
    epochs = int(file_parts[-1][:-4])  # drop the ".csv" extension, then parse

    # Load CSV; rows from position 48 onward are used as the test segment.
    file_path = os.path.join(base_dir, file)
    df = pd.read_csv(file_path)
    original_test = df["Deaths"].iloc[48:].values
    lstm_test = df["LSTM Predictions"].iloc[48:].values
    sarima_test = df["SARIMA Predictions"].iloc[48:].values

    # First file seen for a lookback fixes its batch size / epochs and
    # creates the empty per-trial metric lists.
    if lookback not in results_dict:
        results_dict[lookback] = {
            "batch_size": batch_size, "epochs": epochs,
            "lstm_rmse": [], "lstm_mape": [], "sarima_rmse": [], "sarima_mape": [],
            "lstm_lower": [], "lstm_upper": [], "sarima_lower": [], "sarima_upper": [],
            "lstm_std": [], "sarima_std": [], "percent_overlap": []
        }

    # Point-forecast error metrics (MAPE stored as a percentage).
    results_dict[lookback]["lstm_rmse"].append(np.sqrt(mean_squared_error(original_test, lstm_test)))
    results_dict[lookback]["lstm_mape"].append(mean_absolute_percentage_error(original_test, lstm_test) * 100)
    results_dict[lookback]["sarima_rmse"].append(np.sqrt(mean_squared_error(original_test, sarima_test)))
    results_dict[lookback]["sarima_mape"].append(mean_absolute_percentage_error(original_test, sarima_test) * 100)

    # Mean interval bounds and residual spread for each model.
    lower_lstm, upper_lstm, lstm_std = calculate_prediction_intervals(original_test, lstm_test)
    lower_sarima, upper_sarima, sarima_std = calculate_prediction_intervals(original_test, sarima_test)
    results_dict[lookback]["lstm_lower"].append(lower_lstm)
    results_dict[lookback]["lstm_upper"].append(upper_lstm)
    results_dict[lookback]["sarima_lower"].append(lower_sarima)
    results_dict[lookback]["sarima_upper"].append(upper_sarima)
    results_dict[lookback]["lstm_std"].append(lstm_std)
    results_dict[lookback]["sarima_std"].append(sarima_std)

    # The interval helper returns scalar means, while calculate_overlap
    # reduces over axis 1; passing the scalars directly raises. Promote each
    # bound to shape (1, 1) so the call is valid.
    # NOTE(review): with scalar mean bounds the per-trial value is either 0
    # or 100; confirm this is the intended "percent overlap".
    percent_overlap = calculate_overlap(
        np.atleast_2d(lower_lstm),
        np.atleast_2d(upper_lstm),
        np.atleast_2d(lower_sarima),
        np.atleast_2d(upper_sarima),
    )
    results_dict[lookback]["percent_overlap"].append(percent_overlap)

# Collapse the per-trial lists into one summary row per lookback period.
# Metrics listed here are reported as a mean plus a "<name> Std" column.
_mean_and_std_columns = (
    ("LSTM RMSE", "lstm_rmse"),
    ("LSTM MAPE", "lstm_mape"),
    ("SARIMA RMSE", "sarima_rmse"),
    ("SARIMA MAPE", "sarima_mape"),
)
# Metrics listed here are reported as a mean only.
_mean_only_columns = (
    ("LSTM Prediction Interval Lower", "lstm_lower"),
    ("LSTM Prediction Interval Upper", "lstm_upper"),
    ("SARIMA Prediction Interval Lower", "sarima_lower"),
    ("SARIMA Prediction Interval Upper", "sarima_upper"),
    ("LSTM Std", "lstm_std"),
    ("SARIMA Std", "sarima_std"),
    ("Percent Overlap", "percent_overlap"),
)

final_results = []
for lookback, data in sorted(results_dict.items()):
    # Insertion order of the keys determines the CSV column order.
    row = {
        "Lookback Period": lookback,
        "Batch Size": data["batch_size"],
        "Epochs": data["epochs"],
    }
    for column, key in _mean_and_std_columns:
        row[column] = np.mean(data[key])
        row[f"{column} Std"] = np.std(data[key])
    for column, key in _mean_only_columns:
        row[column] = np.mean(data[key])
    final_results.append(row)

# Write the summary table to disk without the DataFrame index column
results_df = pd.DataFrame(final_results)
results_df.to_csv(output_file, index=False)
print(f"Aggregated results saved to {output_file}")

TypeError: object of type 'numpy.float64' has no len()