In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights
from energy_ts_diffusion.task import convert_timeseries_to_numpy  # adjust as per your project
from tqdm import tqdm


In [18]:

@torch.no_grad()
def rolling_forecast_on_test(cid, model, filepath="train_final.feather", input_len=24, output_len=8):
    """
    Run a stride-1 rolling-window forecast over the test split of one building.

    The building's `meter_reading` series is loaded from `filepath`, the last
    25% (everything after the 0.75 split point) is taken as the test series,
    scaled to [0.1, 1], and the model is queried once per window. Every
    prediction and its matching ground truth are inverse-transformed back to
    the original units and returned as darts TimeSeries that carry the real
    timestamps of the forecast horizon.

    Args:
        cid (int): Client/building ID (matched against the `building_id` column).
        model (nn.Module): Trained PyTorch model. Inference runs on whatever
            device the model's parameters already live on.
        filepath (str): Path to the Feather file.
        input_len (int): Input sequence length.
        output_len (int): Prediction horizon.

    Returns:
        Tuple[List[TimeSeries], List[TimeSeries]]: (predictions_ts_list, ground_truth_ts_list),
        one entry of length `output_len` per window. Both lists are empty when
        the test series is shorter than `input_len + output_len`.

    Raises:
        ValueError: If the file holds no rows for `cid`.
    """
    print(f"[DEBUG] rolling_forecast_on_test: CID={cid}")

    # Load and filter data. `.copy()` makes the column assignment below operate
    # on an independent frame instead of a slice of the full table.
    df = pd.read_feather(filepath)
    df = df[df['building_id'] == cid].copy()

    if df.empty:
        raise ValueError(f"No data found for building_id {cid}")

    df['meter_reading'] = df['meter_reading'].fillna(0)

    # NOTE(review): fill_missing_dates=True may insert timestamps that were
    # absent from the file; those would not be covered by the fillna(0) above.
    # Confirm whether NaN readings can reach the model here.
    ts = TimeSeries.from_dataframe(
        df,
        time_col='timestamp',
        value_cols='meter_reading',
        fill_missing_dates=True,
        freq='h'
    )

    _, test_series = ts.split_before(0.75)

    # NOTE(review): the scaler is fitted on the test split itself, not on the
    # training split - confirm this matches how the model was trained.
    scaler = MinMaxScaler(feature_range=(0.1, 1))
    transformer = Scaler(scaler)
    test_series_scaled = transformer.fit_transform(test_series)

    # reshape(-1) instead of squeeze() so a length-1 series stays 1-D.
    test_values_scaled = test_series_scaled.values().reshape(-1)
    test_timestamps = test_series_scaled.time_index

    predictions_ts_list = []
    ground_truth_ts_list = []

    model.eval()
    # Follow the model's own device rather than forcing CUDA, so the function
    # also works on CPU-only machines and with CPU-resident models.
    device = next(model.parameters()).device

    n_windows = len(test_values_scaled) - input_len - output_len + 1
    for i in range(0, n_windows):
        horizon_start = i + input_len
        horizon_end = horizon_start + output_len

        input_seq = test_values_scaled[i:horizon_start]
        true_output = test_values_scaled[horizon_start:horizon_end]
        true_time = test_timestamps[horizon_start:horizon_end]

        input_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(0).unsqueeze(-1).to(device)  # [1, input_len, 1]

        pred = model(input_tensor)

        # Drop the batch axis (and a trailing feature axis when present) so the
        # prediction is a flat vector aligned with `true_time`.
        if pred.dim() == 3:
            pred = pred.squeeze(0).squeeze(-1)
        else:
            pred = pred.squeeze(0)

        # Convert prediction & ground truth to TimeSeries
        pred_ts = TimeSeries.from_times_and_values(true_time, pred.cpu().numpy())
        true_ts = TimeSeries.from_times_and_values(true_time, true_output)

        # Inverse transform back to the original meter-reading scale
        predictions_ts_list.append(transformer.inverse_transform(pred_ts))
        ground_truth_ts_list.append(transformer.inverse_transform(true_ts))

    return predictions_ts_list, ground_truth_ts_list


In [19]:
# Get model predictions and save to CSV
# This function will load the model for each round, perform predictions, and save results to a CSV file. - call rolling_forecast_on_test
def get_model_predictions_csv(model_name: str, cid: int,aggr_strat: str ,rounds: list, model_dir: str, output_csv: str):
    """
    For each round, load the saved model, forecast the test set of building
    `cid`, and write all forecasts to a single CSV.

    Checkpoints are looked up as
    `<model_dir>/<model_name>_round_<rnd>_<aggr_strat>.pt`; rounds whose
    checkpoint is missing are skipped with a warning.

    Args:
        model_name (str): Model identifier passed to `model_fn` and used in the checkpoint name.
        cid (int): Client/building ID to forecast.
        aggr_strat (str): Aggregation-strategy suffix of the checkpoint file name.
        rounds (list): Round numbers to evaluate.
        model_dir (str): Directory containing the checkpoints.
        output_csv (str): Destination CSV with columns timestamp, true, pred, round.

    Raises:
        ValueError: If no forecast rows were produced (for example every
            checkpoint is missing), so that no empty or stale CSV is mistaken
            for a result.
    """
    # Use the GPU when there is one, otherwise fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    rows = []

    for rnd in tqdm(rounds):
        model_path = os.path.join(model_dir, f"{model_name}_round_{rnd}_{aggr_strat}.pt")

        if not os.path.exists(model_path):
            print(f"[WARN] Model not found: {model_path}")
            continue

        model = model_fn(model_name)
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        state_dict = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()

        pred_ts_list, gt_ts_list = rolling_forecast_on_test(cid=cid, model=model)

        for pred_ts, true_ts in zip(pred_ts_list, gt_ts_list):
            # reshape(-1) keeps a single-step horizon as a length-1 column.
            df_pred = pd.DataFrame({"timestamp": pred_ts.time_index, "pred": pred_ts.values().reshape(-1)})
            df_true = pd.DataFrame({"timestamp": true_ts.time_index, "true": true_ts.values().reshape(-1)})

            df_merged = pd.merge(df_true, df_pred, on="timestamp", how="inner")
            df_merged["round"] = rnd

            rows.append(df_merged[["timestamp", "true", "pred", "round"]])

    if not rows:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No forecasts produced for model={model_name}, strategy={aggr_strat}, "
            f"cid={cid}: no checkpoints found in {model_dir} for the requested rounds "
            f"or the test series is too short"
        )

    # Combine all rows
    final_df = pd.concat(rows, ignore_index=True)
    final_df.to_csv(output_csv, index=False)
    print(f"[INFO] Forecasts written to {output_csv}")



In [20]:
import os
import pandas as pd

def model_strategy_csv(Base_dir, Target_dir, round_num, METRIC, sortBy="Model_Strategy"):
    """
    Compute box-plot statistics of one metric for every CSV in `Base_dir`
    and save them as a single summary table in `Target_dir`.

    Each `*.csv` in `Base_dir` is filtered to rows where `round == round_num`;
    the non-null values of column `METRIC` are summarised into one output row.
    The row label is the file name with a trailing `_metrics.csv` removed.

    Args:
        Base_dir (str): Directory holding the per-model/strategy metric CSVs.
        Target_dir (str): Directory for the summary CSV (created if missing).
        round_num (int): Round to summarise.
        METRIC (str): Name of the metric column to summarise, e.g. "MAE".
        sortBy (str): Output column to sort the summary by.

    Returns:
        None. Writes
        `all_stats_round<round_num>_metric_<METRIC>_sort_by_<sortBy>.csv`
        unless no input file contained the requested round.
    """
    os.makedirs(Target_dir, exist_ok=True)

    all_rows = []

    # Sorted for a deterministic processing and log order.
    for csv_file in sorted(os.listdir(Base_dir)):
        if not csv_file.endswith(".csv"):
            continue

        df = pd.read_csv(os.path.join(Base_dir, csv_file))

        # Filter for the specified round
        df_round = df[df["round"] == round_num]

        if df_round.empty:
            print(f"[SKIP] No round {round_num} in: {csv_file}")
            continue

        metric_values = df_round[METRIC].dropna()

        # Quartiles and inter-quartile range
        q1 = metric_values.quantile(0.25)
        q3 = metric_values.quantile(0.75)
        iqr = q3 - q1

        # Whisker bounds at 1.5 * IQR beyond the quartiles
        lower_whisker = q1 - 1.5 * iqr
        upper_whisker = q3 + 1.5 * iqr

        # Extremes among the values that lie inside the whisker bounds
        non_outliers = metric_values[(metric_values >= lower_whisker) & (metric_values <= upper_whisker)]

        all_rows.append({
            "Model_Strategy": csv_file.replace("_metrics.csv", ""),
            "Round": round_num,
            "METRIC": METRIC,  # name of the summarised metric column
            "Count": metric_values.count(),
            "Min": metric_values.min(),
            "Q1": q1,
            "Median": metric_values.median(),
            "Q3": q3,
            "Max": metric_values.max(),
            "IQR": iqr,
            "True Min (Non-outlier)": non_outliers.min(),
            "True Max (Non-outlier)": non_outliers.max(),
        })

    if not all_rows:
        # An empty frame has no columns, so sort_values(by=sortBy) would raise
        # KeyError; report the situation instead of crashing.
        print(f"[WARN] No data for round {round_num} in {Base_dir}; nothing written")
        return

    # Combine to DataFrame
    pivot_df = pd.DataFrame(all_rows).sort_values(by=sortBy)

    # Clean output name
    safe_metric = METRIC.replace(" ", "_").replace("%", "percent")
    pivot_output = os.path.join(
        Target_dir,
        f"all_stats_round{round_num}_metric_{safe_metric}_sort_by_{sortBy}.csv"
    )

    pivot_df.to_csv(pivot_output, index=False)

    print(f"[OK] One-row-per-model_strategy pivot saved: {pivot_output}")


In [21]:
# evaluate forecasts - working correctly 1411 buildings count 
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def smape(y_true, y_pred):
    """
    Symmetric Mean Absolute Percentage Error, in percent.

    Each term is |y_true - y_pred| / ((|y_true| + |y_pred|) / 2); terms whose
    denominator is zero (both values are zero) contribute 0.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, same shape as `y_true`.

    Returns:
        float: Mean of the per-element terms multiplied by 100.
    """
    # Coerce to float arrays so plain lists and integer inputs work too.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0

    # Divide only where the denominator is non-zero. np.where would evaluate
    # the 0/0 division first and emit a RuntimeWarning.
    ratio = np.divide(
        abs_error,
        denominator,
        out=np.zeros_like(abs_error),
        where=denominator != 0,
    )
    return np.mean(ratio) * 100

def mape(y_true, y_pred):
    """
    Mean Absolute Percentage Error, in percent.

    Zero entries of `y_true` are replaced by 1e-8 before dividing, so the
    result stays finite (but can become very large) when actual values are 0.
    """
    # Substitute a tiny value for exact zeros to avoid division by zero.
    safe_true = np.where(y_true == 0, 1e-8, y_true)
    relative_error = (safe_true - y_pred) / safe_true
    return np.mean(np.abs(relative_error)) * 100

def evaluate_forecast_metrics_per_round(csv_path):
    """
    Compute MAE, MSE, RMSE, MAPE and SMAPE for every round in a forecast CSV.

    Args:
        csv_path (str): Path to a CSV with columns: timestamp, true, pred, round

    Returns:
        pd.DataFrame: One row per round (ascending) with columns
        round, MAE, MSE, RMSE, MAPE (%), SMAPE (%).

    Raises:
        ValueError: If the CSV contains no rows.
    """
    forecasts = pd.read_csv(csv_path)
    if forecasts.empty:
        raise ValueError("CSV is empty or invalid")

    def _metrics_for_round(round_id):
        # Missing values in this round's rows are replaced by 0.005 before scoring.
        subset = forecasts[forecasts['round'] == round_id].fillna(0.005)
        actual = subset["true"].values
        predicted = subset["pred"].values

        mae = mean_absolute_error(actual, predicted)
        mse = mean_squared_error(actual, predicted)
        rmse = np.sqrt(mse)
        mape_val = mape(actual, predicted)
        smape_val = smape(actual, predicted)

        return {
            "round": round_id,
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "MAPE (%)": mape_val,
            "SMAPE (%)": smape_val,
        }

    round_ids = sorted(forecasts['round'].unique())
    return pd.DataFrame([_metrics_for_round(round_id) for round_id in round_ids])


In [22]:
# combine all the model_strategy_csv of round 10 ,9

In [23]:
# try in function
def get_model_metric_grouped_by_model_strategy(
    MODELS: List[str],
    STRATEGIES: List[str],
    ROUNDS: List[int],
    BASE_PREDICTIONS_DIR: str,
    BASE_METRICS_DIR: str,
    METRIC: str = "MAE",
    N_CLIENTS: int = 1411
) -> None:
    """
    Build one metrics CSV per (model, strategy) that covers all clients.

    For every client ID in `range(N_CLIENTS)` the prediction file
    `<BASE_PREDICTIONS_DIR>/<CID>_<model>_<strategy>.csv` is scored per round
    and tagged with building_id / model / strategy. The rows of all clients
    are then written together to
    `<BASE_METRICS_DIR>/<model>_<strategy>_metrics.csv`.

    The file is written once per (model, strategy) after all clients were
    collected, so it holds every client instead of only the last one
    processed, and re-running the function replaces it rather than
    duplicating rows.

    Args:
        MODELS (List[str]): List of model names.
        STRATEGIES (List[str]): List of strategy names.
        ROUNDS (List[int]): List of round numbers. Currently unused: all
            rounds present in each prediction CSV are scored.
        BASE_PREDICTIONS_DIR (str): Directory containing prediction CSVs.
        BASE_METRICS_DIR (str): Directory to save metrics CSVs.
        METRIC (str): Currently unused; all metrics are always computed.
        N_CLIENTS (int): Number of client IDs to scan, starting at 0.
    """
    os.makedirs(BASE_METRICS_DIR, exist_ok=True)

    for model_name in MODELS:
        for strategy in STRATEGIES:
            output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")
            per_client_metrics = []

            for CID in tqdm(range(N_CLIENTS), desc=f"Processing Client IDs ({model_name}_{strategy})"):
                input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    # Compute metrics for the given CSV and tag their origin.
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)
                    metrics_df.insert(0, "building_id", CID)  # add client ID
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)
                    per_client_metrics.append(metrics_df)

                except Exception as e:
                    # Best effort: one unreadable client file must not abort the run.
                    print(f"[ERROR] CID={CID} | {model_name}_{strategy} | {e}")

            if not per_client_metrics:
                print(f"[SKIP] No metrics computed for {model_name}_{strategy}")
                continue

            pd.concat(per_client_metrics, ignore_index=True).to_csv(output_csv, index=False)
            print(f"[OK] {output_csv} <- {len(per_client_metrics)} clients")

## Predictions and metrics together (the code above wrapped in a function)

In [24]:
# Model List , strategy List, round List, base_results_dir, base_output_dir, metrics_dir,cid range

# # Model List , strategy List, round List, base_results_dir, base_output_dir, metrics_dir,cid range

import os

def get_model_predictions_metric(
    MODELS,
    STRATEGIES,
    ROUNDS,
    BASE_RESULTS_DIR: str,
    BASE_OUTPUT_DIR: str,
    METRICS_DIR: str,
    CID: range
):
    """
    Produce forecast CSVs and per-round metric CSVs for every
    (client, model, strategy) combination.

    A failure in one combination is printed and the loop moves on to the next.

    Args:
        MODELS (List[str]): Model names (e.g., ["gru", "lstm"]).
        STRATEGIES (List[str]): Aggregation strategies (e.g., ["_scaffold", "_diff"]).
        ROUNDS (List[int]): Rounds to evaluate (e.g., list(range(9, 11))).
        BASE_RESULTS_DIR (str): Root directory of saved model weights; each
            model's checkpoints are read from `<BASE_RESULTS_DIR>/<model>`.
        BASE_OUTPUT_DIR (str): Directory for prediction CSVs
            (`<cid>_<model>_<strategy>.csv`).
        METRICS_DIR (str): Directory for metric CSVs
            (`cid<cid>_<model>_<strategy>_metrics.csv`).
        CID (range): Client IDs to process (e.g., range(1411)).
    """
    for directory in (BASE_OUTPUT_DIR, METRICS_DIR):
        os.makedirs(directory, exist_ok=True)

    for cid in CID:
        print(f"\nProcessing Client ID: {cid}")

        for model_name in MODELS:
            # The checkpoint folder depends only on the model name.
            checkpoint_dir = os.path.join(BASE_RESULTS_DIR, model_name)

            for strategy in STRATEGIES:
                run_tag = f"{cid}_{model_name}_{strategy}"
                predictions_path = os.path.join(BASE_OUTPUT_DIR, f"{run_tag}.csv")
                metrics_path = os.path.join(METRICS_DIR, f"cid{run_tag}_metrics.csv")

                print(f"\n Model: {model_name}, Strategy: {strategy}")

                try:
                    # Step 1: forecast every round and persist the predictions.
                    get_model_predictions_csv(
                        model_name=model_name,
                        cid=cid,
                        aggr_strat=strategy,
                        rounds=ROUNDS,
                        model_dir=checkpoint_dir,
                        output_csv=predictions_path,
                    )

                    # Step 2: score the predictions that were just written.
                    evaluate_forecast_metrics_per_round(predictions_path).to_csv(metrics_path, index=False)
                    print(f"Metrics saved to {metrics_path}")

                except Exception as e:
                    print(f"[ERROR] model={model_name}, strategy={strategy}: {e}")




In [30]:
# Run configuration: forecast and score rounds 40-49 for client IDs 0-499
# with both models and five aggregation strategies.
# Other strategy names used in earlier experiments: "diff_lr2", "das11", "das2", "fedAvg_diffsample_dhc"
STRATEGIES = ["fedAvg_lr","das","diff_lr","scaffold_diff","scaffold_lr" ]
MODELS = [ "lstm","gru"]
# Rounds whose checkpoints are evaluated (40..49 inclusive).
ROUNDS = list(range(40, 50))
BASE_RESULTS_DIR = "results"          # checkpoints are read from results/<model>/
BASE_OUTPUT_DIR = "predictions1411"   # per-client prediction CSVs are written here
METRICS_DIR = "metrics1411"           # per-client metric CSVs are written here
# Client IDs to process; use e.g. range(45, 46) for a single client.
CID = range(0,500) 

# Each (client, model, strategy) combination loads and runs up to one
# checkpoint per round.
get_model_predictions_metric(
    MODELS=MODELS,
    STRATEGIES=STRATEGIES,
    ROUNDS=ROUNDS,
    BASE_RESULTS_DIR=BASE_RESULTS_DIR,
    BASE_OUTPUT_DIR=BASE_OUTPUT_DIR,
    METRICS_DIR=METRICS_DIR,
    CID=CID
)


Processing Client ID: 0

 Model: lstm, Strategy: fedAvg_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:18,  2.05s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:19,  2.40s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:15,  2.22s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:10,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:13<00:08,  2.23s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:15<00:06,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:17<00:04,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:19<00:02,  2.08s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:21<00:00,  2.17s/it]


[INFO] Forecasts written to predictions1411/0_lstm_fedAvg_lr.csv
Metrics saved to metrics1411/cid0_lstm_fedAvg_lr_metrics.csv

 Model: lstm, Strategy: das


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:18,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:16,  2.03s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:14,  2.01s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:13,  2.20s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:10,  2.12s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:14<00:06,  2.07s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:16<00:04,  2.19s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:18<00:02,  2.10s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:20<00:00,  2.10s/it]


[INFO] Forecasts written to predictions1411/0_lstm_das.csv
Metrics saved to metrics1411/cid0_lstm_das_metrics.csv

 Model: lstm, Strategy: diff_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:22,  2.45s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:17,  2.14s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:14,  2.14s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:09<00:13,  2.29s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:11<00:10,  2.18s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:13<00:08,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:15<00:06,  2.10s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:17<00:04,  2.18s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:19<00:02,  2.14s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:21<00:00,  2.15s/it]


[INFO] Forecasts written to predictions1411/0_lstm_diff_lr.csv
Metrics saved to metrics1411/cid0_lstm_diff_lr_metrics.csv

 Model: lstm, Strategy: scaffold_diff


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:20,  2.29s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:17,  2.17s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:14,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:11,  2.22s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:13<00:08,  2.18s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:15<00:06,  2.14s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:17<00:04,  2.08s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:19<00:02,  2.23s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:21<00:00,  2.16s/it]


[INFO] Forecasts written to predictions1411/0_lstm_scaffold_diff.csv
Metrics saved to metrics1411/cid0_lstm_scaffold_diff_metrics.csv

 Model: lstm, Strategy: scaffold_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:18,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:16,  2.01s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:15,  2.18s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:10,  2.05s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.04s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:14<00:06,  2.15s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:16<00:04,  2.12s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:18<00:02,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:20<00:00,  2.08s/it]


[INFO] Forecasts written to predictions1411/0_lstm_scaffold_lr.csv
Metrics saved to metrics1411/cid0_lstm_scaffold_lr_metrics.csv

 Model: gru, Strategy: fedAvg_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:01<00:17,  1.96s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:16,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:14,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.04s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:11,  2.24s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.20s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:14<00:06,  2.12s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:16<00:04,  2.10s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:19<00:02,  2.22s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:21<00:00,  2.13s/it]


[INFO] Forecasts written to predictions1411/0_gru_fedAvg_lr.csv
Metrics saved to metrics1411/cid0_gru_fedAvg_lr_metrics.csv

 Model: gru, Strategy: das


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:01<00:17,  1.97s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:18,  2.26s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:14,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.10s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:10,  2.07s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.22s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:15<00:06,  2.16s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:17<00:04,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:19<00:02,  2.07s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:21<00:00,  2.16s/it]


[INFO] Forecasts written to predictions1411/0_gru_das.csv
Metrics saved to metrics1411/cid0_gru_das_metrics.csv

 Model: gru, Strategy: diff_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:02<00:18,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:04<00:16,  2.03s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:06<00:15,  2.21s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:08<00:12,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:10<00:10,  2.08s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.02s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:14<00:06,  2.16s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:16<00:04,  2.12s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:18<00:02,  2.05s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:20<00:00,  2.07s/it]


[INFO] Forecasts written to predictions1411/0_gru_diff_lr.csv
Metrics saved to metrics1411/cid0_gru_diff_lr_metrics.csv

 Model: gru, Strategy: scaffold_diff


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:01<00:17,  1.96s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:03<00:15,  1.96s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:05<00:13,  1.94s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:07<00:11,  1.97s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:09<00:09,  1.98s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:12<00:08,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:14<00:06,  2.03s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:16<00:04,  2.00s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:17<00:01,  1.95s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:20<00:00,  2.03s/it]


[INFO] Forecasts written to predictions1411/0_gru_scaffold_diff.csv
Metrics saved to metrics1411/cid0_gru_scaffold_diff_metrics.csv

 Model: gru, Strategy: scaffold_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=0


 10%|█         | 1/10 [00:01<00:16,  1.89s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 20%|██        | 2/10 [00:03<00:14,  1.87s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 30%|███       | 3/10 [00:05<00:13,  1.91s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 40%|████      | 4/10 [00:07<00:12,  2.03s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 50%|█████     | 5/10 [00:09<00:09,  1.96s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 60%|██████    | 6/10 [00:11<00:07,  1.93s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 70%|███████   | 7/10 [00:13<00:05,  1.91s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 80%|████████  | 8/10 [00:15<00:04,  2.02s/it]

[DEBUG] rolling_forecast_on_test: CID=0


 90%|█████████ | 9/10 [00:17<00:01,  1.98s/it]

[DEBUG] rolling_forecast_on_test: CID=0


100%|██████████| 10/10 [00:19<00:00,  1.95s/it]


[INFO] Forecasts written to predictions1411/0_gru_scaffold_lr.csv
Metrics saved to metrics1411/cid0_gru_scaffold_lr_metrics.csv

Processing Client ID: 1

 Model: lstm, Strategy: fedAvg_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:02<00:20,  2.30s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:16,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:13,  1.97s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 40%|████      | 4/10 [00:08<00:12,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 50%|█████     | 5/10 [00:10<00:10,  2.04s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 60%|██████    | 6/10 [00:12<00:08,  2.03s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 70%|███████   | 7/10 [00:14<00:05,  1.98s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 80%|████████  | 8/10 [00:16<00:04,  2.07s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 90%|█████████ | 9/10 [00:18<00:01,  1.99s/it]

[DEBUG] rolling_forecast_on_test: CID=1


100%|██████████| 10/10 [00:20<00:00,  2.02s/it]


[INFO] Forecasts written to predictions1411/1_lstm_fedAvg_lr.csv
Metrics saved to metrics1411/cid1_lstm_fedAvg_lr_metrics.csv

 Model: lstm, Strategy: das


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:02<00:20,  2.28s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:17,  2.17s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:14,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 40%|████      | 4/10 [00:08<00:12,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 50%|█████     | 5/10 [00:11<00:11,  2.26s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 60%|██████    | 6/10 [00:12<00:08,  2.16s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 70%|███████   | 7/10 [00:14<00:06,  2.09s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 80%|████████  | 8/10 [00:16<00:04,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 90%|█████████ | 9/10 [00:19<00:02,  2.16s/it]

[DEBUG] rolling_forecast_on_test: CID=1


100%|██████████| 10/10 [00:21<00:00,  2.12s/it]


[INFO] Forecasts written to predictions1411/1_lstm_das.csv
Metrics saved to metrics1411/cid1_lstm_das_metrics.csv

 Model: lstm, Strategy: diff_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:01<00:17,  1.93s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:18,  2.28s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:15,  2.17s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 40%|████      | 4/10 [00:08<00:12,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 50%|█████     | 5/10 [00:10<00:10,  2.05s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 60%|██████    | 6/10 [00:12<00:08,  2.20s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 70%|███████   | 7/10 [00:14<00:06,  2.15s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 80%|████████  | 8/10 [00:16<00:04,  2.10s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 90%|█████████ | 9/10 [00:18<00:02,  2.06s/it]

[DEBUG] rolling_forecast_on_test: CID=1


100%|██████████| 10/10 [00:21<00:00,  2.12s/it]


[INFO] Forecasts written to predictions1411/1_lstm_diff_lr.csv
Metrics saved to metrics1411/cid1_lstm_diff_lr_metrics.csv

 Model: lstm, Strategy: scaffold_diff


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:02<00:19,  2.13s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:16,  2.11s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:14,  2.08s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 40%|████      | 4/10 [00:08<00:13,  2.24s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 50%|█████     | 5/10 [00:10<00:11,  2.21s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 60%|██████    | 6/10 [00:13<00:08,  2.19s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 70%|███████   | 7/10 [00:15<00:06,  2.17s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 80%|████████  | 8/10 [00:17<00:04,  2.29s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 90%|█████████ | 9/10 [00:19<00:02,  2.23s/it]

[DEBUG] rolling_forecast_on_test: CID=1


100%|██████████| 10/10 [00:21<00:00,  2.20s/it]


[INFO] Forecasts written to predictions1411/1_lstm_scaffold_diff.csv
Metrics saved to metrics1411/cid1_lstm_scaffold_diff_metrics.csv

 Model: lstm, Strategy: scaffold_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:02<00:23,  2.56s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:18,  2.30s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:15,  2.17s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 40%|████      | 4/10 [00:09<00:14,  2.43s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 50%|█████     | 5/10 [00:11<00:11,  2.34s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 60%|██████    | 6/10 [00:13<00:09,  2.30s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 70%|███████   | 7/10 [00:16<00:06,  2.23s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 80%|████████  | 8/10 [00:18<00:04,  2.40s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 90%|█████████ | 9/10 [00:21<00:02,  2.37s/it]

[DEBUG] rolling_forecast_on_test: CID=1


100%|██████████| 10/10 [00:23<00:00,  2.32s/it]


[INFO] Forecasts written to predictions1411/1_lstm_scaffold_lr.csv
Metrics saved to metrics1411/cid1_lstm_scaffold_lr_metrics.csv

 Model: gru, Strategy: fedAvg_lr


  0%|          | 0/10 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test: CID=1


 10%|█         | 1/10 [00:02<00:21,  2.39s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 20%|██        | 2/10 [00:04<00:18,  2.26s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:06<00:16,  2.30s/it]

[DEBUG] rolling_forecast_on_test: CID=1


 30%|███       | 3/10 [00:07<00:17,  2.52s/it]


KeyboardInterrupt: 

In [26]:
# Evaluation sweep configuration for get_model_predictions_metric.
# Strategy names used in earlier runs of this cell: "scaffold_diff", "scaffold_lr",
# "diff_lr2", "das11", "das2", "fedAvg_lr", "fedAvg_diffsample_dhc".
STRATEGIES = ["fedProx" ]
MODELS = [ "lstm","gru"]
# CID = 45
ROUNDS = list(range(1, 50))  # rounds 1..49 inclusive
BASE_RESULTS_DIR = "results"  # recorded output looks checkpoints up as results/<model>/<model>_round_<n>_<strategy>.pt
BASE_OUTPUT_DIR = "predictions1411"
METRICS_DIR = "metrics1411"
CID = range(70,72)  # building/client IDs 70 and 71

# NOTE(review): the saved output of this cell reports
# "[WARN] Model not found: results/lstm/lstm_round_1_fedProx.pt" (and likewise for
# later rounds and for gru), and each 49-round loop finishes in ~0 s. Confirm that
# fedProx checkpoints actually exist under BASE_RESULTS_DIR before trusting results.
get_model_predictions_metric(
    MODELS=MODELS,
    STRATEGIES=STRATEGIES,
    ROUNDS=ROUNDS,
    BASE_RESULTS_DIR=BASE_RESULTS_DIR,
    BASE_OUTPUT_DIR=BASE_OUTPUT_DIR,
    METRICS_DIR=METRICS_DIR,
    CID=CID
)


Processing Client ID: 70

 Model: lstm, Strategy: fedProx


100%|██████████| 49/49 [00:00<00:00, 112552.52it/s]


[WARN] Model not found: results/lstm/lstm_round_1_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_2_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_3_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_4_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_5_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_6_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_7_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_8_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_9_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_10_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_11_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_12_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_13_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_14_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_15_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_16_fedProx.pt
[WARN] Model not 

100%|██████████| 49/49 [00:00<00:00, 45813.84it/s]


[WARN] Model not found: results/gru/gru_round_1_fedProx.pt
[WARN] Model not found: results/gru/gru_round_2_fedProx.pt
[WARN] Model not found: results/gru/gru_round_3_fedProx.pt
[WARN] Model not found: results/gru/gru_round_4_fedProx.pt
[WARN] Model not found: results/gru/gru_round_5_fedProx.pt
[WARN] Model not found: results/gru/gru_round_6_fedProx.pt
[WARN] Model not found: results/gru/gru_round_7_fedProx.pt
[WARN] Model not found: results/gru/gru_round_8_fedProx.pt
[WARN] Model not found: results/gru/gru_round_9_fedProx.pt
[WARN] Model not found: results/gru/gru_round_10_fedProx.pt
[WARN] Model not found: results/gru/gru_round_11_fedProx.pt
[WARN] Model not found: results/gru/gru_round_12_fedProx.pt
[WARN] Model not found: results/gru/gru_round_13_fedProx.pt
[WARN] Model not found: results/gru/gru_round_14_fedProx.pt
[WARN] Model not found: results/gru/gru_round_15_fedProx.pt
[WARN] Model not found: results/gru/gru_round_16_fedProx.pt
[WARN] Model not found: results/gru/gru_round_17_

100%|██████████| 49/49 [00:00<00:00, 160437.86it/s]


[WARN] Model not found: results/lstm/lstm_round_1_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_2_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_3_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_4_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_5_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_6_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_7_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_8_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_9_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_10_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_11_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_12_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_13_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_14_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_15_fedProx.pt
[WARN] Model not found: results/lstm/lstm_round_16_fedProx.pt
[WARN] Model not 

100%|██████████| 49/49 [00:00<00:00, 204092.25it/s]

[WARN] Model not found: results/gru/gru_round_1_fedProx.pt
[WARN] Model not found: results/gru/gru_round_2_fedProx.pt
[WARN] Model not found: results/gru/gru_round_3_fedProx.pt
[WARN] Model not found: results/gru/gru_round_4_fedProx.pt
[WARN] Model not found: results/gru/gru_round_5_fedProx.pt
[WARN] Model not found: results/gru/gru_round_6_fedProx.pt
[WARN] Model not found: results/gru/gru_round_7_fedProx.pt
[WARN] Model not found: results/gru/gru_round_8_fedProx.pt
[WARN] Model not found: results/gru/gru_round_9_fedProx.pt
[WARN] Model not found: results/gru/gru_round_10_fedProx.pt
[WARN] Model not found: results/gru/gru_round_11_fedProx.pt
[WARN] Model not found: results/gru/gru_round_12_fedProx.pt
[WARN] Model not found: results/gru/gru_round_13_fedProx.pt
[WARN] Model not found: results/gru/gru_round_14_fedProx.pt
[WARN] Model not found: results/gru/gru_round_15_fedProx.pt
[WARN] Model not found: results/gru/gru_round_16_fedProx.pt
[WARN] Model not found: results/gru/gru_round_17_




In [None]:
# # try in function
# def get_model_metric_grouped_by_model_strategy(
#     MODELS: List[str],
#     STRATEGIES: List[str],
#     ROUNDS: List[int],
#     BASE_PREDICTIONS_DIR: str,
#     BASE_METRICS_DIR: str,
#     METRIC: str = "MAE"
# ) -> None:
#     """
#     Computes metrics for each model and strategy, grouped by client ID and round.
    
#     Args:
#         MODELS (List[str]): List of model names.
#         STRATEGIES (List[str]): List of strategy names.
#         ROUNDS (List[int]): List of round numbers.
#         BASE_PREDICTIONS_DIR (str): Directory containing prediction CSVs.
#         BASE_METRICS_DIR (str): Directory to save metrics CSVs.
#         METRIC (str): The metric to compute, e.g., "MAE", "MSE", etc.
#     """
#     os.makedirs(BASE_METRICS_DIR, exist_ok=True)

#     for CID in tqdm(range(1411), desc="Processing Client IDs"):
#         for model_name in MODELS:
#             for strategy in STRATEGIES:
#                 input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")
#                 output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")

#                 if not os.path.isfile(input_csv):
#                     print(f"[SKIP] Missing: {input_csv}")
#                     continue

#                 try:
#                     # Compute metrics for the given CSV
#                     metrics_df = evaluate_forecast_metrics_per_round(input_csv)
#                     metrics_df.insert(0, "building_id", CID)  # add client ID
#                     metrics_df.insert(1, "model", model_name)
#                     metrics_df.insert(2, "strategy", strategy)

#                     if os.path.isfile(output_csv):
#                         metrics_df.to_csv(output_csv, index=False)
#                     else:
#                         metrics_df.to_csv(output_csv, index=False)

#                     print(f"[OK] {output_csv} <- CID {CID}")

#                 except Exception as e:
#                     print(f"[ERROR] CID={CID} | {model_name}{strategy} | {e}")

In [None]:
import os
import pandas as pd
from typing import List
from tqdm import tqdm

def get_model_metric_grouped_by_model_strategy(
    MODELS: List[str],
    STRATEGIES: List[str],
    ROUNDS: List[int],
    BASE_PREDICTIONS_DIR: str,
    BASE_METRICS_DIR: str,
    METRIC: str = "MAE",
    NUM_CLIENTS: int = 1411,
) -> None:
    """
    Collect per-building forecast metrics into one CSV per (model, strategy) pair.

    For every model/strategy combination the function looks for prediction files
    named ``<CID>_<model>_<strategy>.csv`` in ``BASE_PREDICTIONS_DIR`` for
    ``CID`` in ``range(NUM_CLIENTS)``, evaluates each one with
    ``evaluate_forecast_metrics_per_round`` and writes the concatenated result to
    ``<BASE_METRICS_DIR>/<model>_<strategy>_metrics.csv``, overwriting any
    existing file.

    Args:
        MODELS (List[str]): Model names used in the prediction file names.
        STRATEGIES (List[str]): Strategy names used in the prediction file names.
        ROUNDS (List[int]): Accepted for interface compatibility; not read by
            this function (no round filtering is applied here).
        BASE_PREDICTIONS_DIR (str): Directory containing the prediction CSVs.
        BASE_METRICS_DIR (str): Directory the grouped metrics CSVs are written to
            (created if missing).
        METRIC (str): Accepted for interface compatibility; not read by this
            function.
        NUM_CLIENTS (int): Number of client/building IDs to scan, starting at 0.
            Defaults to 1411, the value previously hard-coded in the loop.

    Returns:
        None. Results are written to disk; progress is printed.
    """
    os.makedirs(BASE_METRICS_DIR, exist_ok=True)

    for model_name in MODELS:
        for strategy in STRATEGIES:
            all_metrics = []  # one DataFrame per building for this model/strategy
            print(f"\n📊 Collecting metrics: {model_name} | {strategy}")

            for CID in tqdm(range(NUM_CLIENTS), desc=f"{model_name}_{strategy}"):
                input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)
                    metrics_df.insert(0, "building_id", CID)
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)
                    all_metrics.append(metrics_df)

                except Exception as e:
                    # Best effort: one bad prediction file must not abort the sweep.
                    print(f"[ERROR] CID={CID} | {model_name}_{strategy} | {e}")

            # Save once after collecting all building metrics
            if all_metrics:
                final_df = pd.concat(all_metrics, ignore_index=True)
                output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")
                final_df.to_csv(output_csv, index=False)  # full overwrite
                print(f" Written to: {output_csv}")
            else:
                # Make the "nothing happened" case visible instead of silent.
                print(f"[WARN] No metrics collected for {model_name}_{strategy}; nothing written.")


In [None]:
# Configuration for grouping per-building metrics by (model, strategy).
STRATEGIES = ["scaffold", "diff"]
MODELS = ["gru", "lstm"]
# CID = 45
ROUNDS = list(range(9, 11))  # rounds 9 and 10
BASE_DIR = "predictions1411"  # NOTE(review): not referenced in this cell; same value as BASE_OUTPUT_DIR
BASE_OUTPUT_DIR = "predictions1411"  # directory holding the <CID>_<model>_<strategy>.csv prediction files
METRICS_DIR = "metrics1411_grouped"
# The callee evaluates each prediction file with evaluate_forecast_metrics_per_round.
# NOTE(review): as defined in this notebook, the callee accepts ROUNDS and METRIC
# but does not read them, so no round/metric filtering happens at this step.

get_model_metric_grouped_by_model_strategy(MODELS, 
                                           STRATEGIES, 
                                           ROUNDS, 
                                           BASE_PREDICTIONS_DIR=BASE_OUTPUT_DIR, 
                                           BASE_METRICS_DIR=METRICS_DIR,
                                           METRIC="MAE")


📊 Collecting metrics: gru | scaffold


gru_scaffold: 100%|██████████| 5/5 [00:00<00:00, 150.11it/s]


[✔] Written to: metrics1411_grouped/gru_scaffold_metrics.csv

📊 Collecting metrics: gru | diff


gru_diff: 100%|██████████| 5/5 [00:00<00:00, 163.35it/s]


[✔] Written to: metrics1411_grouped/gru_diff_metrics.csv

📊 Collecting metrics: lstm | scaffold


lstm_scaffold: 100%|██████████| 5/5 [00:00<00:00, 163.43it/s]


[✔] Written to: metrics1411_grouped/lstm_scaffold_metrics.csv

📊 Collecting metrics: lstm | diff


lstm_diff: 100%|██████████| 5/5 [00:00<00:00, 168.14it/s]

[✔] Written to: metrics1411_grouped/lstm_diff_metrics.csv





# Now compute tabular statistics

In [13]:
import os
import pandas as pd

def model_strategy_csv(Base_dir, Target_dir, round_num, METRIC, sortBy="Model_Strategy"):
    """
    Compute boxplot-style summary statistics of one metric for every CSV in
    ``Base_dir`` and save them as a single table in ``Target_dir``.

    Each ``*.csv`` file in ``Base_dir`` is read, restricted to rows whose
    ``round`` column equals ``round_num``, and summarised into one output row.
    Files with no rows for that round are skipped with a message.

    Args:
        Base_dir (str): Directory containing the metrics CSVs. Every file must
            have a ``round`` column and a column named ``METRIC``.
        Target_dir (str): Directory the summary CSV is written to (created if
            missing).
        round_num (int): Round to summarise.
        METRIC (str): Name of the metric column to summarise (e.g. ``"MAE"``).
        sortBy (str): Output column used to order the rows.

    Output columns:
        ``Model_Strategy`` (file name without ``_metrics.csv``), ``Round``,
        ``METRIC`` (the *name* of the summarised metric), ``Count``, ``Min``,
        ``Q1``, ``Median``, ``Q3``, ``Max``, ``IQR`` and the smallest/largest
        values lying within 1.5 * IQR of the quartiles
        (``True Min (Non-outlier)`` / ``True Max (Non-outlier)``).

    Returns:
        None. Writes
        ``all_stats_round<round_num>_metric_<METRIC>_sort_by_<sortBy>.csv``;
        if no file contributed a row, nothing is written.
    """
    os.makedirs(Target_dir, exist_ok=True)

    all_rows = []

    for csv_file in os.listdir(Base_dir):
        if not csv_file.endswith(".csv"):
            continue

        csv_path = os.path.join(Base_dir, csv_file)
        df = pd.read_csv(csv_path)

        # Filter for the specified round
        df_round = df[df["round"] == round_num]

        if df_round.empty:
            print(f"[SKIP] No round {round_num} in: {csv_file}")
            continue

        metric_values = df_round[METRIC].dropna()

        # Quartiles and inter-quartile range
        q1 = metric_values.quantile(0.25)
        q3 = metric_values.quantile(0.75)
        iqr = q3 - q1

        # Tukey fences: values outside [q1 - 1.5*IQR, q3 + 1.5*IQR] are outliers
        lower_whisker = q1 - 1.5 * iqr
        upper_whisker = q3 + 1.5 * iqr
        non_outliers = metric_values[
            (metric_values >= lower_whisker) & (metric_values <= upper_whisker)
        ]

        all_rows.append({
            "Model_Strategy": csv_file.replace("_metrics.csv", ""),
            "Round": round_num,
            "METRIC": METRIC,  # name of the summarised metric, not a value
            "Count": metric_values.count(),
            "Min": metric_values.min(),
            "Q1": q1,
            "Median": metric_values.median(),
            "Q3": q3,
            "Max": metric_values.max(),
            "IQR": iqr,
            "True Min (Non-outlier)": non_outliers.min(),
            "True Max (Non-outlier)": non_outliers.max(),
        })

    # An empty list yields a DataFrame without columns, on which sort_values
    # would raise KeyError; report and stop instead.
    if not all_rows:
        print(f"[SKIP] No statistics computed for round {round_num}; nothing written.")
        return

    pivot_df = pd.DataFrame(all_rows).sort_values(by=sortBy)

    # Clean output name
    safe_metric = METRIC.replace(" ", "_").replace("%", "percent")
    pivot_output = os.path.join(
        Target_dir,
        f"all_stats_round{round_num}_metric_{safe_metric}_sort_by_{sortBy}.csv"
    )

    pivot_df.to_csv(pivot_output, index=False)

    print(f"[OK] One-row-per-model_strategy pivot saved: {pivot_output}")


In [None]:
# # Combine all metrics CSVs into a single file that contains statistics for each model and strategy.

For the box plot, combine all model-strategy CSV files.

In [None]:
# # combine data 
# import os

# # Folder
# BASE_METRICS_DIR = "forecast_metrics_new_1411"
# combined = []

# for csv_file in os.listdir(BASE_METRICS_DIR):
#     if not csv_file.endswith(".csv"):
#         continue

#     csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
#     df = pd.read_csv(csv_path)

#     # Make Model_Strategy for each row
#     model_strategy = csv_file.replace("_metrics.csv", "")
#     df["Model_Strategy"] = model_strategy

#     combined.append(df)

# # Combine all
# big_df = pd.concat(combined, ignore_index=True)

# # Correct path
# output_csv = os.path.join(BASE_METRICS_DIR, "saved_combined_data_round9_round10_1411.csv")

# # Save
# big_df.to_csv(output_csv, index=False)
# print(f"[OK] Combined raw file saved to: {output_csv}")

In [14]:
import os
import pandas as pd
from typing import List

def combine_metrics_csvs(
    BASE_METRICS_DIR: str,
    ROUNDS: List[int],
    OUTPUT_FILENAME: str = "saved_combined_metrics.csv"
) -> None:
    """
    Combine all ``*_metrics.csv`` files in a folder into one CSV.

    Each input file is filtered to the rows whose ``round`` is in ``ROUNDS`` and
    tagged with a ``Model_Strategy`` column (the file name without the
    ``_metrics.csv`` suffix). The concatenated result is written to
    ``BASE_METRICS_DIR/OUTPUT_FILENAME``.

    The output file lives in the same folder as the inputs and, with the default
    name, also matches the ``*_metrics.csv`` pattern, so it is explicitly
    excluded from the inputs; otherwise every re-run would fold the previous
    combined file back in and duplicate rows.

    Args:
        BASE_METRICS_DIR (str): Folder containing metric CSV files.
        ROUNDS (List[int]): Rounds to include.
        OUTPUT_FILENAME (str): Name of the combined CSV file to write.

    Returns:
        None. Prints the output path, or a notice when no input file was usable.
    """
    combined = []

    # sorted() makes the row order of the combined file reproducible.
    for csv_file in sorted(os.listdir(BASE_METRICS_DIR)):
        if not csv_file.endswith("_metrics.csv"):
            continue
        if csv_file == OUTPUT_FILENAME:
            # Never ingest our own previous output.
            continue

        csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
        try:
            df = pd.read_csv(csv_path)
            # .copy() so the column assignment below acts on an independent
            # frame rather than a slice (avoids SettingWithCopyWarning).
            df = df[df["round"].isin(ROUNDS)].copy()
            df["Model_Strategy"] = csv_file.replace("_metrics.csv", "")
            combined.append(df)
        except Exception as e:
            # Best effort: report the unreadable/malformed file and continue.
            print(f"[ERROR] Could not read {csv_file}: {e}")

    if combined:
        final_df = pd.concat(combined, ignore_index=True)
        output_csv = os.path.join(BASE_METRICS_DIR, OUTPUT_FILENAME)
        final_df.to_csv(output_csv, index=False)
        print(f"[✔] Combined file saved: {output_csv}")
    else:
        print("[⚠] No metrics files matched or found.")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
# input_csv refers to the combined metrics CSV
def plot_boxplot(input_csv, output_dir, metric_name, round_num, sortby="Model_Strategy"):
    """
    Draw a single boxplot of one metric column from a CSV and save it as a PNG.

    Args:
        input_csv (str): Path to the CSV to read (e.g. the combined metrics CSV).
        output_dir (str): Directory to save the plot (created if missing).
        metric_name (str): Column to plot on the Y-axis.
        round_num (int): Round number used in the title and file name only;
            the data is NOT filtered by round here.
        sortby (str): Column the rows are sorted by before plotting; also part
            of the output file name. Must exist in the CSV.

    Raises:
        ValueError: If ``metric_name`` is not a column of the CSV.
        KeyError: If ``sortby`` is not a column of the CSV.

    Returns:
        None. Writes
        ``<metric_name>_round<round_num>_sorting_strategy<sortby>_pivot_boxplot.png``.
    """
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_csv(input_csv)

    if metric_name not in df.columns:
        raise ValueError(f"Metric '{metric_name}' not found in the CSV columns: {df.columns.tolist()}")

    # Row order does not change a single box; the sort is kept so a missing
    # `sortby` column still raises KeyError exactly as before.
    df = df.sort_values(by=sortby)

    # NaN entries make matplotlib's quartiles NaN and the box disappears, so
    # drop them first. to_numpy() avoids depending on the shuffled index.
    values = df[metric_name].dropna().to_numpy()

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        ax.boxplot(values)  # vertical is the default orientation
        ax.set_xticks([1])
        ax.set_xticklabels([metric_name])
        ax.set_title(f"{metric_name} Boxplot for Round {round_num}")
        ax.set_ylabel(metric_name)

        plot_name = f"{metric_name}_round{round_num}_sorting_strategy{sortby}_pivot_boxplot.png"
        plot_path = os.path.join(output_dir, plot_name)
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)

    print(f"[OK] Boxplot saved: {plot_path}")
