In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights
from energy_ts_diffusion.task import convert_timeseries_to_numpy  # adjust as per your project
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [18]:
@torch.no_grad()
def rolling_forecast_on_test_hourly(cid, model, filepath="train_final.feather", input_len=24, output_len=8):
    """
    Perform a rolling-window forecast with a model that takes hour-aware input.

    The rows of one building are read from a feather file and turned into an
    hourly two-component series (meter reading, hour of day divided by 23).
    The second part returned by ``split_before(0.75)`` is used as the test
    series. A window of ``input_len`` steps is slid over the scaled test series
    one step at a time; each window is fed to ``model`` and the following
    ``output_len`` meter readings are the target.

    Args:
        cid: Value matched against the ``building_id`` column.
        model: Torch module called as ``model(x)`` with ``x`` of shape
            ``[1, input_len, 2]``; its output is handled as either
            ``[1, output_len]`` or ``[1, output_len, 1]``.
        filepath: Feather file with ``building_id``, ``timestamp`` and
            ``meter_reading`` columns.
        input_len: Number of time steps fed to the model per window.
        output_len: Number of time steps forecast per window.

    Returns:
        ``(predictions, ground_truths)``: two lists with one univariate
        ``TimeSeries`` per window, indexed by the forecast timestamps and
        inverse-transformed back to the unscaled meter readings. Both lists
        are empty when the test series is shorter than
        ``input_len + output_len``.

    Raises:
        ValueError: If the file contains no rows for ``cid``.
    """
    print(f"[DEBUG] rolling_forecast_on_test_hourly: CID={cid}")

    # Load and filter data. `.copy()` yields an independent frame, so the
    # column assignments below do not write into a slice of the full table
    # (which triggers pandas' chained-assignment warning).
    df = pd.read_feather(filepath)
    df = df[df['building_id'] == cid].copy()

    # Fail before doing any further work on an empty selection.
    if df.empty:
        raise ValueError(f"No data found for building_id {cid}")

    df['meter_reading'] = df['meter_reading'].fillna(0)

    # Extract hour of day and normalize to [0, 1]
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['hour'] = df['timestamp'].dt.hour / 23.0

    # Create TimeSeries with both meter + hour
    ts = TimeSeries.from_dataframe(
        df,
        time_col='timestamp',
        value_cols=['meter_reading', 'hour'],
        fill_missing_dates=True,
        freq='h'
    )

    _, test_series = ts.split_before(0.75)

    # Scale both features.
    # NOTE(review): the scaler is fitted on the test series itself, not on the
    # training part - confirm this matches how the model was trained.
    scaler = MinMaxScaler(feature_range=(0.1, 1))
    transformer = Scaler(scaler)
    test_series_scaled = transformer.fit_transform(test_series)

    test_values_scaled = test_series_scaled.values()  # shape: [T, 2]
    test_timestamps = test_series_scaled.time_index

    predictions_ts_list = []
    ground_truth_ts_list = []

    model.eval()
    device = next(model.parameters()).device

    # Number of window start positions; zero or negative means no window fits.
    n_windows = len(test_values_scaled) - input_len - output_len + 1

    for i in range(0, n_windows):
        target_slice = slice(i + input_len, i + input_len + output_len)

        input_seq = test_values_scaled[i:i+input_len]      # [input_len, 2]
        true_output = test_values_scaled[target_slice, 0]  # only meter_reading
        true_time = test_timestamps[target_slice]

        input_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(0).to(device)  # [1, input_len, 2]

        pred = model(input_tensor)  # [1, output_len] or [1, output_len, 1]

        if pred.dim() == 3:
            pred = pred.squeeze(0).squeeze(-1)
        else:
            pred = pred.squeeze(0)

        # Move the prediction to host memory once and reuse it below.
        pred_np = pred.cpu().numpy()

        # The transformer was fitted on two components, so pad prediction and
        # ground truth with a placeholder hour column before inverting; only
        # column 0 (meter_reading) is kept afterwards.
        dummy_hours = np.zeros_like(pred_np)
        pred_padded = np.stack([pred_np, dummy_hours], axis=-1)
        true_padded = np.stack([true_output, dummy_hours], axis=-1)

        # Convert to TimeSeries
        pred_ts = TimeSeries.from_times_and_values(true_time, pred_padded)
        true_ts = TimeSeries.from_times_and_values(true_time, true_padded)

        # Inverse transform
        pred_unscaled = transformer.inverse_transform(pred_ts).values()[:, 0]
        true_unscaled = transformer.inverse_transform(true_ts).values()[:, 0]

        # Save as univariate TimeSeries in the original units
        predictions_ts_list.append(TimeSeries.from_times_and_values(true_time, pred_unscaled))
        ground_truth_ts_list.append(TimeSeries.from_times_and_values(true_time, true_unscaled))

    return predictions_ts_list, ground_truth_ts_list


In [19]:
def get_model_predictions_csv(model_name: str, cid: int, aggr_strat: str, rounds: list, model_dir: str, output_csv: str):
    """
    Forecast one building with the checkpoint of every round and save all
    forecasts in a single CSV.

    For each round the checkpoint
    ``{model_dir}/{model_name}_round_{rnd}_{aggr_strat}.pt`` is loaded into a
    fresh ``model_fn(model_name)`` instance and passed to the hour-aware
    rolling forecast. Rounds whose checkpoint file is missing are skipped with
    a warning.

    Args:
        model_name: Name passed to ``model_fn`` and used in the checkpoint
            file name.
        cid: Building id forwarded to ``rolling_forecast_on_test_hourly``.
        aggr_strat: Aggregation-strategy suffix of the checkpoint file name.
        rounds: Round numbers to evaluate.
        model_dir: Directory holding the checkpoint files.
        output_csv: Destination CSV with columns ``timestamp``, ``true``,
            ``pred``, ``round``. It is only written when at least one round
            produced forecasts.
    """
    # Use the GPU when present, otherwise fall back to the CPU so the function
    # also works on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    rows = []

    for rnd in tqdm(rounds):
        model_path = os.path.join(model_dir, f"{model_name}_round_{rnd}_{aggr_strat}.pt")

        if not os.path.exists(model_path):
            print(f"[WARN] Model not found: {model_path}")
            continue

        model = model_fn(model_name)
        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        state_dict = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()

        # Use hour-aware rolling forecast
        pred_ts_list, gt_ts_list = rolling_forecast_on_test_hourly(cid=cid, model=model)

        for pred_ts, true_ts in zip(pred_ts_list, gt_ts_list):
            df_pred = pd.DataFrame({"timestamp": pred_ts.time_index, "pred": pred_ts.values().squeeze()})
            df_true = pd.DataFrame({"timestamp": true_ts.time_index, "true": true_ts.values().squeeze()})

            # Align prediction and ground truth of this window on the timestamp.
            df_merged = pd.merge(df_true, df_pred, on="timestamp", how="inner")
            df_merged["round"] = rnd

            rows.append(df_merged[["timestamp", "true", "pred", "round"]])

    if rows:
        final_df = pd.concat(rows, ignore_index=True)
        final_df.to_csv(output_csv, index=False)
        print(f"[INFO] Forecasts written to {output_csv}")
    else:
        print("[WARN] No predictions made — check if models exist or round list is valid.")


In [20]:
import os
import pandas as pd

def model_strategy_csv(Base_dir, Target_dir, round_num, METRIC, sortBy="Model_Strategy"):
    """
    Compute boxplot statistics of one metric for every CSV in ``Base_dir`` and
    save them as a single table (one row per input CSV) in ``Target_dir``.

    Args:
        Base_dir: Directory scanned for ``*.csv`` files; each file must have a
            ``round`` column and a column named ``METRIC``.
        Target_dir: Directory the summary CSV is written to (created if
            missing).
        round_num: Only rows whose ``round`` equals this value are used; files
            without such rows are skipped with a message.
        METRIC: Name of the metric column to summarise. The name is stored in
            the output column ``"METRIC"``.
        sortBy: Output column the rows are sorted by.

    Returns:
        None. The table is written to
        ``all_stats_round{round_num}_metric_{METRIC}_sort_by_{sortBy}.csv``
        (spaces in the metric name become ``_`` and ``%`` becomes
        ``percent``). Nothing is written when no file contributes a row.
    """
    os.makedirs(Target_dir, exist_ok=True)

    all_rows = []

    for csv_file in sorted(os.listdir(Base_dir)):
        if not csv_file.endswith(".csv"):
            continue

        csv_path = os.path.join(Base_dir, csv_file)
        df = pd.read_csv(csv_path)

        # Filter for the specified round
        df_round = df[df["round"] == round_num]

        if df_round.empty:
            print(f"[SKIP] No round {round_num} in: {csv_file}")
            continue

        metric_values = df_round[METRIC].dropna()

        # Five-number summary
        q1 = metric_values.quantile(0.25)
        q3 = metric_values.quantile(0.75)
        iqr = q3 - q1

        # Whisker limits (1.5 * IQR rule); values outside count as outliers.
        lower_whisker = q1 - 1.5 * iqr
        upper_whisker = q3 + 1.5 * iqr
        non_outliers = metric_values[(metric_values >= lower_whisker) & (metric_values <= upper_whisker)]

        all_rows.append({
            # File name without the "_metrics.csv" suffix identifies the row.
            "Model_Strategy": csv_file.replace("_metrics.csv", ""),
            "Round": round_num,
            "METRIC": METRIC,  # name of the summarised metric, not a value
            "Count": metric_values.count(),
            "Min": metric_values.min(),
            "Q1": q1,
            "Median": metric_values.median(),
            "Q3": q3,
            "Max": metric_values.max(),
            "IQR": iqr,
            "True Min (Non-outlier)": non_outliers.min(),
            "True Max (Non-outlier)": non_outliers.max()
        })

    # Without rows the frame has no columns and sorting by `sortBy` would
    # raise a KeyError, so report the situation and stop.
    if not all_rows:
        print(f"[WARN] No statistics computed for round {round_num}: no usable CSV in {Base_dir}")
        return

    pivot_df = pd.DataFrame(all_rows).sort_values(by=sortBy)

    # Clean output name
    safe_metric = METRIC.replace(" ", "_").replace("%", "percent")
    pivot_output = os.path.join(
        Target_dir,
        f"all_stats_round{round_num}_metric_{safe_metric}_sort_by_{sortBy}.csv"
    )

    pivot_df.to_csv(pivot_output, index=False)

    print(f"[OK] One-row-per-model_strategy pivot saved: {pivot_output}")


In [21]:
# evaluate forecasts - working correctly 1411 buildings count 
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def smape(y_true, y_pred):
    """
    Symmetric Mean Absolute Percentage Error, in percent.

    Each term is ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``; terms
    whose denominator is zero (both values are zero) count as 0.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable to ``y_true``).

    Returns:
        Mean of the per-element terms multiplied by 100.
    """
    # Accept lists and other array-likes, not only ndarrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    abs_error = np.abs(y_true - y_pred)

    # Divide only where the denominator is non-zero; the remaining positions
    # keep the 0 from `out`, so no divide-by-zero warning is emitted.
    ratio = np.divide(
        abs_error,
        denominator,
        out=np.zeros_like(abs_error),
        where=denominator != 0,
    )
    return np.mean(ratio) * 100

def mape(y_true, y_pred):
    """
    Mean Absolute Percentage Error, in percent.

    Zeros in ``y_true`` are replaced by ``1e-8`` before dividing, so a zero
    actual value with a non-zero prediction yields a very large term instead
    of a division by zero.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable to ``y_true``).

    Returns:
        Mean of ``|(y_true - y_pred) / y_true|`` multiplied by 100.
    """
    # Convert first: comparing a plain list with 0 gives a single False, which
    # would leave the zeros in place.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    safe_true = np.where(y_true == 0, 1e-8, y_true)  # avoid division by zero
    return np.mean(np.abs((safe_true - y_pred) / safe_true)) * 100

def evaluate_forecast_metrics_per_round(csv_path):
    """
    Score a forecast CSV separately for every round it contains.

    Args:
        csv_path (str): Path to a CSV with the columns timestamp, true, pred
            and round.

    Returns:
        pd.DataFrame: One row per round (ascending) with the columns
        round, MAE, MSE, RMSE, MAPE (%) and SMAPE (%).

    Raises:
        ValueError: If the CSV contains no rows.
    """
    forecasts = pd.read_csv(csv_path)
    if forecasts.empty:
        raise ValueError("CSV is empty or invalid")

    def _score_round(round_id):
        # Rows of this round; missing values in any column become 0.005
        # before the metrics are computed.
        subset = forecasts[forecasts['round'] == round_id].fillna(0.005)
        actual = subset["true"].values
        forecast = subset["pred"].values

        abs_err = mean_absolute_error(actual, forecast)
        sq_err = mean_squared_error(actual, forecast)

        return {
            "round": round_id,
            "MAE": abs_err,
            "MSE": sq_err,
            "RMSE": np.sqrt(sq_err),
            "MAPE (%)": mape(actual, forecast),
            "SMAPE (%)": smape(actual, forecast)
        }

    round_ids = sorted(forecasts['round'].unique())
    return pd.DataFrame([_score_round(round_id) for round_id in round_ids])


In [22]:
# Combine all the model_strategy CSVs for rounds 9 and 10

In [23]:
# try in function
def get_model_metric_grouped_by_model_strategy(
    MODELS: List[str],
    STRATEGIES: List[str],
    ROUNDS: List[int],
    BASE_PREDICTIONS_DIR: str,
    BASE_METRICS_DIR: str,
    METRIC: str = "MAE",
    NUM_CLIENTS: int = 1411
) -> None:
    """
    Computes per-round metrics for every client and writes one combined CSV
    per model/strategy pair.

    For each pair, the prediction files
    ``{CID}_{model}_{strategy}.csv`` of the clients ``0 .. NUM_CLIENTS - 1``
    are scored with ``evaluate_forecast_metrics_per_round``. The results of
    all clients are concatenated and written once to
    ``{model}_{strategy}_metrics.csv``, so every client is kept in the output
    (writing inside the client loop would leave only the last client).

    Args:
        MODELS (List[str]): List of model names.
        STRATEGIES (List[str]): List of strategy names.
        ROUNDS (List[int]): Accepted for interface compatibility; not used
            here, every round found in a prediction CSV is scored.
        BASE_PREDICTIONS_DIR (str): Directory containing prediction CSVs.
        BASE_METRICS_DIR (str): Directory to save metrics CSVs (created if
            missing).
        METRIC (str): Accepted for interface compatibility; not used here,
            all metrics are always written.
        NUM_CLIENTS (int): Number of client ids to scan, starting at 0.
    """
    os.makedirs(BASE_METRICS_DIR, exist_ok=True)

    for model_name in MODELS:
        for strategy in STRATEGIES:
            output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")
            all_metrics = []  # one metrics frame per client of this pair

            for CID in tqdm(range(NUM_CLIENTS), desc=f"{model_name}_{strategy}"):
                input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    # Compute metrics for the given CSV
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)
                    metrics_df.insert(0, "building_id", CID)  # add client ID
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)
                    all_metrics.append(metrics_df)

                except Exception as e:
                    # Best effort: one unreadable client file must not stop the run.
                    print(f"[ERROR] CID={CID} | {model_name}{strategy} | {e}")

            # Write once, after all clients were collected.
            if all_metrics:
                pd.concat(all_metrics, ignore_index=True).to_csv(output_csv, index=False)
                print(f"[OK] {output_csv} <- {len(all_metrics)} client(s)")

## Predictions and metrics together (the code above, wrapped in a function)

In [24]:
import os

def get_model_predictions_metric(
    MODELS,
    STRATEGIES,
    ROUNDS,
    BASE_RESULTS_DIR: str,
    BASE_OUTPUT_DIR: str,
    METRICS_DIR: str,
    CID: range
):
    """
    For each client in CID, model in MODELS, and strategy in STRATEGIES,
    generates forecast predictions and computes metrics.

    Predictions are produced by ``get_model_predictions_csv`` for every
    model name (there is a single prediction path) and written to
    ``{BASE_OUTPUT_DIR}/{cid}_{model}_{strategy}.csv``. The per-round metrics
    of that file are written to
    ``{METRICS_DIR}/cid{cid}_{model}_{strategy}_metrics.csv``.

    Args:
        MODELS: Iterable of model names; checkpoints are looked up in
            ``{BASE_RESULTS_DIR}/{model_name}``.
        STRATEGIES: Iterable of aggregation-strategy names.
        ROUNDS: Round numbers forwarded to ``get_model_predictions_csv``.
        BASE_RESULTS_DIR: Root directory of the per-model checkpoint folders.
        BASE_OUTPUT_DIR: Directory for the forecast CSVs (created if missing).
        METRICS_DIR: Directory for the metrics CSVs (created if missing).
        CID: Iterable of client / building ids.
    """
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)
    os.makedirs(METRICS_DIR, exist_ok=True)

    for cid in CID:
        print(f"\nProcessing Client ID: {cid}")

        for model_name in MODELS:
            model_dir = os.path.join(BASE_RESULTS_DIR, model_name)

            for strategy in STRATEGIES:
                output_csv = os.path.join(BASE_OUTPUT_DIR, f"{cid}_{model_name}_{strategy}.csv")
                metrics_csv = os.path.join(METRICS_DIR, f"cid{cid}_{model_name}_{strategy}_metrics.csv")

                print(f"\n Model: {model_name}, Strategy: {strategy}")

                try:
                    get_model_predictions_csv(
                        model_name=model_name,
                        cid=cid,
                        rounds=ROUNDS,
                        model_dir=model_dir,
                        output_csv=output_csv,
                        aggr_strat=strategy
                    )

                    # No CSV is written when no checkpoint was found; report
                    # that explicitly instead of failing on the read below.
                    # NOTE(review): a CSV left over from an earlier run would
                    # still be scored here.
                    if not os.path.isfile(output_csv):
                        print(f"[SKIP] No forecasts for model={model_name}, strategy={strategy}, cid={cid}")
                        continue

                    # Compute metrics
                    metrics_df = evaluate_forecast_metrics_per_round(output_csv)
                    metrics_df.to_csv(metrics_csv, index=False)
                    print(f"Metrics saved to {metrics_csv}")

                except Exception as e:
                    # Best effort: keep going with the remaining combinations.
                    print(f"[ERROR] model={model_name}, strategy={strategy}, cid={cid}: {e}")


In [27]:
# Run configuration: forecasts and metrics for the hour-aware model.
MODELS = ["simple_ann_hour"]   # alternative kept from earlier runs: "gru"
STRATEGIES = ["fedAvg_hr"]     # alternatives kept from earlier runs: "scaffold_lr", "diff_lr2", "das11", "das2"
ROUNDS = [*range(1, 50)]       # rounds 1..49
CID = range(70, 72)            # client ids 70 and 71 (a single id such as 45 was used before)

BASE_RESULTS_DIR = "results"
BASE_OUTPUT_DIR = "predictions1411"
METRICS_DIR = "metrics1411"

get_model_predictions_metric(
    CID=CID,
    MODELS=MODELS,
    STRATEGIES=STRATEGIES,
    ROUNDS=ROUNDS,
    BASE_RESULTS_DIR=BASE_RESULTS_DIR,
    BASE_OUTPUT_DIR=BASE_OUTPUT_DIR,
    METRICS_DIR=METRICS_DIR,
)


Processing Client ID: 70

 Model: simple_ann_hour, Strategy: fedAvg_hr


  0%|          | 0/49 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


  2%|▏         | 1/49 [00:01<01:35,  2.00s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


  4%|▍         | 2/49 [00:03<01:31,  1.95s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


  6%|▌         | 3/49 [00:05<01:26,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


  8%|▊         | 4/49 [00:07<01:25,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 10%|█         | 5/49 [00:09<01:21,  1.86s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 12%|█▏        | 6/49 [00:11<01:22,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 14%|█▍        | 7/49 [00:13<01:18,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 16%|█▋        | 8/49 [00:15<01:17,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 18%|█▊        | 9/49 [00:17<01:16,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 20%|██        | 10/49 [00:18<01:13,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 22%|██▏       | 11/49 [00:20<01:12,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 24%|██▍       | 12/49 [00:22<01:10,  1.90s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 27%|██▋       | 13/49 [00:24<01:07,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 29%|██▊       | 14/49 [00:26<01:04,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 31%|███       | 15/49 [00:28<01:05,  1.93s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 33%|███▎      | 16/49 [00:30<01:02,  1.89s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 35%|███▍      | 17/49 [00:32<00:59,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 37%|███▋      | 18/49 [00:34<01:00,  1.95s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 39%|███▉      | 19/49 [00:36<00:57,  1.90s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 41%|████      | 20/49 [00:37<00:54,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 43%|████▎     | 21/49 [00:39<00:51,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 45%|████▍     | 22/49 [00:41<00:52,  1.94s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 47%|████▋     | 23/49 [00:43<00:49,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 49%|████▉     | 24/49 [00:45<00:47,  1.90s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 51%|█████     | 25/49 [00:47<00:44,  1.86s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 53%|█████▎    | 26/49 [00:49<00:42,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 55%|█████▌    | 27/49 [00:51<00:43,  1.96s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 57%|█████▋    | 28/49 [00:53<00:40,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 59%|█████▉    | 29/49 [00:54<00:37,  1.89s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 61%|██████    | 30/49 [00:56<00:35,  1.86s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 63%|██████▎   | 31/49 [00:58<00:33,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 65%|██████▌   | 32/49 [01:00<00:33,  1.97s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 67%|██████▋   | 33/49 [01:02<00:30,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 69%|██████▉   | 34/49 [01:04<00:28,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 71%|███████▏  | 35/49 [01:06<00:25,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 73%|███████▎  | 36/49 [01:08<00:23,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 76%|███████▌  | 37/49 [01:09<00:22,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 78%|███████▊  | 38/49 [01:12<00:21,  1.98s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 80%|███████▉  | 39/49 [01:13<00:19,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 82%|████████▏ | 40/49 [01:15<00:17,  1.89s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 84%|████████▎ | 41/49 [01:17<00:14,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 86%|████████▌ | 42/49 [01:19<00:12,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 88%|████████▊ | 43/49 [01:21<00:11,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 90%|████████▉ | 44/49 [01:22<00:09,  1.83s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 92%|█████████▏| 45/49 [01:25<00:07,  1.98s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 94%|█████████▍| 46/49 [01:27<00:05,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 96%|█████████▌| 47/49 [01:28<00:03,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


 98%|█████████▊| 48/49 [01:30<00:01,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=70


100%|██████████| 49/49 [01:32<00:00,  1.89s/it]


[INFO] Forecasts written to predictions1411/70_simple_ann_hour_fedAvg_hr.csv
Metrics saved to metrics1411/cid70_simple_ann_hour_fedAvg_hr_metrics.csv

Processing Client ID: 71

 Model: simple_ann_hour, Strategy: fedAvg_hr


  0%|          | 0/49 [00:00<?, ?it/s]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


  2%|▏         | 1/49 [00:01<01:28,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


  4%|▍         | 2/49 [00:03<01:25,  1.81s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


  6%|▌         | 3/49 [00:05<01:22,  1.80s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


  8%|▊         | 4/49 [00:07<01:21,  1.81s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 10%|█         | 5/49 [00:09<01:19,  1.82s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 12%|█▏        | 6/49 [00:11<01:20,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 14%|█▍        | 7/49 [00:12<01:18,  1.86s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 16%|█▋        | 8/49 [00:14<01:18,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 18%|█▊        | 9/49 [00:16<01:16,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 20%|██        | 10/49 [00:18<01:12,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 22%|██▏       | 11/49 [00:20<01:14,  1.95s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 24%|██▍       | 12/49 [00:22<01:10,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 27%|██▋       | 13/49 [00:24<01:07,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 29%|██▊       | 14/49 [00:26<01:07,  1.94s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 31%|███       | 15/49 [00:28<01:04,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 33%|███▎      | 16/49 [00:30<01:01,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 35%|███▍      | 17/49 [00:32<01:01,  1.93s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 37%|███▋      | 18/49 [00:33<00:58,  1.89s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 39%|███▉      | 19/49 [00:35<00:55,  1.86s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 41%|████      | 20/49 [00:37<00:53,  1.83s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 43%|████▎     | 21/49 [00:39<00:50,  1.81s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 45%|████▍     | 22/49 [00:41<00:51,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 47%|████▋     | 23/49 [00:43<00:48,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 49%|████▉     | 24/49 [00:44<00:46,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 51%|█████     | 25/49 [00:46<00:44,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 53%|█████▎    | 26/49 [00:49<00:45,  1.96s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 55%|█████▌    | 27/49 [00:50<00:42,  1.92s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 57%|█████▋    | 28/49 [00:52<00:39,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 59%|█████▉    | 29/49 [00:54<00:37,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 61%|██████    | 30/49 [00:56<00:35,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 63%|██████▎   | 31/49 [00:58<00:35,  1.96s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 65%|██████▌   | 32/49 [01:00<00:32,  1.90s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 67%|██████▋   | 33/49 [01:02<00:29,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 69%|██████▉   | 34/49 [01:03<00:27,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 71%|███████▏  | 35/49 [01:05<00:25,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 73%|███████▎  | 36/49 [01:07<00:23,  1.82s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 76%|███████▌  | 37/49 [01:09<00:23,  1.96s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 78%|███████▊  | 38/49 [01:11<00:21,  1.91s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 80%|███████▉  | 39/49 [01:13<00:18,  1.88s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 82%|████████▏ | 40/49 [01:15<00:16,  1.84s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 84%|████████▎ | 41/49 [01:16<00:14,  1.82s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 86%|████████▌ | 42/49 [01:18<00:12,  1.83s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 88%|████████▊ | 43/49 [01:20<00:10,  1.81s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 90%|████████▉ | 44/49 [01:22<00:09,  1.97s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 92%|█████████▏| 45/49 [01:24<00:07,  1.93s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 94%|█████████▍| 46/49 [01:26<00:05,  1.89s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 96%|█████████▌| 47/49 [01:28<00:03,  1.87s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


 98%|█████████▊| 48/49 [01:30<00:01,  1.85s/it]

[DEBUG] rolling_forecast_on_test_hourly: CID=71


100%|██████████| 49/49 [01:31<00:00,  1.88s/it]


[INFO] Forecasts written to predictions1411/71_simple_ann_hour_fedAvg_hr.csv
Metrics saved to metrics1411/cid71_simple_ann_hour_fedAvg_hr_metrics.csv


In [None]:
# # try in function
# def get_model_metric_grouped_by_model_strategy(
#     MODELS: List[str],
#     STRATEGIES: List[str],
#     ROUNDS: List[int],
#     BASE_PREDICTIONS_DIR: str,
#     BASE_METRICS_DIR: str,
#     METRIC: str = "MAE"
# ) -> None:
#     """
#     Computes metrics for each model and strategy, grouped by client ID and round.
    
#     Args:
#         MODELS (List[str]): List of model names.
#         STRATEGIES (List[str]): List of strategy names.
#         ROUNDS (List[int]): List of round numbers.
#         BASE_PREDICTIONS_DIR (str): Directory containing prediction CSVs.
#         BASE_METRICS_DIR (str): Directory to save metrics CSVs.
#         METRIC (str): The metric to compute, e.g., "MAE", "MSE", etc.
#     """
#     os.makedirs(BASE_METRICS_DIR, exist_ok=True)

#     for CID in tqdm(range(1411), desc="Processing Client IDs"):
#         for model_name in MODELS:
#             for strategy in STRATEGIES:
#                 input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")
#                 output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")

#                 if not os.path.isfile(input_csv):
#                     print(f"[SKIP] Missing: {input_csv}")
#                     continue

#                 try:
#                     # Compute metrics for the given CSV
#                     metrics_df = evaluate_forecast_metrics_per_round(input_csv)
#                     metrics_df.insert(0, "building_id", CID)  # add client ID
#                     metrics_df.insert(1, "model", model_name)
#                     metrics_df.insert(2, "strategy", strategy)

#                     if os.path.isfile(output_csv):
#                         metrics_df.to_csv(output_csv, index=False)
#                     else:
#                         metrics_df.to_csv(output_csv, index=False)

#                     print(f"[OK] {output_csv} <- CID {CID}")

#                 except Exception as e:
#                     print(f"[ERROR] CID={CID} | {model_name}{strategy} | {e}")

In [None]:
import os
import pandas as pd
from typing import List
from tqdm import tqdm

def get_model_metric_grouped_by_model_strategy(
    MODELS: List[str],
    STRATEGIES: List[str],
    ROUNDS: List[int],
    BASE_PREDICTIONS_DIR: str,
    BASE_METRICS_DIR: str,
    METRIC: str = "MAE",
    NUM_CLIENTS: int = 1411
) -> None:
    """
    Computes metrics for each model and strategy, grouped by client ID and round.
    Overwrites existing CSVs with combined results for all building IDs.

    Args:
        MODELS (List[str]): List of model names.
        STRATEGIES (List[str]): List of strategy names.
        ROUNDS (List[int]): Accepted for interface compatibility; not used
            here, every round found in a prediction CSV is scored.
        BASE_PREDICTIONS_DIR (str): Directory with the prediction CSVs named
            ``{CID}_{model}_{strategy}.csv``.
        BASE_METRICS_DIR (str): Directory the combined
            ``{model}_{strategy}_metrics.csv`` files are written to (created
            if missing).
        METRIC (str): Accepted for interface compatibility; not used here,
            all metrics are always written.
        NUM_CLIENTS (int): Number of client ids to scan, starting at 0.
            Defaults to the previously hard-coded 1411.
    """
    os.makedirs(BASE_METRICS_DIR, exist_ok=True)

    for model_name in MODELS:
        for strategy in STRATEGIES:
            all_metrics = []  # Accumulate metrics per model-strategy
            print(f"\n📊 Collecting metrics: {model_name} | {strategy}")

            for CID in tqdm(range(NUM_CLIENTS), desc=f"{model_name}_{strategy}"):
                input_csv = os.path.join(BASE_PREDICTIONS_DIR, f"{CID}_{model_name}_{strategy}.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)
                    metrics_df.insert(0, "building_id", CID)
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)
                    all_metrics.append(metrics_df)

                except Exception as e:
                    # Best effort: one unreadable client file must not stop the run.
                    print(f"[ERROR] CID={CID} | {model_name}{strategy} | {e}")

            # Save once after collecting all building metrics; nothing is
            # written for a pair without any scored client.
            if all_metrics:
                final_df = pd.concat(all_metrics, ignore_index=True)
                output_csv = os.path.join(BASE_METRICS_DIR, f"{model_name}_{strategy}_metrics.csv")
                final_df.to_csv(output_csv, index=False)  # full overwrite
                print(f" Written to: {output_csv}")


In [None]:
# Run configuration: combine the per-client metrics of each model/strategy pair.
MODELS = ["gru", "lstm"]
STRATEGIES = ["scaffold", "diff"]
ROUNDS = [*range(9, 11)]              # rounds 9 and 10 (a single client id such as 45 was used before)

BASE_DIR = "predictions1411"
BASE_OUTPUT_DIR = "predictions1411"   # prediction CSVs read by the call below
METRICS_DIR = "metrics1411_grouped"

# Metrics are computed with evaluate_forecast_metrics_per_round.
get_model_metric_grouped_by_model_strategy(
    MODELS,
    STRATEGIES,
    ROUNDS,
    BASE_PREDICTIONS_DIR=BASE_OUTPUT_DIR,
    BASE_METRICS_DIR=METRICS_DIR,
    METRIC="MAE",
)


📊 Collecting metrics: gru | scaffold


gru_scaffold: 100%|██████████| 5/5 [00:00<00:00, 150.11it/s]


[✔] Written to: metrics1411_grouped/gru_scaffold_metrics.csv

📊 Collecting metrics: gru | diff


gru_diff: 100%|██████████| 5/5 [00:00<00:00, 163.35it/s]


[✔] Written to: metrics1411_grouped/gru_diff_metrics.csv

📊 Collecting metrics: lstm | scaffold


lstm_scaffold: 100%|██████████| 5/5 [00:00<00:00, 163.43it/s]


[✔] Written to: metrics1411_grouped/lstm_scaffold_metrics.csv

📊 Collecting metrics: lstm | diff


lstm_diff: 100%|██████████| 5/5 [00:00<00:00, 168.14it/s]

[✔] Written to: metrics1411_grouped/lstm_diff_metrics.csv





# Now compute tabular statistics

In [13]:
import os
import pandas as pd

def model_strategy_csv(Base_dir, Target_dir, round_num, METRIC, sortBy="Model_Strategy"):
    """
    Compute boxplot statistics of one metric for every CSV in a folder and
    save them as a single summary table (one row per input file).

    For each ``*.csv`` file in ``Base_dir`` the rows whose ``round`` column
    equals ``round_num`` are selected, NaN values of the ``METRIC`` column are
    dropped, and count / min / quartiles / max / IQR are computed, together
    with the smallest and largest values lying inside the 1.5 * IQR whiskers.

    The summary is written to
    ``<Target_dir>/all_stats_round<round_num>_metric_<METRIC>_sort_by_<sortBy>.csv``
    (spaces in METRIC become ``_`` and ``%`` becomes ``percent``).

    Args:
        Base_dir (str): Folder containing the per-model/strategy metric CSVs.
            Each file must have a ``round`` column and a ``METRIC`` column.
        Target_dir (str): Folder for the summary CSV; created if missing.
        round_num (int): Round to summarise.
        METRIC (str): Name of the metric column to summarise. It is stored as
            the value of the literal ``"METRIC"`` column of the summary.
        sortBy (str): Summary column used to order the rows.

    Returns:
        None. If no file contributes a row, a message is printed and no file
        is written.

    Raises:
        KeyError: If an input CSV lacks the ``round`` or ``METRIC`` column, or
            ``sortBy`` is not a column of the summary.
    """
    os.makedirs(Target_dir, exist_ok=True)

    all_rows = []

    for csv_file in os.listdir(Base_dir):
        if not csv_file.endswith(".csv"):
            continue

        df = pd.read_csv(os.path.join(Base_dir, csv_file))

        # Keep only the requested round
        df_round = df[df["round"] == round_num]
        if df_round.empty:
            print(f"[SKIP] No round {round_num} in: {csv_file}")
            continue

        metric_values = df_round[METRIC].dropna()

        # Quartiles and whisker limits (standard 1.5 * IQR rule)
        q1 = metric_values.quantile(0.25)
        q3 = metric_values.quantile(0.75)
        iqr = q3 - q1
        lower_whisker = q1 - 1.5 * iqr
        upper_whisker = q3 + 1.5 * iqr

        # Values that are not outliers under the whisker rule
        non_outliers = metric_values[
            (metric_values >= lower_whisker) & (metric_values <= upper_whisker)
        ]

        all_rows.append({
            # File name without the "_metrics.csv" suffix, e.g. "gru_diff"
            "Model_Strategy": csv_file.replace("_metrics.csv", ""),
            "Round": round_num,
            "METRIC": METRIC,  # name of the summarised metric, not a value
            "Count": metric_values.count(),
            "Min": metric_values.min(),
            "Q1": q1,
            "Median": metric_values.median(),
            "Q3": q3,
            "Max": metric_values.max(),
            "IQR": iqr,
            "True Min (Non-outlier)": non_outliers.min(),
            "True Max (Non-outlier)": non_outliers.max(),
        })

    # An empty frame has no columns, so sorting it would raise KeyError.
    if not all_rows:
        print(f"[SKIP] No statistics rows for round {round_num}; nothing written.")
        return

    pivot_df = pd.DataFrame(all_rows).sort_values(by=sortBy)

    # Make the metric name safe for use in a file name
    safe_metric = METRIC.replace(" ", "_").replace("%", "percent")
    pivot_output = os.path.join(
        Target_dir,
        f"all_stats_round{round_num}_metric_{safe_metric}_sort_by_{sortBy}.csv"
    )

    pivot_df.to_csv(pivot_output, index=False)

    print(f"[OK] One-row-per-model_strategy pivot saved: {pivot_output}")


In [None]:
# # Combine all metrics CSVs into a single file that contains statistics for each model and strategy.

For the box plot, combine all model-strategy CSV files.

In [None]:
# # combine data 
# import os

# # Folder
# BASE_METRICS_DIR = "forecast_metrics_new_1411"
# combined = []

# for csv_file in os.listdir(BASE_METRICS_DIR):
#     if not csv_file.endswith(".csv"):
#         continue

#     csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
#     df = pd.read_csv(csv_path)

#     # Make Model_Strategy for each row
#     model_strategy = csv_file.replace("_metrics.csv", "")
#     df["Model_Strategy"] = model_strategy

#     combined.append(df)

# # Combine all
# big_df = pd.concat(combined, ignore_index=True)

# # Correct path
# output_csv = os.path.join(BASE_METRICS_DIR, "saved_combined_data_round9_round10_1411.csv")

# # Save
# big_df.to_csv(output_csv, index=False)
# print(f"[OK] Combined raw file saved to: {output_csv}")

In [14]:
import os
import pandas as pd
from typing import List

def combine_metrics_csvs(
    BASE_METRICS_DIR: str,
    ROUNDS: List[int],
    OUTPUT_FILENAME: str = "saved_combined_metrics.csv"
) -> None:
    """
    Combine all ``*_metrics.csv`` files of a folder into one CSV.

    Each input file is filtered to the rows whose ``round`` value is in
    ``ROUNDS`` and gets a ``Model_Strategy`` column holding the file name
    without the ``_metrics.csv`` suffix. The result is written to
    ``<BASE_METRICS_DIR>/<OUTPUT_FILENAME>``.

    The output file itself is never used as an input, so calling the function
    repeatedly on the same folder does not duplicate rows.

    Args:
        BASE_METRICS_DIR (str): Folder containing metric CSV files; also the
            folder the combined file is written to.
        ROUNDS (List[int]): Rounds to include.
        OUTPUT_FILENAME (str): Name of the combined CSV file.

    Returns:
        None. Files that cannot be read or filtered are reported with an
        ``[ERROR]`` message and skipped; if nothing is collected, a warning is
        printed and no file is written.
    """
    combined = []

    # Sorted so the row order of the combined file is the same on every run.
    for csv_file in sorted(os.listdir(BASE_METRICS_DIR)):
        if not csv_file.endswith("_metrics.csv"):
            continue

        # The default output name also ends in "_metrics.csv"; without this
        # guard a re-run would read the previous combined file as an input.
        if csv_file == OUTPUT_FILENAME:
            continue

        csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
        try:
            df = pd.read_csv(csv_path)
            # .assign returns a new frame, avoiding a write into a filtered slice.
            df = df[df["round"].isin(ROUNDS)].assign(
                Model_Strategy=csv_file.replace("_metrics.csv", "")
            )
            combined.append(df)
        except Exception as e:
            print(f"[ERROR] Could not read {csv_file}: {e}")

    if combined:
        final_df = pd.concat(combined, ignore_index=True)
        output_csv = os.path.join(BASE_METRICS_DIR, OUTPUT_FILENAME)
        final_df.to_csv(output_csv, index=False)
        print(f"[✔] Combined file saved: {output_csv}")
    else:
        print("[⚠] No metrics files matched or found.")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
# input_csv refers to the combined metrics CSV
def plot_boxplot(input_csv, output_dir, metric_name, round_num, sortby="Model_Strategy"):
    """
    Draw a single boxplot of one CSV column and save it as a PNG.

    All non-NaN values of ``metric_name`` in ``input_csv`` go into one box;
    the data is not filtered by round or split by group. The image is saved as
    ``<output_dir>/<metric_name>_round<round_num>_sorting_strategy<sortby>_pivot_boxplot.png``.

    Args:
        input_csv (str): Path to the CSV file to read.
        output_dir (str): Directory to save the plot; created if missing.
        metric_name (str): Column name to plot on the Y-axis.
        round_num (int): Round number, used only in the title and file name.
        sortby (str): Column the rows are sorted by before plotting; also
            part of the output file name.

    Raises:
        ValueError: If ``metric_name`` is not a column of the CSV.
        KeyError: If ``sortby`` is not a column of the CSV.
    """
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_csv(input_csv)

    if metric_name not in df.columns:
        raise ValueError(f"Metric '{metric_name}' not found in the CSV columns: {df.columns.tolist()}")

    # Row order does not affect the statistics of a single box; the sort is
    # kept so that an unknown sortby column still fails as before.
    df = df.sort_values(by=sortby)

    # NaN values would make the box statistics NaN and leave the box empty.
    values = df[metric_name].dropna()

    plot_name = f"{metric_name}_round{round_num}_sorting_strategy{sortby}_pivot_boxplot.png"
    plot_path = os.path.join(output_dir, plot_name)

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        ax.boxplot(values, vert=True)
        ax.set_xticks([1])
        ax.set_xticklabels([metric_name])
        ax.set_title(f"{metric_name} Boxplot for Round {round_num}")
        ax.set_ylabel(metric_name)
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    print(f"[OK] Boxplot saved: {plot_path}")
