In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights
# from energy_ts_diffusion.task import convert_timeseries_to_numpy  # adjust as per your project
from tqdm import tqdm


In [2]:
# evaluate forecasts - working correctly 1411 buildings count 
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def smape(y_true, y_pred):
    """Symmetric Mean Absolute Percentage Error, in percent.

    Computes ``mean(|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)) * 100``.
    Positions where both values are zero (i.e. the denominator is 0)
    contribute 0 to the mean.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        SMAPE as a percentage.
    """
    # Coerce to float arrays so plain Python lists are accepted as well.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0

    # Divide only where the denominator is non-zero; the other slots keep the 0
    # they were initialised with. np.where(cond, 0, a / b) would still evaluate
    # a / b everywhere and emit a 0/0 RuntimeWarning.
    ratio = np.divide(
        abs_error,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    return np.mean(ratio) * 100

def mape(y_true, y_pred):
    """Mean Absolute Percentage Error, expressed in percent.

    Exact zeros in ``y_true`` are replaced by 1e-8 before the error is
    computed, so the substituted value serves both as the reference in the
    numerator and as the divisor.
    """
    # Nudge zero targets to a tiny epsilon so the division below is defined.
    safe_true = np.where(y_true == 0, 1e-8, y_true)
    relative_error = (safe_true - y_pred) / safe_true
    return np.mean(np.abs(relative_error)) * 100

def evaluate_forecast_metrics_per_round(csv_path, nan_fill=0.005):
    """
    Reads a forecast CSV and computes MAE, MSE, RMSE, MAPE and SMAPE per round.

    Args:
        csv_path (str): Path to a CSV with at least the columns ``true``,
            ``pred`` and ``round``.
        nan_fill (float): Value substituted for missing ``true`` / ``pred``
            entries before the metrics are computed. Defaults to 0.005.

    Returns:
        pd.DataFrame: One row per round (ascending) with the columns
        ``round``, ``MAE``, ``MSE``, ``RMSE``, ``MAPE (%)`` and ``SMAPE (%)``.

    Raises:
        ValueError: If the CSV has no rows, or no row carries a round label.
        KeyError: If a required column is absent.
    """
    df = pd.read_csv(csv_path)
    if df.empty:
        raise ValueError("CSV is empty or invalid")

    missing_cols = [col for col in ("round", "true", "pred") if col not in df.columns]
    if missing_cols:
        raise KeyError(f"CSV is missing required column(s): {missing_cols}")

    # A row without a round label cannot be attributed to any round. Left in,
    # NaN would show up as a "round" whose equality filter matches nothing,
    # and the metric functions would then fail on an empty slice.
    df = df.dropna(subset=["round"])
    if df.empty:
        raise ValueError("CSV has no rows with a round label")

    metrics_list = []

    # Single pass over the frame; groupby yields the rounds in ascending order.
    for rnd, df_rnd in df.groupby("round"):
        y_true = df_rnd["true"].fillna(nan_fill).values
        y_pred = df_rnd["pred"].fillna(nan_fill).values

        mse = mean_squared_error(y_true, y_pred)

        metrics_list.append({
            "round": rnd,
            "MAE": mean_absolute_error(y_true, y_pred),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "MAPE (%)": mape(y_true, y_pred),
            "SMAPE (%)": smape(y_true, y_pred)
        })

    return pd.DataFrame(metrics_list)


In [37]:

def compute_all_forecast_metrics(
    models,
    strategies,
    rounds,
    base_predictions_dir,
    base_metrics_dir,
    client_range=range(1411),
    overwrite=False,
):
    """
    Computes forecast evaluation metrics (MAE, MSE, RMSE, MAPE, SMAPE) for all client IDs
    across combinations of model and strategy, restricted to the requested rounds.

    For every (client, model, strategy) the forecast file
    ``{cid}_{model}_{strategy}.csv`` is evaluated per round and the resulting rows are
    appended to ``{model}_{strategy}_metrics.csv`` in `base_metrics_dir`. Missing
    forecast files are skipped; a failure for one combination is reported and does
    not stop the remaining ones.

    Args:
        models (List[str]): List of model names (e.g. ['gru', 'lstm']).
        strategies (List[str]): List of strategy names. They are joined to the model
            name with "_", so a value that itself starts with "_" produces a double
            underscore in the file names.
        rounds (List[int] | None): Rounds to keep in the output (e.g. [9, 10]).
            ``None`` or an empty collection keeps every round found in the forecasts.
        base_predictions_dir (str): Directory where the forecast CSVs are stored.
        base_metrics_dir (str): Output directory where metrics will be saved (created if needed).
        client_range (Iterable[int]): Client IDs to process (default: range(1411)).
        overwrite (bool): If True, existing ``{model}_{strategy}_metrics.csv`` files are
            deleted first so that a re-run does not append duplicate rows. The default
            (False) keeps the append-to-existing behaviour.
    """
    os.makedirs(base_metrics_dir, exist_ok=True)

    # Materialise once: accepts any iterable and gives a cheap emptiness check.
    wanted_rounds = list(rounds) if rounds is not None else []

    if overwrite:
        # Start from a clean slate so repeated runs stay idempotent.
        for model_name in models:
            for strategy in strategies:
                stale_csv = os.path.join(base_metrics_dir, f"{model_name}_{strategy}_metrics.csv")
                if os.path.isfile(stale_csv):
                    os.remove(stale_csv)

    for cid in tqdm(client_range, desc="Processing Client IDs"):
        for model_name in models:
            for strategy in strategies:
                input_csv = os.path.join(base_predictions_dir, f"{cid}_{model_name}_{strategy}.csv")
                output_metrics_csv = os.path.join(base_metrics_dir, f"{model_name}_{strategy}_metrics.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    # Compute metrics per round
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)

                    # Honour the `rounds` argument.
                    if wanted_rounds:
                        metrics_df = metrics_df[metrics_df["round"].isin(wanted_rounds)]
                        if metrics_df.empty:
                            print(f"[SKIP] No requested rounds in: {input_csv}")
                            continue

                    # Annotate with metadata
                    metrics_df.insert(0, "building_id", cid)
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)

                    # Append to CSV; the header is written only when the file is created.
                    if os.path.isfile(output_metrics_csv):
                        metrics_df.to_csv(output_metrics_csv, mode='a', header=False, index=False)
                    else:
                        metrics_df.to_csv(output_metrics_csv, index=False)

                    print(f"[OK] Saved metrics for CID={cid} → {output_metrics_csv}")

                except Exception as e:
                    # Best effort: report the failure and continue with the next combination.
                    print(f"[ERROR] CID={cid} | model={model_name} | strategy={strategy} | {e}")


## Predictions metric both (above code in function)

In [38]:
import os
import pandas as pd
from typing import List, Union

def combine_metrics_by_model_strategy(metrics_dir: str, output_dir: str, round: Union[List[int], tuple, None] = None):
    """
    Combines per-client ``<cid>_<model>_<strategy>_metrics.csv`` files into one
    ``<model>_<strategy>_metrics_round<r>.csv`` file per model/strategy and round.

    The client id is taken from the first "_"-separated token of the file name
    (an optional "cid" prefix is stripped), the model from the second token, and
    the strategy from everything between the model and the trailing "metrics".
    Each output row gets a leading ``cid`` column and a ``Model_Strategy`` column.
    Files whose name does not follow this pattern are reported and skipped.

    Args:
        metrics_dir (str): Directory with per-client metrics CSVs.
        output_dir (str): Directory to store combined round-wise metrics CSVs (created if needed).
        round (List[int] | tuple, optional): Inclusive ``(first, last)`` round range; every
            round from ``round[0]`` through ``round[1]`` is processed (e.g. ``(9, 11)`` gives
            rounds 9, 10 and 11). Only the first two elements are used. If None, all rounds
            present in the data are processed.
    """
    os.makedirs(output_dir, exist_ok=True)
    combined = {}

    # Sorted so the processing and log order does not depend on the file system.
    for fname in sorted(os.listdir(metrics_dir)):
        if not fname.endswith("_metrics.csv"):
            continue

        # Strip only the trailing extension, then split into name tokens.
        parts = fname[: -len(".csv")].split("_")
        if len(parts) < 4:
            print(f"[WARN] Unexpected filename format: {fname}")
            continue

        try:
            cid = int(parts[0].replace("cid", ""))
        except ValueError:
            # One badly named file must not abort the whole combination run.
            print(f"[WARN] Non-numeric client id in filename: {fname}")
            continue

        model = parts[1]
        strategy = "_".join(parts[2:-1])
        key = f"{model}_{strategy}"

        df = pd.read_csv(os.path.join(metrics_dir, fname))
        df.insert(0, "cid", cid)
        df["Model_Strategy"] = key

        combined.setdefault(key, []).append(df)

    for key, dfs in combined.items():
        full_df = pd.concat(dfs, ignore_index=True)
        full_df = full_df.sort_values(by=["cid", "round"])

        if round is None:
            rounds_to_process = sorted(full_df["round"].unique())
        else:
            # `round` is an inclusive (first, last) pair.
            rounds_to_process = list(range(round[0], round[1] + 1))

        for r in rounds_to_process:
            df_r = full_df[full_df["round"] == r]
            if df_r.empty:
                print(f"[WARN] No data for {key} in round {r}")
                continue

            output_path = os.path.join(output_dir, f"{key}_metrics_round{r}.csv")
            df_r.to_csv(output_path, index=False)
            print(f"[INFO] Saved: {output_path}")


In [40]:
import os
import pandas as pd

def summarize_all_model_strategy_metrics(
    input_dir: str,
    output_csv: str,
    metric: str = "SMAPE (%)",
    file_suffix: str = "_round50.csv",
):
    """
    Averages the metrics of every matching model-strategy file and writes a
    summary table sorted by the chosen metric.

    Every file in `input_dir` whose name ends with `file_suffix` contributes one
    summary row: its first ``Model_Strategy`` value plus the column means of
    MAE, MSE, RMSE, MAPE (%) and SMAPE (%). If no file contributes a row, a
    warning is printed and nothing is written.

    Args:
        input_dir (str): Folder with the round-wise metrics CSVs.
        output_csv (str): Output CSV file path. ``_sorted_by_<metric>`` is inserted
            before the extension, where ``<metric>`` is the lower-cased metric name
            without spaces, parentheses or percent signs.
        metric (str): Summary column to sort by, ascending (e.g., "SMAPE (%)").
        file_suffix (str): File-name suffix selecting the inputs. Defaults to
            "_round50.csv", i.e. the round-50 files.
    """
    metric_columns = ["MAE", "MSE", "RMSE", "MAPE (%)", "SMAPE (%)"]
    summary_rows = []

    for fname in sorted(os.listdir(input_dir)):
        if not fname.endswith(file_suffix):
            continue

        df = pd.read_csv(os.path.join(input_dir, fname))
        if df.empty:
            # A header-only file has no first row to take Model_Strategy from.
            print(f"[WARN] Skipping empty metrics file: {fname}")
            continue

        row = {"Model_Strategy": df["Model_Strategy"].iloc[0]}
        row.update({col: df[col].mean() for col in metric_columns})
        summary_rows.append(row)

    if not summary_rows:
        # Sorting an empty, column-less frame would fail with an opaque KeyError.
        print(f"[WARN] No '*{file_suffix}' files with data found in: {input_dir}")
        return

    summary_df = pd.DataFrame(summary_rows)
    print(summary_df.head())
    # Sort by requested metric
    summary_df = summary_df.sort_values(by=metric)

    # Sanitize metric for filename
    metric_filename = metric.lower().replace(" ", "").replace("(", "").replace(")", "").replace("%", "")
    base, ext = os.path.splitext(output_csv)
    output_csv = f"{base}_sorted_by_{metric_filename}{ext}"

    # Make sure the destination folder exists before writing.
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Save
    summary_df.to_csv(output_csv, index=False)
    print(f"[INFO] Combined summary saved to: {output_csv}")


In [3]:
# Load train_final.feather (path relative to the working directory) into a DataFrame.
df = pd.read_feather("train_final.feather")

In [4]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,primary_use,air_temperature
7593144,0,0,2016-05-21 01:00:00,72.221012,Education,25.6
7593145,1,0,2016-05-21 01:00:00,39.611586,Education,25.6
7593146,2,0,2016-05-21 01:00:00,1.920567,Education,25.6
7593147,3,0,2016-05-21 01:00:00,111.532464,Education,25.6
7593148,4,0,2016-05-21 01:00:00,456.734799,Education,25.6


In [5]:
# Distinct (building_id, primary_use) pairs, re-indexed from 0.
unique_cid_use = (
    df.loc[:, ['building_id', 'primary_use']]
    .drop_duplicates()
    .reset_index(drop=True)
)


In [6]:
# Preview the first rows of the de-duplicated building_id / primary_use table.
unique_cid_use.head()

Unnamed: 0,building_id,primary_use
0,0,Education
1,1,Education
2,2,Education
3,3,Education
4,4,Education
