In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np
from Models import MoELSTM
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from typing import List, Tuple, Optional, Dict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import random
from Models import MoELSTM, LSTMModel, train_model
from Preprocess import (
    compute_metrics,
    convert_timeseries_to_numpy,
    create_dataloader,
    load_building_series,
    split_series_list,
)
from Models import model_fn
from tqdm import tqdm
from my_utils import train_model, load_energy_data_feather, get_weights, set_weights
from energy_ts_diffusion.task import convert_timeseries_to_numpy  # adjust as per your project
from tqdm import tqdm


In [36]:
# evaluate forecasts - working correctly 1411 buildings count 
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def smape(y_true, y_pred):
    """Symmetric Mean Absolute Percentage Error."""
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    return np.mean(np.where(denominator == 0, 0, np.abs(y_true - y_pred) / denominator)) * 100

def mape(y_true, y_pred):
    """Mean Absolute Percentage Error."""
    y_true = np.where(y_true == 0, 1e-8, y_true)  # avoid division by zero
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def evaluate_forecast_metrics_per_round(csv_path):
    """
    Reads forecast CSV and computes MAPE, MAE, SMAPE, RMSE, and MSE per round.

    Args:
        csv_path (str): Path to the CSV with columns: timestamp, true, pred, round

    Returns:
        pd.DataFrame: Metrics summary per round
    """
    df = pd.read_csv(csv_path)
    if df.empty:
        raise ValueError("CSV is empty or invalid")

    metrics_list = []

    for rnd in sorted(df['round'].unique()):

        df_rnd = df[df['round'] == rnd]
        df_rnd = df_rnd.fillna(0.005)
        y_true = df_rnd["true"].values
        y_pred = df_rnd["pred"].values

        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mape_val = mape(y_true, y_pred)
        smape_val = smape(y_true, y_pred)

        metrics_list.append({
            "round": rnd,
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "MAPE (%)": mape_val,
            "SMAPE (%)": smape_val
        })

    metrics_df = pd.DataFrame(metrics_list)
    return metrics_df


In [37]:

def compute_all_forecast_metrics(
    models,
    strategies,
    rounds,
    base_predictions_dir,
    base_metrics_dir,
    client_range=range(1411)
):
    """
    Computes forecast evaluation metrics (MAE, MSE, RMSE, MAPE, SMAPE) for all client IDs
    across combinations of model and strategy for specified rounds.

    Saves the results in separate CSVs per model_strategy in `base_metrics_dir`.

    Args:
        models (List[str]): List of model names (e.g. ['gru', 'lstm']).
        strategies (List[str]): List of strategy suffixes (e.g. ['_fedAvg', '_scaffold']).
        rounds (List[int]): List of training rounds (e.g. [9, 10]).
        base_predictions_dir (str): Directory where `{cid}_{model}_{strategy}.csv` forecast files are stored.
        base_metrics_dir (str): Output directory where metrics will be saved.
        client_range (range): Range of client IDs (default: range(1411))
    """
    os.makedirs(base_metrics_dir, exist_ok=True)

    for cid in tqdm(client_range, desc="Processing Client IDs"):
        for model_name in models:
            for strategy in strategies:
                input_csv = os.path.join(base_predictions_dir, f"{cid}_{model_name}_{strategy}.csv")
                output_metrics_csv = os.path.join(base_metrics_dir, f"{model_name}_{strategy}_metrics.csv")

                if not os.path.isfile(input_csv):
                    print(f"[SKIP] Missing: {input_csv}")
                    continue

                try:
                    # Compute metrics per round
                    metrics_df = evaluate_forecast_metrics_per_round(input_csv)

                    # Annotate with metadata
                    metrics_df.insert(0, "building_id", cid)
                    metrics_df.insert(1, "model", model_name)
                    metrics_df.insert(2, "strategy", strategy)

                    # Append to CSV
                    if os.path.isfile(output_metrics_csv):
                        metrics_df.to_csv(output_metrics_csv, mode='a', header=False, index=False)
                    else:
                        metrics_df.to_csv(output_metrics_csv, index=False)

                    print(f"[OK] Saved metrics for CID={cid} → {output_metrics_csv}")

                except Exception as e:
                    print(f"[ERROR] CID={cid} | model={model_name} | strategy={strategy} | {e}")


## Predicitons metric both (above code in function)

In [38]:
import os
import pandas as pd
from typing import List, Union

def combine_metrics_by_model_strategy(metrics_dir: str, output_dir: str, round: Union[List[int], tuple, None] = None):
    """
    For each round, combines all cid_model_strategy_metrics.csv files into
    separate model_strategy_metrics_round<r>.csv files.

    Args:
        metrics_dir (str): Directory with per-client metrics CSVs.
        output_dir (str): Directory to store combined round-wise metrics CSVs.
        round (List[int] | tuple, optional): Rounds to include (e.g., (9, 11)). If None, process all rounds.
    """
    os.makedirs(output_dir, exist_ok=True)
    combined = {}

    for fname in os.listdir(metrics_dir):
        if not fname.endswith("_metrics.csv"):
            continue

        parts = fname.replace(".csv", "").split("_")
        if len(parts) < 4:
            print(f"[WARN] Unexpected filename format: {fname}")
            continue

        cid = parts[0].replace("cid", "")
        model = parts[1]
        strategy = "_".join(parts[2:-1])
        key = f"{model}_{strategy}"

        df = pd.read_csv(os.path.join(metrics_dir, fname))
        df.insert(0, "cid", int(cid))
        df["Model_Strategy"] = key

        if key not in combined:
            combined[key] = []

        combined[key].append(df)

    for key, dfs in combined.items():
        full_df = pd.concat(dfs, ignore_index=True)
        full_df = full_df.sort_values(by=["cid", "round"])

        rounds_to_process = sorted(full_df["round"].unique()) if round is None else list(range(round[0], round[1] + 1))

        for r in rounds_to_process:
            df_r = full_df[full_df["round"] == r]
            if df_r.empty:
                print(f"[WARN] No data for {key} in round {r}")
                continue

            output_path = os.path.join(output_dir, f"{key}_metrics_round{r}.csv")
            df_r.to_csv(output_path, index=False)
            print(f"[INFO] Saved: {output_path}")


In [39]:
combine_metrics_by_model_strategy(
    metrics_dir="metrics40-50-168",
    output_dir="forecast40-50-168"
   
)

[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round45.csv
[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round46.csv
[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round47.csv
[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round48.csv
[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round49.csv
[INFO] Saved: forecast40-50-168/lstm_scaffold_diff001_metrics_round50.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round45.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round46.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round47.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round48.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round49.csv
[INFO] Saved: forecast40-50-168/gru_scaffold_diff001_metrics_round50.csv
[INFO] Saved: forecast40-50-168/lstm_fedProx_metrics_round40.csv
[INFO] Saved: forecast40-50-168/lstm_fedProx_metrics_

In [40]:
import os
import pandas as pd

def summarize_all_model_strategy_metrics(input_dir: str, output_csv: str, metric: str = "SMAPE (%)"):
    """
    Reads all *_metrics.csv files in the directory and computes mean metrics
    for each model-strategy file. Sorts by the given metric and saves summary CSV.

    Args:
        input_dir (str): Folder with *_metrics.csv files (e.g., forecast_12/)
        output_csv (str): Output CSV file path. Metric name will be appended if not already.
        metric (str): Metric to sort the summary by (e.g., "SMAPE (%)")
    """
    summary_rows = []

    for fname in sorted(os.listdir(input_dir)):
        if not fname.endswith("_round50.csv"):
            continue

        fpath = os.path.join(input_dir, fname)
        df = pd.read_csv(fpath)

        metrics = {
            "Model_Strategy": df["Model_Strategy"].iloc[0],
            "MAE": df["MAE"].mean(),
            "MSE": df["MSE"].mean(),
            "RMSE": df["RMSE"].mean(),
            "MAPE (%)": df["MAPE (%)"].mean(),
            "SMAPE (%)": df["SMAPE (%)"].mean()
        }
        summary_rows.append(metrics)

    summary_df = pd.DataFrame(summary_rows)
    print(summary_df.head())
    # Sort by requested metric
    summary_df = summary_df.sort_values(by=metric)
    

    # Sanitize metric for filename
    metric_filename = metric.lower().replace(" ", "").replace("(", "").replace(")", "").replace("%", "")
    base, ext = os.path.splitext(output_csv)
    output_csv = f"{base}_sorted_by_{metric_filename}{ext}"

    # Save
    summary_df.to_csv(output_csv, index=False)
    print(f"[INFO] Combined summary saved to: {output_csv}")


In [41]:
metric = "SMAPE (%)"
summarize_all_model_strategy_metrics(
    input_dir="forecast40-50-168/",
    output_csv=f"forecast_summary_all_model40-50-168_strategy_metrics6.csv",
    metric=metric
)


        Model_Strategy        MAE           MSE       RMSE      MAPE (%)  \
0     gru_fedAvg_diff0  39.990998  82839.703365  56.760577  9.697264e+09   
1           gru_fedAvg  39.338295  78588.454370  55.934972  9.641165e+09   
2  gru_fedProx_diff001  40.391334  62989.168562  55.444070  8.967857e+09   
3     gru_fedProx_diff  40.319897  60076.340566  55.257935  8.814954e+09   
4          gru_fedProx  38.910364  54144.969795  54.137038  8.388048e+09   

   SMAPE (%)  
0  35.983418  
1  35.898345  
2  36.991723  
3  37.181595  
4  36.674986  
[INFO] Combined summary saved to: forecast_summary_all_model40-50-168_strategy_metrics6_sorted_by_smape.csv


# Now compute tabular statistics

In [42]:
import os
import pandas as pd

def model_strategy_csv(Base_dir, Target_dir, round_num, METRIC, sortBy="Model_Strategy"):
    """
    Computes boxplot statistics for each model_strategy CSV
    and saves the pivot table in the target directory.

    The metric column is dynamically named with METRIC.
    """
    os.makedirs(Target_dir, exist_ok=True)

    all_rows = []

    for csv_file in os.listdir(Base_dir):
        if not csv_file.endswith(".csv"):
            continue

        csv_path = os.path.join(Base_dir, csv_file)
        df = pd.read_csv(csv_path)

        # Filter for the specified round
        df_round = df[df["round"] == round_num]

        if df_round.empty:
            print(f"[SKIP] No round {round_num} in: {csv_file}")
            continue

        metric_values = df_round[METRIC].dropna()

        # Compute stats
        count = metric_values.count()
        min_val = metric_values.min()
        q1 = metric_values.quantile(0.25)
        median = metric_values.median()
        q3 = metric_values.quantile(0.75)
        max_val = metric_values.max()
        iqr = q3 - q1

        # Whiskers
        lower_whisker = q1 - 1.5 * iqr
        upper_whisker = q3 + 1.5 * iqr

        # Non-outlier min/max
        non_outliers = metric_values[(metric_values >= lower_whisker) & (metric_values <= upper_whisker)]
        min_non_outlier = non_outliers.min()
        max_non_outlier = non_outliers.max()

        # Extract model_strategy and strategy
        model_strategy = csv_file.replace("_metrics.csv", "")
        try:
            model, strategy = model_strategy.split("_", 1)
        except ValueError:
            strategy = "unknown"

        # ✅ The metric name is now a column with the median value.
        row = {
            "Model_Strategy": model_strategy,
            # "Strategy": strategy,
            "Round": round_num,
            "METRIC": METRIC,  # dynamic metric column with value
            "Count": count,
            "Min": min_val,
            "Q1": q1,
            "Median": median,
            "Q3": q3,
            "Max": max_val,
            "IQR": iqr,
            # "Lower Whisker": lower_whisker,
            # "Upper Whisker": upper_whisker,
            "True Min (Non-outlier)": min_non_outlier,
            "True Max (Non-outlier)": max_non_outlier
        }

        all_rows.append(row)

    # Combine to DataFrame
    pivot_df = pd.DataFrame(all_rows)
    pivot_df.sort_values(by=sortBy, inplace=True)

    # Clean output name
    safe_metric = METRIC.replace(" ", "_").replace("%", "percent")
    pivot_output = os.path.join(
        Target_dir,
        f"all_stats_round{round_num}_metric_{safe_metric}_sort_by_{sortBy}.csv"
    )

    pivot_df.to_csv(pivot_output, index=False)

    print(f"[OK] One-row-per-model_strategy pivot saved: {pivot_output}")


In [49]:
model_strategy_csv(Base_dir="forecast40-50-168",Target_dir="agger_metrics40-50-168-G",round_num=50,METRIC="SMAPE (%)") #SMAPE (%)

[SKIP] No round 50 in: gru_fedAvg_metrics_round46.csv
[SKIP] No round 50 in: lstm_fedProx_metrics_round42.csv
[SKIP] No round 50 in: lstm_fedAvg_metrics_round41.csv
[SKIP] No round 50 in: gru_fedProx_metrics_round42.csv
[SKIP] No round 50 in: gru_fedProx_diff_metrics_round46.csv
[SKIP] No round 50 in: gru_scaffold_diff_metrics_round40.csv
[SKIP] No round 50 in: gru_fedProx_diff_metrics_round45.csv
[SKIP] No round 50 in: lstm_fedAvg_metrics_round45.csv
[SKIP] No round 50 in: gru_fedAvg_metrics_round40.csv
[SKIP] No round 50 in: gru_scaffold_lr_metrics_round49.csv
[SKIP] No round 50 in: gru_fedProx_metrics_round41.csv
[SKIP] No round 50 in: lstm_fedAvg_metrics_round47.csv
[SKIP] No round 50 in: gru_fedProx_diff_metrics_round40.csv
[SKIP] No round 50 in: lstm_scaffold_diff001_metrics_round47.csv
[SKIP] No round 50 in: lstm_scaffold_lr_metrics_round47.csv
[SKIP] No round 50 in: lstm_fedProx_diff_metrics_round43.csv
[SKIP] No round 50 in: lstm_scaffold_lr_metrics_round40.csv
[SKIP] No round

In [25]:
intended_rows = ["lstm_scaffold_diff-new-60_metrics_round50.csv","gru_scaffold_diff-new-60_metrics_round50.csv","gru_scaffold_diff_metrics_round50.csv","gru_scaffold_diff_metrics_round50.csv","lstm_scaffold_diff_metrics_round50.csv","lstm_scaffold_lr_metrics_round50.csv","gru_scaffold_lr_metrics_round50.csv","gru_fedAvg_lr_metrics_round50.csv","lstm_fedAvg_lr_metrics_round50.csv","gru_fedAvg_diffsample_dhc_metrics_round50.csv","lstm_fedAvg_diffsample_dhc_metrics_round50.csv","gru_fedProx_diff_metrics_round50.csv","lstm_fedProx_diff_metrics_round50.csv","gru_fedProx_metrics_round50.csv","lstm_fedProx_metrics_round50.csv"]

In [50]:
res_df = pd.read_csv("agger_metrics40-50-168-G/all_stats_round50_metric_SMAPE_(percent)_sort_by_Model_Strategy.csv")
res_df = res_df[["Model_Strategy","METRIC","Q1","Median","Q3"]]
# res_df = res_df[res_df['Model_Strategy'].isin(intended_rows)]
res_df.sort_values("Median")

Unnamed: 0,Model_Strategy,METRIC,Q1,Median,Q3
22,transformer_global_model_metrics_round50.csv,SMAPE (%),9.474701,23.397741,37.010577
5,gru_global_model_metrics_round50.csv,SMAPE (%),9.946523,23.855794,37.58384
14,lstm_global_model_metrics_round50.csv,SMAPE (%),12.516487,25.84842,42.011191
19,transformer_fedAvg_metrics_round50.csv,SMAPE (%),12.442921,26.416363,42.449546
18,transformer_fedAvg_diff0_metrics_round50.csv,SMAPE (%),13.412298,27.135632,43.507664
21,transformer_fedProx_metrics_round50.csv,SMAPE (%),14.483301,27.462963,43.419694
7,gru_scaffold_diff_metrics_round50.csv,SMAPE (%),14.466854,27.600698,43.046327
8,gru_scaffold_lr_metrics_round50.csv,SMAPE (%),14.824229,28.077304,43.394052
20,transformer_fedProx_diff_metrics_round50.csv,SMAPE (%),15.143852,28.097247,44.372249
16,lstm_scaffold_diff_metrics_round50.csv,SMAPE (%),15.428086,28.649777,44.309417


In [21]:

res_df2 = pd.read_csv("agger_metrics40-50-168-T/all_stats_round50_metric_MAE_sort_by_Model_Strategy.csv")
res_df2 = res_df2[["Model_Strategy","METRIC","Q1","Median","Q3"]]
# res_df2 = res_df2[res_df2['Model_Strategy'].isin(intended_rows)]
res_df2.sort_values("Median")

Unnamed: 0,Model_Strategy,METRIC,Q1,Median,Q3
17,transformer_fedAvg_metrics_round50.csv,MAE,4.335951,9.502733,22.956894
16,transformer_fedAvg_diff0_metrics_round50.csv,MAE,4.51191,10.13696,23.553523
19,transformer_fedProx_metrics_round50.csv,MAE,4.761639,10.687016,25.059263
6,gru_scaffold_diff_metrics_round50.csv,MAE,4.829805,10.695921,24.508408
7,gru_scaffold_lr_metrics_round50.csv,MAE,4.833734,10.896919,24.548411
18,transformer_fedProx_diff_metrics_round50.csv,MAE,5.034991,11.0919,25.721099
5,gru_scaffold_diff001_metrics_round50.csv,MAE,5.030795,11.16262,25.318284
13,lstm_scaffold_diff001_metrics_round50.csv,MAE,5.126174,11.234051,25.921425
14,lstm_scaffold_diff_metrics_round50.csv,MAE,5.019606,11.277267,25.578467
15,lstm_scaffold_lr_metrics_round50.csv,MAE,5.093982,11.493388,26.295298


In [16]:

res_df3 = pd.read_csv("agger_metrics40-50-168/all_stats_round50_metric_SMAPE_(percent)_sort_by_Model_Strategy.csv")
res_df3 = res_df3[["Model_Strategy","METRIC","Q1","Median","Q3"]]
# res_df3 = res_df3[res_df3['Model_Strategy'].isin(intended_rows)]
res_df3.sort_values("Median")

Unnamed: 0,Model_Strategy,METRIC,Q1,Median,Q3
17,transformer_fedAvg_metrics_round50.csv,SMAPE (%),12.442921,26.416363,42.449546
16,transformer_fedAvg_diff0_metrics_round50.csv,SMAPE (%),13.412298,27.135632,43.507664
19,transformer_fedProx_metrics_round50.csv,SMAPE (%),14.483301,27.462963,43.419694
6,gru_scaffold_diff_metrics_round50.csv,SMAPE (%),14.466854,27.600698,43.046327
7,gru_scaffold_lr_metrics_round50.csv,SMAPE (%),14.824229,28.077304,43.394052
18,transformer_fedProx_diff_metrics_round50.csv,SMAPE (%),15.143852,28.097247,44.372249
14,lstm_scaffold_diff_metrics_round50.csv,SMAPE (%),15.428086,28.649777,44.309417
15,lstm_scaffold_lr_metrics_round50.csv,SMAPE (%),15.705115,28.850241,44.786737
13,lstm_scaffold_diff001_metrics_round50.csv,SMAPE (%),15.616963,28.939693,44.791802
5,gru_scaffold_diff001_metrics_round50.csv,SMAPE (%),15.70372,28.946013,43.929915


In [42]:
# # Combine all metrics CSVs into a single file that contains statistics for each model and strategy.

for box plot combine all model stratetgy csv files

In [None]:
# # combine data 
# import os

# # Folder
# BASE_METRICS_DIR = "forecast_metrics_new_1411"
# combined = []

# for csv_file in os.listdir(BASE_METRICS_DIR):
#     if not csv_file.endswith(".csv"):
#         continue

#     csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
#     df = pd.read_csv(csv_path)

#     # Make Model_Strategy for each row
#     model_strategy = csv_file.replace("_metrics.csv", "")
#     df["Model_Strategy"] = model_strategy

#     combined.append(df)

# # Combine all
# big_df = pd.concat(combined, ignore_index=True)

# # Correct path
# output_csv = os.path.join(BASE_METRICS_DIR, "saved_combined_data_round9_round10_1411.csv")

# # Save
# big_df.to_csv(output_csv, index=False)
# print(f"[OK] Combined raw file saved to: {output_csv}")

In [14]:
import os
import pandas as pd
from typing import List

def combine_metrics_csvs(
    BASE_METRICS_DIR: str,
    ROUNDS: List[int],
    OUTPUT_FILENAME: str = "saved_combined_metrics.csv"
) -> None:
    """
    Combines all *_metrics.csv files in the folder, filters by round,
    and saves one combined CSV with Model_Strategy column.

    Args:
        BASE_METRICS_DIR (str): Folder containing metric CSV files.
        ROUNDS (List[int]): List of rounds to include.
        OUTPUT_FILENAME (str): Name of output combined CSV file.
    """
    combined = []

    for csv_file in os.listdir(BASE_METRICS_DIR):
        if not csv_file.endswith("_metrics.csv"):
            continue

        csv_path = os.path.join(BASE_METRICS_DIR, csv_file)
        try:
            df = pd.read_csv(csv_path)
            df = df[df["round"].isin(ROUNDS)]  # Filter by rounds
            df["Model_Strategy"] = csv_file.replace("_metrics.csv", "")
            combined.append(df)
        except Exception as e:
            print(f"[ERROR] Could not read {csv_file}: {e}")

    if combined:
        final_df = pd.concat(combined, ignore_index=True)
        output_csv = os.path.join(BASE_METRICS_DIR, OUTPUT_FILENAME)
        final_df.to_csv(output_csv, index=False)
        print(f"[✔] Combined file saved: {output_csv}")
    else:
        print("[⚠] No metrics files matched or found.")


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
# input_csv willl refer to the combined metrics CSV
def plot_boxplot(input_csv, output_dir, metric_name, round_num, sortby="Model_Strategy"):
    """
    Creates a boxplot for the given metric from the pivot CSV.
    
    Args:
        input_csv (str): Path to pivot CSV file.
        output_dir (str): Directory to save the plot.
        metric_name (str): Column name to plot on Y-axis.
        round_num (int): Round number for file naming.
    """
    os.makedirs(output_dir, exist_ok=True)
    
    # Load the pivot CSV
    df = pd.read_csv(input_csv)
    
    if metric_name not in df.columns:
        raise ValueError(f"Metric '{metric_name}' not found in the CSV columns: {df.columns.tolist()}")
    
    # Sort X-axis by Model_Strategy for clean look
    df = df.sort_values(by=sortby)
    
    # Plot
    plt.figure(figsize=(14, 6))
    plt.boxplot(df[metric_name], vert=True)
    plt.xticks([1], [metric_name])
    plt.title(f"{metric_name} Boxplot for Round {round_num}")
    plt.ylabel(metric_name)
    
    # Save plot
    plot_name = f"{metric_name}_round{round_num}_sorting_strategy{sortby}_pivot_boxplot.png"
    plot_path = os.path.join(output_dir, plot_name)
    plt.savefig(plot_path)
    plt.close()
    
    print(f"[OK] Boxplot saved: {plot_path}")
