In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pathlib import Path

In [None]:
def evaluate_and_save(y_true, y_pred, model_name, impl_name, split_name, out_filename,
                      replace_existing=True):
    """
    Evaluate forecast accuracy (MAE, RMSE, MAPE, OPE, R^2) and save the result
    as one row of a CSV file inside `../data/models` (relative to the current
    working directory). The folder is created if it does not exist.

    Row identity is the triple (Model, Impl, Split), so results for different
    splits of the same model are stored side by side.

    If replace_existing=True:
        - Any existing row with the same (Model, Impl, Split) is replaced.
          If the existing CSV has no `Split` column, rows are matched on
          (Model, Impl) only.
    If replace_existing=False:
        - The new row is appended (old ones kept).

    The CSV is rewritten sorted by MAPE in ascending order.

    Parameters
    ----------
    y_true : array-like
        Ground truth values.

    y_pred : array-like
        Model predictions.

    model_name : str
        Model identifier, e.g. "Naive", "RandomForestRegressor", "LSTM".

    impl_name : str
        Model family, e.g. "baseline", "statsmodels", "ml", "neural".
        Stored lower-cased.

    split_name : str
        Data split identifier, e.g. "test", "val". Stored lower-cased.

    out_filename : str
        File name of output CSV (saved in ../data/models/).

    replace_existing : bool, default=True
        Whether an existing row for the same model/impl/split is replaced
        (True) or kept alongside the new row (False).

    Returns
    -------
    dict
        Dictionary with keys "Model", "Impl", "Split", "MAE", "RMSE", "MAPE",
        "OPE", "R2". MAPE and OPE are fractions (not multiplied by 100).
    """

    # Convert inputs
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    impl_name = impl_name.lower()
    split_name = split_name.lower()

    # Lower bound for denominators so zero targets do not divide by zero
    eps = 1e-12

    # Metrics
    mae = float(mean_absolute_error(y_true, y_pred))
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mape = float(np.mean(np.abs((y_true - y_pred) / np.clip(np.abs(y_true), eps, None))))
    # OPE: absolute error of the summed forecast relative to the summed truth
    total_true = y_true.sum()
    ope = float(np.abs(y_pred.sum() - total_true) / np.clip(np.abs(total_true), eps, None))
    r2 = float(r2_score(y_true, y_pred))

    result = {
        "Model": model_name,
        "Impl": impl_name,
        "Split": split_name,
        "MAE": mae,
        "RMSE": rmse,
        "MAPE": mape,
        "OPE": ope,
        "R2": r2,
    }

    out_path = Path("../data/models")
    out_path.mkdir(parents=True, exist_ok=True)
    csv_path = out_path / out_filename

    new_row = pd.DataFrame([result])

    if csv_path.exists():
        existing = pd.read_csv(csv_path)
        if replace_existing:
            same_run = (existing["Model"] == model_name) & (existing["Impl"] == impl_name)
            # Only drop the row of the SAME split; other splits of this model stay.
            if "Split" in existing.columns:
                same_run &= (existing["Split"].astype(str).str.lower() == split_name)
            existing = existing[~same_run]
        # Skip concat when nothing is left: concatenating an empty frame is
        # deprecated in recent pandas and can disturb column dtypes.
        if existing.empty:
            df = new_row
        else:
            df = pd.concat([existing, new_row], ignore_index=True)
    else:
        df = new_row

    # Sort and save
    df = df.sort_values("MAPE", ascending=True)
    df.to_csv(csv_path, index=False)

    return result

In [None]:
import pandas as pd
from pathlib import Path

def load_best_models(
    filename: str,
    k: int = 1,
    group_by: str = "Model",
    split: str = "Test"
):
    """
    Load a CSV containing forecast evaluation results and return
    the best k rows per group (lowest MAE first), optionally filtered
    by validation/test split.

    The CSV is expected to be inside ../data/models/ (relative to the
    current working directory).

    Required columns:
        - Model : str  => model name
        - Impl  : str  => model family (baseline, ml, neural, ...)
        - MAE   : float => metric for ranking
    Required only when `split` is not None:
        - Split : str  => e.g. "val" or "test"

    Parameters
    ----------
    filename : str
        Name of the CSV file located in ../data/models/.
        Example: "ml_results.csv"

    k : int, default=1
        Number of top models per group to return. Must be >= 1.

    group_by : {"Impl", "Model", ...}, default="Model"
        Column used for grouping before selecting the top k.
        Most common choices:
            "Impl"  => best per family
            "Model" => best per model type

    split : str or None, default="Test"
        If provided (e.g. "Val" or "Test"):
            Only rows belonging to this split are evaluated. The comparison
            ignores letter case and surrounding whitespace, so "Test" also
            matches rows stored as "test".
        If None:
            All rows are used.

    Returns
    -------
    pd.DataFrame
        Dataframe containing top-k models per selected group, ordered by
        ascending MAE, with a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If the CSV does not exist.
    ValueError
        If k < 1, a required column is missing, `group_by` is not a column,
        or no row matches the requested split.

    Example
    -------
    # Top 3 ML models (Validation only)
    top3 = load_best_models("ml_results.csv", k=3, group_by="Impl", split="Val")

    # Best model per model name on Test set
    best_test = load_best_models("neural_results.csv", k=1, group_by="Model", split="Test")

    # Combine best models from multiple CSVs
    combined = pd.concat([
        load_best_models("baseline_results.csv", k=1, split="Val"),
        load_best_models("ml_results.csv", k=1, split="Val"),
        load_best_models("neural_results.csv", k=1, split="Val"),
    ], ignore_index=True)
    """

    # head(0) / head(-n) would silently return empty or truncated groups
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")

    # Build full path automatically
    base_dir = Path("../data/models")
    full_path = base_dir / filename

    if not full_path.exists():
        raise FileNotFoundError(f"CSV not found: {full_path}")

    df = pd.read_csv(full_path)

    # Required columns
    required = ["Model", "Impl", "MAE"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns in CSV: {missing}")

    # Optional split filtering
    if split is not None:
        if "Split" not in df.columns:
            raise ValueError("CSV does not contain a 'Split' column.")
        # Case-insensitive match: stored split labels may be lower-cased
        # while callers typically pass "Val" / "Test".
        wanted = str(split).strip().lower()
        stored = df["Split"].astype(str).str.strip().str.lower()
        df = df[stored == wanted]
        if df.empty:
            raise ValueError(f"No rows found for split '{split}' in {filename}")

    # Ensure grouping column exists
    if group_by not in df.columns:
        raise ValueError(f"group_by='{group_by}' not found in CSV columns: {df.columns.tolist()}")

    # Sort by MAE (ascending => better)
    df_sorted = df.sort_values("MAE", ascending=True)

    # Take top k per group; head() keeps the MAE ordering of df_sorted
    best_k = (
        df_sorted
        .groupby(group_by, as_index=False)
        .head(k)
        .reset_index(drop=True)
    )

    return best_k
