In [None]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon
from itertools import combinations

In [None]:
def load_residuals(csv_path):
    """
    Load the first column of a header-less CSV as absolute residuals.

    The file is read with ``header=None``; entries of the first column that
    cannot be parsed as numbers are discarded, the remaining values are made
    non-negative, and the result is re-indexed from zero.

    Any exception raised while reading or converting is reported with
    ``print`` and an empty float Series is returned instead of propagating.
    """
    try:
        first_column = pd.read_csv(csv_path, header=None).iloc[:, 0]
        numeric = pd.to_numeric(first_column, errors="coerce")
        magnitudes = numeric.dropna().abs()
        magnitudes = magnitudes.reset_index(drop=True)
    except Exception as e:
        # Best-effort loader: callers detect failure through the empty result.
        print(f"Erro ao ler {csv_path}: {e}")
        magnitudes = pd.Series(dtype=float)
    return magnitudes

In [None]:

def align_series(series_dict):
    """
    Truncate every series to the length of the shortest one.

    Parameters
    ----------
    series_dict : dict
        Mapping of label -> series. Each value must support ``len`` and
        positional slicing through ``.iloc``.

    Returns
    -------
    dict
        New dict with the same keys, each value cut to its first ``min_len``
        positions, where ``min_len`` is the smallest length among the inputs.
        An empty mapping yields an empty dict.
    """
    if not series_dict:
        # min() over an empty iterable raises ValueError; there is nothing to align.
        return {}
    min_len = min(len(s) for s in series_dict.values())
    return {k: v.iloc[:min_len] for k, v in series_dict.items()}


In [None]:
    """
    Avalia m√∫ltiplos modelos e indica o melhor.
    """
    residuals = {name: load_residuals(path) for name, path in csv_paths.items()}
    residuals = {k: v for k, v in residuals.items() if not v.empty}

    if len(residuals) < 2:
        print("√â necess√°rio pelo menos dois modelos v√°lidos.")
        return

    residuals = align_series(residuals)

    print("\nüìä Erro absoluto m√©dio por modelo:")
    mean_errors = {}
    for model, values in residuals.items():
        mean_errors[model] = values.mean()
        print(f"{model}: {values.mean():.6f}")

    best_model = min(mean_errors, key=mean_errors.get)

    print(f"\nüèÜ Melhor modelo (menor erro m√©dio): {best_model}")

    print("\nüìê Teste de Wilcoxon (compara√ß√µes pareadas):")
    for (m1, r1), (m2, r2) in combinations(residuals.items(), 2):
        stat, p = wilcoxon(r1, r2)
        winner = m1 if r1.mean() < r2.mean() else m2

        print(
            f"{m1} vs {m2} | "
            f"p-value = {p:.4e} | "
            f"Melhor: {winner}"
        )

    return best_model


In [None]:
# Residual CSV path for each model label that takes part in the comparison.
csvs = dict(
    prophet="prophet_residuals.csv",
    arima="arima_residuals.csv",
    rf="residuals_rf.csv",
    rnn="residuals_rnn.csv",
)

# `best` receives the label returned by evaluate_models
# (None when fewer than two of the files yield data).
best = evaluate_models(csvs)


📊 Erro absoluto médio por modelo:
prophet: 2.252928
arima: 2.419981
rf: 1.507257
rnn: 1.255670

🏆 Melhor modelo (menor erro médio): rnn

📐 Teste de Wilcoxon (comparações pareadas):
prophet vs arima | p-value = 4.8911e-03 | Melhor: prophet
prophet vs rf | p-value = 8.3666e-04 | Melhor: rf
prophet vs rnn | p-value = 2.1790e-05 | Melhor: rnn
arima vs rf | p-value = 2.9304e-05 | Melhor: rf
arima vs rnn | p-value = 3.3665e-06 | Melhor: rnn
rf vs rnn | p-value = 1.6681e-01 | Melhor: rnn
