### IMPORTS

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from pathlib import Path

### CARREGANDO ARQUIVOS

In [None]:
# Project root, taken as the parent of the current working directory.
# NOTE(review): this assumes the notebook is launched from a direct subfolder
# of the repository root — confirm against how the notebook is run.
BASE_DIR = Path.cwd().parent

# Make the training modules under src/ml importable by bare module name.
# Guarded so that re-running this cell does not keep appending the same
# directory to sys.path.
ML_SRC_DIR = str(BASE_DIR / "src" / "ml")
if ML_SRC_DIR not in sys.path:
    sys.path.append(ML_SRC_DIR)

# These imports must stay below the sys.path setup above: they resolve
# against the directory that was just registered.
from train_baseline import train_baseline_models
from train_ml_models import train_ml_models
from train_time_series import train_time_series_models
from organize_metrics import consolidate_metrics

# Processed dataset consumed by the training cells below.
DATA_PATH = BASE_DIR / "data" / "processed" / "ml_dataset.csv"

### EXECUTANDO MODELOS

Baseline

In [None]:
# Run the baseline training entry point on the processed dataset path.
# NOTE(review): the result is later tagged with `.assign(origem=...)` and
# concatenated, so it is presumably a DataFrame of metrics — confirm.
baseline_metrics = train_baseline_models(DATA_PATH)

Série Temporal

In [None]:
# Load the processed dataset into memory: unlike the other training calls in
# this notebook, the time-series entry point receives the DataFrame itself
# rather than the file path.
df = pd.read_csv(DATA_PATH)
# The call unpacks into two results: the metrics and the forecasts.
# NOTE(review): `forecasts` is not used in the visible part of this notebook.
sarima_metrics, forecasts = train_time_series_models(df)

Machine Learning

In [None]:
# Run the ML training entry point on the dataset path. The call unpacks into
# two results, named here for Random Forest and XGBoost (presumably one
# metrics DataFrame per model — confirm against train_ml_models).
rf_metrics, xgb_metrics = train_ml_models(DATA_PATH)

### CONSOLIDAR RESULTADOS

In [None]:
# Build the consolidated metrics table. The call takes no arguments, so it
# presumably gathers results persisted by the training steps — TODO confirm.
metrics_df = consolidate_metrics()
# Last expression of the cell: the notebook renders up to the first 15 rows.
metrics_df.head(15)

### VISUALIZAÇÃO COMPARATIVA

In [None]:
# One bar chart per error metric, both drawn from the consolidated table.
# Each entry is (metric column, chart title, y-axis label).
chart_specs = (
    ("RMSE", "Comparação de RMSE entre modelos", "RMSE (menor é melhor)"),
    ("MAPE", "Comparação de MAPE (%) entre modelos", "MAPE (%)"),
)

for metric_col, chart_title, y_label in chart_specs:
    plt.figure(figsize=(10, 6))
    sns.barplot(data=metrics_df, x="modelo", y=metric_col, hue="modelo")
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel("")
    plt.xticks(rotation=30)
    # hue repeats the x variable, so the legend would only duplicate the
    # tick labels; replace it with an empty, frameless one.
    plt.legend([], [], frameon=False)
    plt.tight_layout()
    plt.show()

### ANÁLISE DE PREVISÃO DOS ÚLTIMOS MESES

In [None]:
# Tag each model family's metrics with an "origem" label, then stack them
# into a single frame with a fresh 0..n-1 index.
metrics_by_origin = {
    "Baseline": baseline_metrics,
    "Random Forest": rf_metrics,
    "XGBoost": xgb_metrics,
    "SARIMA": sarima_metrics,
}
all_metrics = pd.concat(
    [frame.assign(origem=label) for label, frame in metrics_by_origin.items()],
    ignore_index=True,
)

In [None]:

# Draw the RMSE chart and then the MAPE chart, grouped by model family
# ("origem"). Each entry is (metric column, chart title, y-axis label).
origin_charts = [
    ("RMSE", "Comparação de RMSE entre Modelos", "RMSE (menor é melhor)"),
    ("MAPE", "Comparação de MAPE entre Modelos", "MAPE (%)"),
]

for column, heading, axis_label in origin_charts:
    plt.figure(figsize=(10, 6))
    sns.barplot(data=all_metrics, x="origem", y=column, hue="origem")
    plt.title(heading)
    plt.ylabel(axis_label)
    plt.xlabel("")
    # hue repeats the x variable, so swap the legend for an empty,
    # frameless one instead of duplicating the tick labels.
    plt.legend([], [], frameon=False)
    plt.tight_layout()
    plt.show()

### RESUMO FINAL

In [None]:
# Keep only rows that have both a model name and a usable RMSE value.
valid_metrics = metrics_df.dropna(subset=["modelo", "RMSE"])

if valid_metrics.empty:
    # Nothing to rank: say so instead of failing with an opaque IndexError
    # on .iloc[0].
    print("\n Nenhum modelo com RMSE válido para comparar.")
else:
    # Lowest RMSE wins; an ascending sort puts that row first.
    best_model = valid_metrics.sort_values("RMSE").iloc[0]

    # Attach the percent sign only to a real value, so a missing MAPE prints
    # "N/A" rather than "N/A%"; use the same 2-decimal format as MAE/RMSE.
    mape_value = best_model["MAPE"]
    mape_text = f"{mape_value:.2f}%" if pd.notna(mape_value) else "N/A"

    print(f"\n Melhor modelo: {best_model['modelo']}")
    print(f"Target: {best_model['target']}")
    print(f"MAE: {best_model['MAE']:.2f}, RMSE: {best_model['RMSE']:.2f}, MAPE: {mape_text}")
