### IMPORTS

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from pathlib import Path

### CARREGANDO ARQUIVOS

In [2]:
BASE_DIR = Path.cwd().parent
sys.path.append(str(BASE_DIR / "src" / "ml"))

from train_baseline import train_baseline_models
from train_ml_models import train_ml_models
from train_time_series import train_time_series_models

DATA_PATH = BASE_DIR / "data" / "processed" / "ml_dataset.csv"

### EXECUTANDO MODELOS

Baseline

In [3]:
baseline_metrics = train_baseline_models(DATA_PATH)

S√©rie Temporal

In [4]:
df = pd.read_csv(DATA_PATH)
sarima_metrics, forecasts = train_time_series_models(df)

Machine Learning

In [None]:
ml_metrics = train_ml_models(DATA_PATH)

### CONSOLIDAR RESULTADOS

In [None]:
metrics_df = pd.concat([
    baseline_metrics,
    sarima_metrics,
    ml_metrics
], ignore_index=True)

In [7]:
# Unificar nomes de colunas e preencher ausentes
metrics_df.columns = metrics_df.columns.str.upper()

In [8]:
# Padronizar todos os nomes poss√≠veis
metrics_df = metrics_df.rename(columns={
    'MODEL': 'MODELO',
    'MAE': 'MAE',
    'RMSE': 'RMSE',
    'MAPE': 'MAPE',
    'R2': 'R2',
    'TARGET': 'TARGET'
})


In [9]:
# Garantir que todas as colunas existam
for col in ['TARGET', 'MODELO', 'MAE', 'RMSE', 'MAPE', 'R2']:
    if col not in metrics_df.columns:
        metrics_df[col] = None

In [10]:
# Reorganizar as colunas
metrics_df = metrics_df[['TARGET', 'MODELO', 'MAE', 'RMSE', 'MAPE', 'R2']]

In [11]:
# Salvar a planilha final
metrics_df.to_csv("data/processed/model_metrics_clean.csv", index=False)
display(metrics_df)

OSError: Cannot save file into a non-existent directory: 'data\processed'

### VISUALIZA√á√ÉO COMPARATIVA

In [None]:
plt.figure(figsize=(10, 6))
sns.barplot(data=metrics_df, x="modelo", y="RMSE", hue="modelo")
plt.title("Compara√ß√£o de RMSE entre modelos")
plt.ylabel("RMSE (menor √© melhor)")
plt.xlabel("")
plt.xticks(rotation=30)
plt.legend([], [], frameon=False)
plt.tight_layout()
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=metrics_df, x="modelo", y="MAPE", hue="modelo")
plt.title("Compara√ß√£o de MAPE (%) entre modelos")
plt.ylabel("MAPE (%)")
plt.xlabel("")
plt.xticks(rotation=30)
plt.legend([], [], frameon=False)
plt.tight_layout()
plt.show()

### ANALISE DE PREVISAO DOS ULTIMOS MESES

In [None]:
pred_ml = pd.read_csv("data/processed/predictions_ml.csv")
pred_ml["data"] = pd.to_datetime(pred_ml["data"])

plt.figure(figsize=(12, 6))
plt.plot(pred_ml["data"], pred_ml["real"], label="Real", linewidth=2)
plt.plot(pred_ml["data"], pred_ml["rf_previsto"], label="Random Forest", linestyle="--")
plt.plot(pred_ml["data"], pred_ml["xgb_previsto"], label="XGBoost", linestyle="--")
plt.title("Previs√£o de Produ√ß√£o - Modelos ML")
plt.legend()
plt.tight_layout()
plt.show()

### RESUMO FINAL

In [None]:
best_model = metrics_df.sort_values("RMSE").iloc[0]
print(f"\nüèÜ Melhor modelo: {best_model['modelo']}")
print(f"MAE: {best_model['MAE']:.2f}, RMSE: {best_model['RMSE']:.2f}, MAPE: {best_model['MAPE']:.2f}%")