In [23]:
# Bibliotecas principais
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Modelagem
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error
from lightgbm import LGBMRegressor, early_stopping
import joblib

In [None]:
!pip install pandas numpy matplotlib seaborn scikit-learn lightgbm

In [None]:
# Load the processed dataset with features, parsing 'date' as datetimes
# and using it as the DataFrame index.
processed_csv = "../data/processed/vendas_processed_features.csv"
df = pd.read_csv(processed_csv, index_col='date', parse_dates=['date'])

# Preview the first rows
df.head()

In [None]:
# Column clean-up, in this order:
#   1. drop 'dia_da_semana' and 'feriado_nacional'
#   2. rename 'em_promocao' to 'is_promotion'
#   3. cast the three flag columns to int
#   4. drop every row that still contains a missing value
flag_columns = ['is_promotion', 'is_holiday', 'is_weekend']

df = (
    df.drop(columns=['dia_da_semana', 'feriado_nacional'])
      .rename(columns={'em_promocao': 'is_promotion'})
      .astype({flag: int for flag in flag_columns})
      .dropna()
      .copy()
)

df.head()

In [None]:
# The target is the sales column; every other column is used as a feature.
target = 'vendas'
features = df.columns[df.columns != target].tolist()

# Feature matrix and target vector
X = df.loc[:, features]
y = df.loc[:, target]

In [None]:
# Time-ordered cross-validation: TimeSeriesSplit yields folds whose training
# indices always precede the validation indices, so no future rows leak into training.
# NOTE(review): the validation fold is also the early-stopping set, so the
# reported scores are somewhat optimistic.
tscv = TimeSeriesSplit(n_splits=3)
metrics = []

for fold, (train_idx, val_idx) in enumerate(tscv.split(X), 1):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    model = LGBMRegressor(
        objective='regression',
        n_estimators=1000
    )

    # Silence logging through the callback: the `verbose` keyword of fit() was
    # deprecated in LightGBM 3.3 and removed in 4.0, where passing it raises TypeError.
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        eval_metric='mae',
        callbacks=[early_stopping(stopping_rounds=10, verbose=False)]
    )

    y_pred = model.predict(X_val)
    mae = mean_absolute_error(y_val, y_pred)

    # sqrt(MSE) works on every scikit-learn version; `squared=False` was
    # deprecated in 1.4 and later removed.
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))

    # MAPE is undefined where the actual value is 0 (division by zero gives inf/NaN),
    # so it is computed only over rows with non-zero actual sales.
    actual = y_val.to_numpy(dtype=float)
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - y_pred[nonzero]) / actual[nonzero])) * 100
    else:
        mape = np.nan

    metrics.append({'fold': fold, 'MAE': mae, 'RMSE': rmse, 'MAPE': mape})
    print(f"Fold {fold} → MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")

In [None]:

# Final model: retrain on the full dataset (no hold-out) and persist it.
# `params` was not defined anywhere before this cell, so a fresh
# "Restart & Run All" stopped here with a NameError. It is defined explicitly,
# using the same objective as the cross-validation model.
# 'verbose': -1 keeps training quiet; the old `verbose_eval` keyword of
# lgb.train() was removed in LightGBM 4.0.
params = {
    'objective': 'regression',
    'verbose': -1,
}

# 100 is lgb.train's default number of rounds, stated explicitly here.
# NOTE(review): consider replacing it with the best iteration found during
# cross-validation.
final_train_data = lgb.Dataset(X, label=y)
final_model = lgb.train(params, final_train_data, num_boost_round=100)

# Save the model
final_model.save_model("model_lightgbm.txt")

# ---------------------------
# Forecast the next 14 days (recursive, one step at a time)
# ---------------------------
# Each future day is predicted from a feature row whose lag columns are rebuilt
# from the running sales history (observed values followed by earlier predictions).
# The features are refreshed BEFORE predicting, so the first prediction is for the
# day after the last observed date rather than a re-prediction of that date.
#
# NOTE(review): assumes one row per consecutive day and that `lag_<k>` holds the
# sales value k rows earlier; confirm against the feature-engineering step.
# NOTE(review): any other feature (e.g. is_holiday, is_promotion, or anything else
# derived from sales) keeps the value of the last observed row, because its future
# value cannot be derived here.
HORIZON = 14

# Map every "lag_<k>" feature to its integer offset k.
lag_offsets = {
    col: int(col[4:])
    for col in features
    if col.startswith('lag_') and col[4:].isdigit()
}

history = df['vendas'].astype(float).tolist()
last_row = df.iloc[-1].copy()
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=HORIZON)
future_preds = []

for forecast_date in future_dates:
    # Refresh the lags for the day being predicted: lag_k is the sales value
    # k steps back in the history. A lag longer than the history is left as is.
    for col, offset in lag_offsets.items():
        if offset <= len(history):
            last_row[col] = history[-offset]

    # Weekend flag follows the calendar (presumably Saturday/Sunday; verify
    # against how the column was built upstream).
    if 'is_weekend' in last_row.index:
        last_row['is_weekend'] = int(forecast_date.dayofweek >= 5)

    X_new = last_row[features].to_frame().T
    y_pred = final_model.predict(X_new)[0]
    future_preds.append(y_pred)

    # The prediction becomes the most recent "observation" for the next step.
    history.append(y_pred)
    last_row['vendas'] = y_pred

# Show the forecasts
forecast_df = pd.DataFrame({'date': future_dates, 'vendas_pred': future_preds}).set_index('date')
print("\nPrevisões para os próximos 14 dias:")
print(forecast_df)