In [12]:
# prophet_functions.py

import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import plotly.graph_objects as go
import numpy as np # Import the NumPy library

def limpar_df_ipea(df_ipea):
    df_ipea.rename(columns={
        'data': 'ds',
        'valor': 'y',
    }, inplace=True)
    df_ipea.drop(columns=['percentual', 'ano', 'decada', 'var_pct_ano_anterior'], inplace=True)
    df_ipea['ds'] = pd.to_datetime(df_ipea['ds'])
    df_ipea['y'] = df_ipea['y']
    df_ipea.sort_values(by=['ds'], ascending=True, inplace=True)
    return df_ipea

def treinar_modelo(train_data):
    model = Prophet(daily_seasonality=True)
    model.fit(train_data)
    return model

def realizar_previsao(model, periods=90, freq='B'):
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    return forecast

def calcular_metricas(result):
    y_result = result['y']
    yhat_result = result['yhat']
    mse = mean_squared_error(y_result, yhat_result)
    mae = mean_absolute_error(y_result, yhat_result)
    r2 = r2_score(y_result, yhat_result)
    mape = (np.abs((y_result - yhat_result) / y_result)).mean() * 100
    return mse, mae, r2, mape

def plotar_previsao(df, forecast):
    fig = go.Figure()

    # Plotando os valores reais
    fig.add_trace(go.Scatter(x=df['ds'], y=df['y'], mode='lines', name='Valores Reais'))

    # Plotando a previsão
    fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name='Previsão'))

    fig.update_layout(
        title='Previsão com Prophet',
        xaxis_title='Data',
        yaxis_title='Valor',
        legend=dict(x=0, y=1),
        margin=dict(l=0, r=0, t=30, b=0)
    )

    fig.show()

In [13]:
# main_pipeline.py

import pandas as pd
from sklearn.pipeline import Pipeline
# from prophet_functions import limpar_df_ipea, treinar_modelo, realizar_previsao, calcular_metricas, plotar_previsao

# Importar dados
df_ipea = pd.read_csv('/content/drive/MyDrive/PosTech_Analise_de_dados/FASE4/tech_challenge_4/dados/ipeadata_limpo.csv')

# Limpar dados
df_ipea = limpar_df_ipea(df_ipea)

# Filtrar os últimos 4 anos de dados
prophet_df_ipea = df_ipea[df_ipea['ds'] >= df_ipea['ds'].max() - pd.DateOffset(years=4)]

# Dividir dados em treino e teste
train_ipea = prophet_df_ipea.sample(frac=0.8, random_state=0)
test_ipea = prophet_df_ipea.drop(train_ipea.index)

# Instanciar e treinar o modelo Prophet
modelo_prophet = treinar_modelo(train_ipea)

# Realizar a previsão
previsao_prophet = realizar_previsao(modelo_prophet)

# Combinar previsões com dados reais para calcular métricas
result = pd.merge(previsao_prophet[['ds', 'yhat']], train_ipea[['ds', 'y']], on='ds', how='inner')

# Calcular métricas de desempenho
mse, mae, r2, mape = calcular_metricas(result)

print(f'MSE: {mse:.2f}')
print(f'MAE: {mae:.2f}')
print(f'R²: {r2:.2f}')
print(f'MAPE: {mape:.2f}%')

# Plotar a previsão e valores reais
plotar_previsao(prophet_df_ipea, previsao_prophet)

DEBUG:cmdstanpy:input tempfile: /tmp/tmpw1dwu2ox/3ep2f373.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpw1dwu2ox/w2zwh6on.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=43795', 'data', 'file=/tmp/tmpw1dwu2ox/3ep2f373.json', 'init=/tmp/tmpw1dwu2ox/w2zwh6on.json', 'output', 'file=/tmp/tmpw1dwu2ox/prophet_modelhkwumlhi/prophet_model-20240712171754.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:17:54 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:17:54 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


MSE: 23.11
MAE: 3.66
R²: 0.94
MAPE: 4.80%


In [14]:
import joblib as jb
jb.dump(modelo_prophet, 'modelo_prophet.pkl')

['modelo_prophet.pkl']