In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
import statsmodels.tsa.stattools as st
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pymannkendall as mk
from scipy.stats import kruskal
import plotly.graph_objects as go
from pmdarima import auto_arima
import gc

In [None]:
# Load the raw dataset (comma-separated, which is read_csv's default delimiter).
df = pd.read_csv('dataset-case-iqvia.csv')

In [None]:
# Normalise dtypes: week_dt becomes plain date objects, identifier/attribute columns become strings.
df['week_dt'] = pd.to_datetime(df['week_dt']).dt.date
for label_column in ('region_nm', 'product_id', 'product_attr_1', 'product_attr_2', 'product_attr_3'):
    df[label_column] = df[label_column].astype(str)

In [None]:
# Total units per (week, supplier, region, product); as_index=False keeps the keys as columns.
df_st = df.groupby(
    ['week_dt', 'dsupp_id', 'region_nm', 'product_id'],
    as_index=False,
)['units_qty'].sum()

In [None]:
# Number of df_st rows available for each (supplier, region, product) series.
(
    df_st
    .groupby(['dsupp_id', 'region_nm', 'product_id'])
    .count()
)

In [None]:
# Same per-series counts as a flat frame, with the grouping keys back as columns.
df_count = (
    df_st
    .groupby(['dsupp_id', 'region_nm', 'product_id'])
    .count()
    .reset_index()
)

In [None]:
# Display the per-(dsupp_id, region_nm, product_id) row counts held in df_count.
df_count

In [None]:
# Distinct per-series row counts, ascending.
sorted(df_count['units_qty'].drop_duplicates().tolist())

Problema nas vendas:
- várias séries têm uma única venda ou poucas vendas

In [None]:
# Inspect one (supplier, region, product) series in df_st.
# NOTE(review): presumably this is the series at row index 5 of df_count — confirm.
df_st.query("dsupp_id == 1 and region_nm == 'Sul' and product_id == '185'")

In [None]:
# Sorted list of the distinct product_id values present in df_st for dsupp_id 1.
(
    df_st
    .query("dsupp_id == 1")["product_id"]
    .sort_values()
    .unique()
    .tolist()
)

Nem todos os product_id têm vendas registradas para este dsupp_id.

Irei preencher as combinações ausentes com units_qty = 0.

In [None]:
# Distinct values of every dimension, used to build the full week x supplier x product x region grid.
list_supp_id, list_regiao, list_id, list_weeK = (
    df[column].unique()
    for column in ('dsupp_id', 'region_nm', 'product_id', 'week_dt')
)

In [None]:
# Show the column labels of the raw frame.
df.columns

In [None]:
# Cartesian product of all weeks, suppliers, products and regions.
index = pd.MultiIndex.from_product(
    iterables=[
        list_weeK,
        list_supp_id,
        list_id,
        list_regiao,
    ],
    names=[
        'week_dt',
        'dsupp_id',
        'product_id',
        'region_nm',
    ],
)

In [None]:
# Materialise the index levels as ordinary columns, one row per combination.
df_full = index.to_frame(index=False)

In [None]:
# Left-join the observed weekly totals onto the full grid; unmatched rows get NaN in units_qty.
df_final = pd.merge(
    df_full,
    df_st,
    how='left',
    on=['week_dt', 'dsupp_id', 'product_id', 'region_nm'],
)

In [None]:
# Display the merged frame (full grid left-joined with the weekly totals from df_st).
df_final

In [None]:
# Grid rows with no matching row in df_st count as zero units.
df_final = df_final.fillna({'units_qty': 0})

Confirmando se está tudo preenchido

In [None]:
# Number of distinct week_dt values in df.
len(list_weeK)

In [None]:
# Distinct per-series counts of non-null units_qty in the filled frame.
(
    df_final
    .groupby(['dsupp_id', 'region_nm', 'product_id'])
    .count()
    .reset_index()['units_qty']
    .unique()
    .tolist()
)

In [None]:
# Per-series row counts of the filled frame, as a flat table.
(
    df_final
    .groupby(['dsupp_id', 'region_nm', 'product_id'])
    .count()
    .reset_index()
)

In [None]:
# Store week_dt as text.
df_final = df_final.astype({'week_dt': str})

In [None]:
# Order rows by week_dt and renumber the index from zero.
df_final = df_final.sort_values('week_dt', ignore_index=True)

In [None]:
# Show the distinct dsupp_id values the modelling loop iterates over.
list_supp_id

In [None]:
# Per-series pipeline. For every (supplier, region, product) combination:
#   1. split the series chronologically into train (80%) and test (20%);
#   2. run trend (Mann-Kendall), stationarity (ADF) and seasonality (Kruskal-Wallis) diagnostics on train;
#   3. save a decomposition chart;
#   4. fit an ARIMA model with auto_arima, forecast the test window and save error metrics and a forecast chart.
# Diagnostics are best-effort: a failing test is reported and the series is still modelled.
output_dir = r"F:\ST"  # NOTE(review): hard-coded absolute path; the directory must already exist.

for sup_id in list_supp_id:
    for regiao in list_regiao:
        for iD in list_id:
            a = df_final.query(f"dsupp_id == {sup_id} and region_nm == '{regiao}' and product_id == '{iD}'")
            # shuffle=False keeps row order, so the last 20% of rows form the test window.
            train, test = train_test_split(a, test_size=0.2, shuffle=False)
            series_key = f"{sup_id}_{regiao}_{iD}"
            html_prefix = fr"{output_dir}\st_{series_key}"
            print(series_key)

            # Trend: Mann-Kendall test.
            try:
                mk_test = mk.original_test(train['units_qty'])
                if mk_test.p > 0.05:
                    print("Tendência NÃO É estatisticamente significativa")
                else:
                    print("Tendência É estatisticamente significativa")
            except Exception as exc:
                print(f"Teste de Mann-Kendall falhou: {exc}")

            # Stationarity: Augmented Dickey-Fuller test (element 1 of the result is the p-value).
            try:
                adfuller_p = adfuller(train['units_qty'])
                if adfuller_p[1] > 0.05:
                    print("Série NÃO é estacionáia")
                else:
                    print("Série é estacionáia")
            except Exception as exc:
                print(f"Teste ADF falhou: {exc}")

            # Seasonality: Kruskal-Wallis across calendar months. The groups must be
            # sets of observations (one per month), not individual observations.
            # Reset per iteration so a previous series' result can never leak in.
            bool_seasonal = False
            m = 1
            try:
                months = pd.to_datetime(train['week_dt']).dt.month
                month_groups = [grp.to_numpy() for _, grp in train['units_qty'].groupby(months)]
                if len(month_groups) < 2:
                    raise ValueError("need observations from at least two calendar months")
                kw_stat, kw_p = kruskal(*month_groups)
                # `<=` is False for NaN, so an undefined p-value counts as "no evidence".
                if kw_p <= 0.05:
                    print("Sazonalidade HÁ evidência estatisticas")
                    # Seasonal period = lag (1..24) with the highest autocorrelation.
                    acf_values = st.acf(train["units_qty"], nlags=24)
                    m = int(np.argmax(acf_values[1:]) + 1)
                    bool_seasonal = True
                else:
                    print("Sazonalidade SEM evidência estatisticas")
            except Exception as exc:
                print(f"Teste de sazonalidade falhou: {exc}")
            print("****************************************************************")

            # NOTE(review): week_dt suggests weekly observations, so period=7 is a
            # 7-week cycle rather than a weekly one — confirm this is intended.
            decomposicao = seasonal_decompose(train['units_qty'], period=7)
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=train["week_dt"], y=train["units_qty"], mode="lines", name="Série Original", line=dict(color="blue", dash="dot")))
            fig.add_trace(go.Scatter(x=train["week_dt"], y=decomposicao.trend, mode="lines", name="Tendência", line=dict(color="red", dash="dot")))
            fig.add_trace(go.Scatter(x=train["week_dt"], y=decomposicao.seasonal, mode="lines", name="Sazonalidade", line=dict(color="green", dash="dot")))
            fig.update_layout(title="Decomposição da Série Temporal",
                              xaxis_title="Data",
                              yaxis_title="Quantidade de Unidades",
                              template="plotly_white")
            fig.write_html(f"{html_prefix}.html")

            # Only pass a seasonal period when seasonality was actually detected.
            arima_kwargs = dict(seasonal=bool_seasonal, trace=False, stepwise=True,
                                error_action='ignore', suppress_warnings=True)
            if bool_seasonal:
                arima_kwargs['m'] = m
            model = auto_arima(train['units_qty'], **arima_kwargs)

            # Forecast exactly as many periods as the test window holds, and compare
            # plain arrays so pandas index alignment cannot turn the errors into NaN.
            prediction = np.asarray(model.predict(n_periods=len(test)))
            actual = test['units_qty'].to_numpy()
            mae = mean_absolute_error(actual, prediction)
            rmse = np.sqrt(mean_squared_error(actual, prediction))
            # MAPE is undefined where the actual value is 0 (common after zero-filling),
            # so it is computed over non-zero actuals only; NaN when there are none.
            nonzero = actual != 0
            if nonzero.any():
                mape = np.mean(np.abs((actual[nonzero] - prediction[nonzero]) / actual[nonzero])) * 100
            else:
                mape = np.nan

            fig_erro = go.Figure(data=[go.Table(
                header=dict(values=["Métrica", "Valor"],
                            fill_color='royalblue',
                            align='left',
                            font=dict(color='white', size=14)),
                cells=dict(values=[["MAE", "RMSE", "MAPE"], [f"{mae:.4f}", f"{rmse:.4f}", f"{mape:.2f}%"]],
                           fill_color='lightgray',
                           align='left',
                           font=dict(size=14))
            )])
            fig_erro.update_layout(title="Métricas de Erro do Modelo ARIMA")
            fig_erro.write_html(f"{html_prefix}_erro.html")

            fig_pred = go.Figure()
            # Training window.
            fig_pred.add_trace(go.Scatter(x=train["week_dt"], y=train["units_qty"],
                                          mode='lines', name='Treino', line=dict(color='blue')))
            # Held-out actuals.
            fig_pred.add_trace(go.Scatter(x=test["week_dt"], y=test["units_qty"],
                                          mode='lines', name='Teste', line=dict(color='green')))
            # Model forecast over the held-out window.
            fig_pred.add_trace(go.Scatter(x=test["week_dt"], y=prediction,
                                          mode='lines', name='Previsão',
                                          line=dict(color='red', dash='dot')))
            fig_pred.update_layout(title="Previsão com ARIMA",
                                   xaxis_title="Data",
                                   yaxis_title="Valor",
                                   template="plotly_white")
            fig_pred.write_html(f"{html_prefix}_pred.html")

            # Every per-series name is rebound on the next iteration; just reclaim memory.
            gc.collect()