In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
import pmdarima as pm

from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set()

import warnings
# Silence all warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Default to wide, short figures. Kept after sns.set() because the seaborn
# theme installs its own rc defaults (including the figure size).
plt.rcParams.update({'figure.figsize':(16,3), 'figure.dpi':100})

In [None]:
# Load the raw sales sheet; the four columns are renamed positionally.
# `squeeze` is intentionally not passed: it was removed from the pandas readers
# in 2.0 (TypeError) and never had an effect on a multi-column sheet.
series = pd.read_excel('spare-parts-sales.xlsx', header=0, names=['item', 'sales', 'cost', 'date'], index_col=None, parse_dates=True)

# Keep only the rows dated inside the analysis window (both bounds inclusive).
series['date'] = pd.to_datetime(series['date'])
date_filter = (series['date'] >= '2014-01-01') & (series['date'] <= '2016-11-30')
series = series[date_filter]

# Restrict the data to a single SKU. The column is compared as text so the
# filter also matches when Excel delivered the item codes as integers.
sku = '98550154'
series = series[series['item'].astype(str) == sku]

# Keep only the columns used downstream. Selecting into a fresh copy (instead
# of an in-place drop on a filtered slice) avoids SettingWithCopyWarning.
series = series[['sales', 'date']].copy()

In [None]:
# Aggregate sales into weekly totals, then restore 'date' as a regular column.
# 'W' is the documented weekly offset alias; the lowercase spelling is not the
# canonical form and is deprecated in recent pandas releases.
series = series.groupby(pd.Grouper(key='date', freq='W')).sum().reset_index()

In [None]:
# Handle seasonality by differencing the series.
# NOTE(review): sales - sales.diff() simplifies to the previous row's value
# (a one-step lag with NaN in the first position), not a differenced series.
# Confirm whether series['sales'].diff() was intended; every later cell
# (split, fit, metrics, plot) is built on the values produced here.
series_diff = series['sales'] - series['sales'].diff()

In [None]:
# Chronological hold-out split: the first 66 % of the observations are used
# for training and the remainder for testing; missing values are dropped from
# each part after the cut.
train_size = int(len(series_diff) * 0.66)
train = series_diff.iloc[:train_size].dropna()
test = series_diff.iloc[train_size:].dropna()

In [None]:
# Tune the (seasonal) ARIMA orders with pmdarima's automatic search.
# NOTE(review): `test` is passed as a tuple here; confirm against the pmdarima
# docs that this is accepted (d is fixed explicitly, so the unit-root test may
# simply be unused).
model = pm.auto_arima(
    train,
    # Differencing orders and seasonal period (52 matches the weekly aggregation).
    d=1,
    D=1,
    m=52,
    test=("kpss", "adf"),
    # Search bounds for the non-seasonal orders.
    start_p=0,
    max_p=5,
    start_q=0,
    max_q=5,
    # Search bounds for the seasonal orders.
    start_P=0,
    max_P=5,
    max_D=5,
    start_Q=0,
    max_Q=5,
    # Search strategy, logging and error handling.
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    random_state=20,
    n_fits=50,
)

In [None]:
# Forecast one value per test observation.
# The forecast is converted to a plain array before building the frame: when
# predict() returns a pandas Series carrying its own index, passing it together
# with index=test.index would reindex by label and could yield NaNs. With an
# array the values are aligned to the test set by position.
predictions = pd.DataFrame(
    {'prediction_sales': np.asarray(model.predict(n_periods=len(test)))},
    index=test.index,
)

In [None]:
# Model evaluation on the hold-out set.
# scikit-learn returns MAPE as a fraction (0.15 means 15 %), so it is scaled by
# 100 here; otherwise both the "%" figure and the derived accuracy are wrong.
MAPE = 100 * mean_absolute_percentage_error(test, predictions)
MAE = mean_absolute_error(test, predictions)
MSE = mean_squared_error(test, predictions)
R2 = r2_score(test, predictions)  # computed for reference; not printed below
accuracy = 100 - MAPE  # can be negative when the percentage error exceeds 100 %
print('Performance do Modelo')
print('----------------------------')
print('Acurácia = {:0.2f} %.'.format(accuracy))
print('MAPE = {:0.2f} %.'.format(MAPE))
print('MAE = {:0.2f} Unidades.'.format(MAE))
print('MSE = {:0.2f}.'.format(MSE))
print('RMSE = {:0.2f}.'.format(sqrt(MSE)))

In [None]:
# Forecast errors (actual minus predicted), computed in one vectorised step
# from the single prediction column, then summarised.
residuals = DataFrame(test.values - predictions.values[:, 0])
print('Descrição dos Resíduos')
print(residuals.describe())

In [None]:
# Overlay the forecast on the weekly sales series; both are drawn against
# their integer row index.
ax = plt.gca()
ax.plot(series['sales'], label='Original')
ax.plot(predictions, label='Predicted')
ax.legend(loc='best')
plt.show()