In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pandas import DataFrame
from pandas import concat

from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

from statsmodels.tsa.ar_model import AutoReg
from statsmodels.graphics.gofplots import qqplot

# Notebook-wide figure defaults: a wide, short canvas (16x3 in) rendered at 100 dpi
plt.rcParams['figure.figsize'] = (16, 3)
plt.rcParams['figure.dpi'] = 100

In [None]:
# Load the raw sales workbook; the sheet's own header row is replaced by explicit names.
# `squeeze=True` was dropped from the call: the keyword was removed from
# pd.read_excel in pandas 2.0 (TypeError) and never applied to a multi-column sheet.
series = pd.read_excel(
    'spare-parts-sales.xlsx',
    header=0,
    names=['item', 'sales', 'cost', 'date'],
    index_col=None,
    parse_dates=True,
)

series['date'] = pd.to_datetime(series['date'])

# Analysis window, both bounds inclusive
date_filter = (series['date'] >= '2014-01-01') & (series['date'] <= '2016-11-30')

# NOTE(review): the SKU is compared as a string; this assumes the 'item' column
# is read as text -- confirm against the workbook.
sku = '98550154'

# Select the rows (window + SKU) and the two columns still needed in a single
# .loc call, then copy. This replaces an in-place drop on a filtered slice, which
# can trigger SettingWithCopyWarning. Bracket access to 'item' is used instead of
# attribute access so the lookup can never resolve to a DataFrame attribute.
series = series.loc[date_filter & (series['item'] == sku), ['sales', 'date']].copy()

In [None]:
# Bucket the rows into weekly bins on the 'date' column and sum each bin
weekly_bins = pd.Grouper(key='date', freq='w')
series = series.groupby(weekly_bins).sum()

In [None]:
# Build the lagged dataset: column 't' holds the previous observation,
# column 't+1' the current one.
values = DataFrame(series.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']

# Split into training and test sets (first 66 % of the rows are training).
# Row 0 is skipped because shift(1) leaves its lag value as NaN.
X = dataframe.values
train_size = int(len(X) * 0.66)
train, test = X[1:train_size], X[train_size:]
train_X, train_y = train[:, 0], train[:, 1]
test_X, test_y = test[:, 0], test[:, 1]

# Persistence (naive) forecast on the TRAINING set: predict t+1 as the value at t
train_pred = list(train_X)

# Residuals of the naive forecast on the training set
train_resid = list(train_y - train_X)

# Model the training residuals with an autoregression.
# The deprecated `old_names` keyword is no longer passed: it only affects
# parameter naming for pandas input, and the residuals here are a plain list.
window = 15
model = AutoReg(train_resid, lags=window)
model_fit = model.fit()
# Used below as: coef[0] = constant term, coef[k] = weight of the k-th most recent residual
coef = model_fit.params

# Walk forward over the test set, correcting each naive forecast with the
# residual predicted by the autoregression.
history = list(train_resid[-window:])
predictions = list()
for t in range(len(test_y)):
    # naive forecast and its actual error
    yhat = test_X[t]
    error = test_y[t] - yhat
    # predicted error: constant + lag weights applied to the last `window`
    # residuals, most recent first (hence the reversal)
    lag = history[-window:]
    pred_error = coef[0] + np.dot(coef[1:], lag[::-1])
    # corrected forecast
    yhat = yhat + pred_error
    predictions.append(yhat)
    # the true error becomes available once the step has been forecast
    history.append(error)

In [None]:
# Model evaluation on the test set
# scikit-learn's mean_absolute_percentage_error returns a fraction (0.25 means 25 %),
# so it is scaled to percent before being printed with a '%' suffix and before
# being subtracted from 100 to form the "accuracy" figure.
MAPE = 100 * mean_absolute_percentage_error(test_y, predictions)
MAE = mean_absolute_error(test_y, predictions)
MSE = mean_squared_error(test_y, predictions)
R2 = r2_score(test_y, predictions)
accuracy = 100 - MAPE
print('Performance do Modelo')
print('----------------------------')
# R2 is computed above but intentionally not reported; uncomment to print it:
#print('Coeficiente de Determinação: {:0.2}.'.format(R2))
print('Acurácia = {:0.2f} %.'.format(accuracy))
print('MAPE = {:0.2f} %.'.format(MAPE))
print('MAE = {:0.2f} Unidades.'.format(MAE))
print('MSE = {:0.2f}.'.format(MSE))
print('RMSE = {:0.2f}.'.format(sqrt(MSE)))

In [None]:
# Residual analysis: actual minus corrected forecast for every test step,
# summarised with descriptive statistics
residuals = DataFrame(
    [actual - forecast for actual, forecast in zip(test_y, predictions)]
)
print('Descrição dos Resíduos')
print(residuals.describe())

In [None]:
# Plot the observed series (train + test) with the test-set forecasts overlaid.
# The observed curve is built from the targets (train_y/test_y), not the lagged
# inputs (train_X/test_X): the forecasts estimate the 't+1' values, so plotting
# the 't' column would show the actual series one step behind the predictions.
original = np.concatenate((train_y, test_y), axis=0)
fig, ax = plt.subplots()
ax.plot(original, label='Original')
# Forecasts start right after the last training observation
x = range(len(train_y), len(original))
ax.plot(x, predictions, label='Predicted')
ax.set_title('Vendas de Peças de Reposição de 2014 a 2016')
# The series was aggregated with a weekly Grouper, so the x axis counts weeks
ax.set_xlabel('Semanas')
ax.set_ylabel('Quantidade Vendas')
ax.legend(loc='best')
plt.show()