In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pandas import DataFrame
from pandas import concat

from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

from statsmodels.tsa.ar_model import AutoReg

import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure in this notebook.
sns.set()

import warnings
# NOTE(review): this silences every warning, including deprecation warnings
# raised by the libraries used below; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Default figure size and resolution for the plots below. Kept after sns.set(),
# which presumably resets matplotlib rcParams -- verify before reordering.
plt.rcParams.update({'figure.figsize':(16,3), 'figure.dpi':100})

In [None]:
# Load the raw sales records. The first sheet row is treated as a header and
# its labels are replaced by the four names below.
# `squeeze=True` is no longer passed: it was removed from read_excel in
# pandas 2.0 and never applied to a four-column frame anyway.
# `parse_dates=True` is no longer passed: it only targets the index, and the
# date column is converted explicitly on the next statement.
series = pd.read_excel(
    'spare-parts-sales.xlsx',
    header=0,
    names=['item', 'sales', 'cost', 'date'],
    index_col=None,
)

series['date'] = pd.to_datetime(series['date'])

# Restrict to the analysis window (both bounds inclusive).
date_filter = (series['date'] >= '2014-01-01') & (series['date'] <= '2016-11-30')
series = series[date_filter]

# Keep a single SKU.
# NOTE(review): the SKU is compared as a string; if the spreadsheet stores item
# codes as numbers this comparison selects nothing -- confirm the column dtype.
sku = '98550154'
series = series[series['item'] == sku]

# Keep only the columns used downstream ('item' and 'cost' are discarded).
# Selecting by name builds a new frame instead of dropping in place, by
# position, on a filtered slice (which triggers SettingWithCopyWarning).
series = series[['sales', 'date']]

In [None]:
# Aggregate the records into weekly totals; 'date' becomes the index.
# 'W' is the documented weekly frequency alias (the lowercase spelling is
# deprecated in recent pandas releases).
series = series.groupby(pd.Grouper(key='date', freq='W')).sum()

In [None]:
# Build the lagged dataset: column 't' holds the previous observation and
# column 't+1' the current one.
values = DataFrame(series.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']

# Split into training and test sets (first 66% of the rows go to training).
# Row 0 is skipped: shift(1) leaves its lag 't' as NaN, so it is not a usable
# input/target pair. The test set is unaffected.
X = dataframe.values
train_size = int(len(X) * 0.66)
train, test = X[1:train_size], X[train_size:]
train_X, train_y = train[:,0], train[:,1]
test_X, test_y = test[:,0], test[:,1]

# Naive (persistence) model
def model_naive(x):
    """Return the input unchanged: the forecast is exactly the value given."""
    forecast = x
    return forecast

In [None]:
# Walk-forward validation: one forecast per test input, in order.
predictions = [model_naive(lagged_value) for lagged_value in test_X]

In [None]:
# Model evaluation
# sklearn's mean_absolute_percentage_error returns a fraction (0.25 means 25%),
# so it is scaled to percent here; otherwise the "accuracy" below and the '%'
# printouts would be off by a factor of 100.
# NOTE(review): if test_y contains weeks with zero sales, MAPE (and therefore
# the accuracy) becomes arbitrarily large -- confirm this metric suits the data.
MAPE = 100 * mean_absolute_percentage_error(test_y, predictions)
MAE = mean_absolute_error(test_y, predictions)
MSE = mean_squared_error(test_y, predictions)
R2 = r2_score(test_y, predictions)  # computed for reference; not printed
accuracy = 100 - MAPE
print('Performance do Modelo')
print('----------------------------')
print('Acurácia = {:0.2f} %.'.format(accuracy))
print('MAPE = {:0.2f} %.'.format(MAPE))
print('MAE = {:0.2f} Unidades.'.format(MAE))
print('MSE = {:0.2f}.'.format(MSE))
print('RMSE = {:0.2f}.'.format(sqrt(MSE)))

In [None]:
# Residual analysis: actual minus forecast for each test point, then summary
# statistics of those differences.
residuals = DataFrame(
    [actual - forecast for actual, forecast in zip(test_y, predictions)]
)
print(residuals.describe())

In [None]:
# Plot the observed series (train followed by test) with the forecasts drawn
# over the test span.
# The observed values are the 't+1' targets (train_y / test_y). Plotting the
# lagged inputs instead would draw a curve that coincides exactly with the
# naive forecast over the test span, hiding every forecast error.
original = np.concatenate((train_y, test_y), axis=0)
plt.plot(original, label='Original')
# Forecasts start right after the last training observation.
x = range(len(train_y), len(original))
plt.plot(x, predictions, label='Predicted')
plt.title('Vendas de Peças de Reposição de 2014 a 2016')
# The series is aggregated per week earlier in the notebook, so each x step
# is one week rather than one month.
plt.xlabel('Semanas')
plt.ylabel('Quantidade Vendas')
plt.legend(loc='best')
plt.show()