In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pandas import DataFrame
from pandas import concat

from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

import seaborn as sns
# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation notices and scikit-learn fit-failure warnings
# raised during the grid search further down. Consider narrowing the filter.
warnings.filterwarnings('ignore')

# Default figure geometry for the notebook: 16x3 inches at 100 dpi.
plt.rcParams.update({'figure.figsize':(16,3), 'figure.dpi':100})

In [None]:
# Read the Excel workbook. The sheet's own header row (row 0) is discarded and
# the four columns are renamed via `names`.
# `squeeze=True` was removed from this call: the argument is no longer accepted
# by pandas 2.x and never had an effect here, because a four-column sheet
# cannot be squeezed into a Series.
series = pd.read_excel(
    'spare-parts-sales.xlsx',
    header=0,
    names=['item', 'sales', 'cost', 'date'],
    index_col=None,
    parse_dates=True,
)

# Keep only rows dated 2014-01-01 through 2016-11-30 (both ends inclusive).
series['date'] = pd.to_datetime(series['date'])
date_filter = (series['date'] >= '2014-01-01') & (series['date'] <= '2016-11-30')
series = series[date_filter]

# Keep only item '98550154' (motor oil). The column is compared as text so the
# filter still matches when the part numbers were parsed as integers
# (presumably the case when the Excel cells are numeric -- verify against the
# workbook).
sku = '98550154'
series = series[series['item'].astype(str) == sku]

# Keep only the columns needed downstream, selected by name instead of by
# position and without mutating a filtered slice in place.
series = series[['sales', 'date']]

# Aggregate to weekly totals; the result has the columns 'date' and 'sales'.
series = series.groupby(pd.Grouper(key='date', freq='W')).sum().reset_index()

In [None]:
# Reframe a sequence as a supervised-learning table
def series_to_supervised(data, n_in, n_out=1):
    """Build a lagged feature/target matrix from a sequence.

    Parameters
    ----------
    data : anything accepted by ``DataFrame(...)``
        The observations, in order.
    n_in : int
        Number of lagged copies (t-n_in ... t-1) placed first in each row.
    n_out : int, default 1
        Number of forward copies (t ... t+n_out-1) placed after the lags.

    Returns
    -------
    numpy.ndarray
        One row per position where every lagged and forward value exists;
        rows that would contain NaN from shifting are dropped.
    """
    frame = DataFrame(data)
    # Oldest lag first: shift by n_in, n_in-1, ..., 1.
    lagged = [frame.shift(step) for step in range(n_in, 0, -1)]
    # Current value followed by the look-ahead values: shift by 0, -1, ...
    ahead = [frame.shift(-step) for step in range(n_out)]
    # Place all shifted copies side by side and discard incomplete rows.
    table = concat(lagged + ahead, axis=1).dropna()
    return table.values

In [None]:
# Pair each weekly sales value with the one before it:
# column 0 holds the previous observation, column 1 the current one.
df = pd.DataFrame(series_to_supervised(series['sales'], n_in=1))

In [None]:
# Split into training and test sets
# Append a trailing unit axis so that slicing one column out below yields a
# 2-D (rows, 1) array.
X = df.values[:, :, np.newaxis]

# The first 66% of the rows, in their existing order, form the training set;
# the remainder is held out for testing.
train_size = int(0.66 * len(X))
train = X[:train_size]
test = X[train_size:]

# Column 0 is the lagged input, column 1 is the value to predict.
train_X = train[:, 0]
train_y = train[:, 1]
test_X = test[:, 0]
test_y = test[:, 1]

In [None]:
# Parameter grid, narrowed down from the results of an earlier randomized search
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    # The model is fitted on a single lag feature, so 1 is the only valid
    # integer here. The previous grid also listed 0, which
    # RandomForestRegressor rejects: every candidate using it failed to fit
    # and only wasted half of the search budget.
    'max_features': [1],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}

# Base estimator; a fixed seed makes the bootstrap sampling, and therefore the
# selected hyper-parameters and reported metrics, reproducible across runs.
rf = RandomForestRegressor(random_state=42)

# Exhaustive search over the grid with 3-fold cross-validation on all cores
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid,
                           cv=3, n_jobs=-1, verbose=2)

In [None]:
# Run the grid search on the training split. The target is flattened to 1-D
# because train_y is a (rows, 1) column.
grid_search.fit(train_X, np.ravel(train_y))
# Display the winning hyper-parameter combination as the cell output.
grid_search.best_params_

In [None]:
def evaluate(model, test_features, test_labels):
    """Print error metrics for ``model`` on a held-out set and return its accuracy.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
        The model to score.
    test_features : array-like
        Inputs passed to ``model.predict``.
    test_labels : array-like
        True target values matching ``test_features`` row by row.

    Returns
    -------
    float
        ``100 - MAPE`` with MAPE expressed in percent.
    """
    # Predict from the features that were passed in. The previous version
    # ignored both arguments and fed the global test targets to the model.
    predictions = model.predict(test_features)

    # scikit-learn reports MAPE as a fraction (0.25 means 25 %), so scale it
    # before printing it with a '%' sign or subtracting it from 100.
    MAPE = 100 * mean_absolute_percentage_error(test_labels, predictions)
    MAE = mean_absolute_error(test_labels, predictions)
    MSE = mean_squared_error(test_labels, predictions)
    accuracy = 100 - MAPE

    print('Performance do Modelo')
    print('----------------------------')
    print('Acurácia = {:0.2f} %.'.format(accuracy))
    print('MAPE = {:0.2f} %.'.format(MAPE))
    print('MAE = {:0.2f} Unidades.'.format(MAE))
    print('MSE = {:0.2f}.'.format(MSE))
    print('RMSE = {:0.2f}.'.format(sqrt(MSE)))
    return accuracy

In [None]:
# Take the estimator refitted with the best parameters found by the search
# and score it on the held-out split.
best_grid = grid_search.best_estimator_
grid_accuracy = evaluate(model=best_grid, test_features=test_X, test_labels=test_y)

In [None]:
# Predict from the held-out lag features. The previous version passed the
# targets (test_y) to the model, so the "predictions" were computed from the
# very values they were meant to estimate.
predictions = best_grid.predict(test_X)

# Residual = actual - predicted. test_y is a (rows, 1) column, so flatten it
# to line up with the 1-D prediction vector.
residuals = DataFrame(test_y.ravel() - predictions)
print('Descrição dos Resíduos')
print(residuals.describe())

In [None]:
# Build the observed series from the targets (train_y, test_y), not from the
# lag features. The predictions estimate the targets, so plotting the lagged
# inputs would shift the "Original" curve one step against "Predicted".
original = np.concatenate((train_y, test_y), axis=0).ravel()

fig, ax = plt.subplots()
ax.plot(original, label='Original')

# Predictions cover only the test span, which starts right after the training rows.
x = range(len(train_y), len(original))
ax.plot(x, predictions, label='Predicted')

ax.set_title('Vendas de Peças de Reposição de 2014 a 2016')
# The series is aggregated weekly, so each x step is one week, not one month.
ax.set_xlabel('Semanas')
ax.set_ylabel('Quantidade Vendas')
ax.legend(loc='best')
plt.show()