In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv("bmes.csv", encoding = "ISO-8859-1", index_col=0)

In [3]:
df.head()

Unnamed: 0_level_0,PRODUTO,QUANT_VENDIDA,DIASEMANA,FERIADO_PROXIMO
DATA_VENDA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
05/01/15,627,824,SEGUNDA-FEIRA,
05/01/15,191,657,SEGUNDA-FEIRA,
05/01/15,399,414,SEGUNDA-FEIRA,
05/01/15,1419,274,SEGUNDA-FEIRA,
05/01/15,801,262,SEGUNDA-FEIRA,


In [4]:
df.shape

(57670, 4)

### Informações Sobre as Features

* DATA_VENDA = data de emissão da nota fiscal
* PRODUTO = Código do Produto
* QUANT_VENDIDA = Quantidade de produto em caixas que foram vendidas por dia.
* DIASEMANA = Dia da semana
* FERIADO_PROXIMO = Se existe feriado próximo (2 dias antes, 2 dias depois da data da venda) 1 = há feriado, 0 = não há feriado

In [5]:
df.isnull().sum() #O quanto temos de dados nulos?

PRODUTO                0
QUANT_VENDIDA          0
DIASEMANA              0
FERIADO_PROXIMO    51487
dtype: int64

In [6]:
df.FERIADO_PROXIMO.unique() 

array([nan,  1.])

In [7]:
#Onde FERIADO_PROXIMO for nan, será substituído por 0
df.fillna(0, inplace = True)

In [8]:
df.sort_values('QUANT_VENDIDA', ascending=False)

Unnamed: 0_level_0,PRODUTO,QUANT_VENDIDA,DIASEMANA,FERIADO_PROXIMO
DATA_VENDA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
13/06/16,801,6564,SEGUNDA-FEIRA,0.0
13/02/15,627,4729,SEXTA-FEIRA,0.0
24/11/17,399,4640,SEXTA-FEIRA,0.0
02/10/17,238,3905,SEGUNDA-FEIRA,0.0
06/04/18,191,3671,SEXTA-FEIRA,0.0
02/03/18,627,3613,SEXTA-FEIRA,0.0
23/02/18,627,3522,SEXTA-FEIRA,0.0
29/08/17,627,3507,TERÇA-FEIRA,0.0
10/04/18,627,3461,TERÇA-FEIRA,0.0
06/11/17,627,3460,SEGUNDA-FEIRA,0.0


In [9]:
df = df[(df.PRODUTO == 627)]

In [10]:
df = df.iloc[ : , :2 ]

In [11]:
df.head()

Unnamed: 0_level_0,PRODUTO,QUANT_VENDIDA
DATA_VENDA,Unnamed: 1_level_1,Unnamed: 2_level_1
05/01/15,627,824
06/01/15,627,1528
07/01/15,627,966
08/01/15,627,1425
09/01/15,627,2394


In [12]:
df = df.drop(['PRODUTO'], axis=1)

In [13]:
df.index = pd.to_datetime(df.index, dayfirst=True)

In [14]:
df.tail()

Unnamed: 0_level_0,QUANT_VENDIDA
DATA_VENDA,Unnamed: 1_level_1
2018-05-14,2358
2018-05-15,1644
2018-05-16,1926
2018-05-17,1855
2018-05-18,50


In [15]:
df.shape

(898, 1)

In [16]:
df.head()

Unnamed: 0_level_0,QUANT_VENDIDA
DATA_VENDA,Unnamed: 1_level_1
2015-01-05,824
2015-01-06,1528
2015-01-07,966
2015-01-08,1425
2015-01-09,2394


In [17]:
df = df.sort_index()

In [18]:
import plotly as ply
import cufflinks as cf
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
#init_notebook_mode(connected=True)
ply.tools.set_credentials_file(username='rpa1tera', api_key='f29ZE6Zff9Skz065lHHB')

In [19]:
df.iplot(title="Vendas Jan 2015--Mai 2018")

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~rpa1tera/0 or inside your plot.ly account where it is named 'Vendas Jan 2015--Mai 2018'


In [20]:
from plotly.plotly import iplot_mpl
from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(df, freq=30)

In [21]:
fig = result.plot()
iplot_mpl(fig)

In [22]:
from pyramid.arima import auto_arima

In [23]:
stepwise_model = auto_arima(df, start_p=1, start_q=1,
                           max_p=3, max_q=3, m=12,
                           start_P=0, seasonal=True,
                           d=1, D=1, trace=True,
                           error_action='ignore',  
                           suppress_warnings=True, 
                           stepwise=True)

Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=14167.982, BIC=14191.983, Fit time=5.865 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=14857.664, BIC=14876.865, Fit time=2.700 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=14184.753, BIC=14203.954, Fit time=3.503 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=14171.680, BIC=14200.481, Fit time=6.102 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=14683.132, BIC=14702.333, Fit time=1.000 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=14173.673, BIC=14202.474, Fit time=14.888 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=14173.518, BIC=14207.120, Fit time=17.450 seconds
Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=14165.757, BIC=14194.559, Fit time=5.369 seconds
Fit ARIMA: order=(2, 1

In [24]:
print(stepwise_model.aic())

14165.757479627868


In [25]:
train = df.loc['2015-01-01':'2016-12-30']
test = df.loc['2017-01-01':]

In [30]:
stepwise_model.fit(train)

ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(2, 1, 1),
   out_of_sample_size=0, scoring='mse', scoring_args={},
   seasonal_order=(0, 1, 1, 12), solver='lbfgs', start_params=None,

In [36]:
future_forecast = stepwise_model.predict(n_periods=370)

In [38]:
print(future_forecast)

[1391.92195645 1480.97454512 1440.55166799 1434.66449884 1518.60326905
 1396.44743006 1592.28365805 1409.77573877 1695.40781016 1397.08844378
 1542.93321889 1362.24129438 1406.31663047 1518.96842476 1464.54130335
 1461.37360096 1544.56939441 1422.69249994 1618.55634701 1436.14367164
 1721.85359796 1423.61666025 1569.54267462 1388.93230045 1433.08910574
 1545.82239042 1491.47675387 1488.39053778 1571.66781717 1449.87240872
 1645.8177418  1463.48655243 1749.27796476 1451.12251306 1597.13001344
 1416.60112526 1460.83941656 1573.65418725 1519.39003671 1516.38530663
 1599.74407202 1478.03014958 1674.05696867 1491.8072653  1777.68016364
 1479.60619795 1625.69518433 1445.24778217 1489.56755947 1602.46381617
 1548.28115164 1545.35790756 1628.79815896 1507.16572252 1703.27402762
 1521.10581026 1807.06019461 1509.06771492 1655.23818731 1474.87227115
 1519.27353446 1632.25127716 1578.15009864 1575.30834057 1658.83007798
 1537.27912755 1733.46891865 1551.3821873  1837.41805765 1539.50706397
 1685.

In [39]:
future_forecast = pd.DataFrame(future_forecast, index = test.index,columns=['Prediction'])

In [40]:
pd.concat([test,future_forecast],axis=1).iplot()