In [68]:
import warnings
warnings.filterwarnings("ignore")

# DATA MANIPULATION
import numpy as np # linear algebra
import random as rd # generating random numbers
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime # manipulating date formats
from operator import add # elementwise addition

# VISUALIZATION
import matplotlib.pyplot as plt # basic plotting
import seaborn # for prettier plots

# TIME SERIES
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic

In [42]:
# Load the training data and parse the `date` column.
# pd.to_datetime converts the whole column in one vectorized pass instead of
# calling strptime once per row, and it is safe to re-run on a column that has
# already been parsed (strptime raises TypeError on non-string values).
train = pd.read_csv('../input/train.csv')
train['date'] = pd.to_datetime(train['date'], format='%Y-%m-%d')

In [41]:
# Load the hold-out data and parse the `date` column.
# pd.to_datetime converts the whole column in one vectorized pass instead of
# calling strptime once per row, and it is safe to re-run on a column that has
# already been parsed (strptime raises TypeError on non-string values).
test = pd.read_csv('../input/test.csv')
test['date'] = pd.to_datetime(test['date'], format='%Y-%m-%d')

In [53]:
# Training series for a single store/item pair (store 47, item 1503844):
# keep only the date and the sales figure, and index the frame by date.
ts = (
    train.loc[
        train['store_nbr'].eq(47) & train['item_nbr'].eq(1503844),
        ['date', 'unit_sales'],
    ]
    .set_index('date')
)

In [57]:
# Preview the first rows of the selected store/item training series.
ts.head()

Unnamed: 0_level_0,unit_sales
date,Unnamed: 1_level_1
2016-08-16,266.48
2016-08-17,397.368
2016-08-18,252.541
2016-08-19,220.316
2016-08-20,337.98


In [47]:
# Hold-out series for the same store/item pair (store 47, item 1503844).
# The row mask must be computed from `test` itself: a mask built from `train`
# is aligned to `test` by index label, which either selects unrelated rows or
# raises when the two indexes cannot be aligned.
ts_test = (
    test.loc[
        (test['store_nbr'] == 47) & (test['item_nbr'] == 1503844),
        ['date', 'unit_sales'],
    ]
    .set_index('date')
)

In [61]:
# Reduce the one-column frame to a float Series.
# The isinstance guard keeps the cell re-runnable: after the first run
# `ts_test` is already a Series, which has no `unit_sales` attribute.
if isinstance(ts_test, pd.DataFrame):
    ts_test = ts_test['unit_sales']
ts_test = ts_test.astype('float')

In [58]:
# Reduce the one-column frame to a float Series.
# The isinstance guard keeps the cell re-runnable: after the first run `ts`
# is already a Series, which has no `unit_sales` attribute.
if isinstance(ts, pd.DataFrame):
    ts = ts['unit_sales']
ts = ts.astype('float')

In [59]:
def test_stationarity(timeseries):
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)

# Print the Dickey-Fuller summary for the selected training series.
test_stationarity(ts)

Results of Dickey-Fuller Test:
Test Statistic                  -4.558891
p-value                          0.000154
#Lags Used                      14.000000
Number of Observations Used    327.000000
Critical Value (1%)             -3.450507
Critical Value (5%)             -2.870420
Critical Value (10%)            -2.571501
dtype: float64


In [69]:
# Grid-search ARMA(p, q) orders up to (10, 10) with a constant term and
# report the order that minimises each information criterion.
result = arma_order_select_ic(
    ts,
    max_ar=10,
    max_ma=10,
    ic=['aic', 'bic'],
    trend='c',
    fit_kw=dict(method='css', maxiter=500),
)
print('The bic prescribes these (p,q) parameters : {}'.format(result.bic_min_order))
print('The aic prescribes these (p,q) parameters : {}'.format(result.aic_min_order))

# One heatmap per criterion, drawn on explicit axes. In the result tables the
# row index is the AR order and the column index is the MA order, so the axes
# are labelled accordingly; lower values indicate a better trade-off.
fig, (ax_bic, ax_aic) = plt.subplots(1, 2, figsize=(12, 6))

seaborn.heatmap(result.bic, ax=ax_bic)
ax_bic.set_title('bic results')
ax_bic.set_xlabel('MA order (q)')
ax_bic.set_ylabel('AR order (p)')

seaborn.heatmap(result.aic, ax=ax_aic)
ax_aic.set_title('aic results')
ax_aic.set_xlabel('MA order (q)')
ax_aic.set_ylabel('AR order (p)');

The bic prescribes these (p,q) parameters : (10, 2)
The aic prescribes these (p,q) parameters : (9, 10)


In [70]:
# Fit an ARMA(4, 4) specification (d = 0) on the full training series using
# conditional sum of squares.
# NOTE(review): the order search printed (10, 2) for BIC and (9, 10) for AIC;
# (4, 0, 4) is kept as the author's choice - confirm it is intentional.
pdq = (4, 0, 4)

# No `freq` is passed: the series is indexed by calendar day (see the
# ts.head() output), so declaring it weekly with freq='W' was incorrect, and
# statsmodels releases that validate `freq` against the index reject it.
model = ARIMA(ts, order=pdq)
model_fit = model.fit(disp=False, method='css', maxiter=100)

In [71]:
# Walk-forward one-step-ahead forecasts: refit on everything observed so far,
# predict the next point, then append the true observation to the history.
history = list(ts)
predictions = []

print('Starting the ARIMA predictions...')
print('\n')
for t in range(len(ts_test)):
    try:
        # `history` is a plain list with no date index, so no `freq` is given
        # (a frequency without an index is meaningless and can be rejected).
        model = ARIMA(history, order=pdq)
        model_fit = model.fit(disp=0)
        # forecast() returns (forecast, stderr, conf_int) with one-element
        # arrays for a single step; pull the point forecast out as a scalar.
        output = model_fit.forecast()
        yhat = float(output[0][0])
        print('all good...')
    except Exception as exc:
        # Record a gap instead of silently skipping the step: this keeps
        # `predictions` the same length as `ts_test`, so the Series built
        # below lines up with the test dates, and it surfaces the error.
        yhat = float('nan')
        print('caught exception at step {}: {!r} - recording NaN'.format(t, exc))
    predictions.append(yhat)
    # Positional access: ts_test is indexed by date, not by integer.
    history.append(ts_test.iloc[t])
print('Predictions finished.\n')

predictions_series = pd.Series(predictions, index=ts_test.index)

Starting the ARIMA predictions...


all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
all good...
Predictions finished.



In [72]:
# Preview the first forecasts, indexed by test date.
predictions_series.head()

date
2017-07-26    377.388617
2017-07-27    349.442754
2017-07-28    218.644509
2017-07-29    321.448092
2017-07-30    475.795349
dtype: float64

In [73]:
# Write the date-indexed forecasts to predictions.csv in the working directory.
# NOTE(review): no `header=` is passed, so whether a header row is written
# depends on the installed pandas version - confirm what downstream readers expect.
predictions_series.to_csv('predictions.csv')