In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [5]:
# Load the header-less CSV, label its two columns, and parse the
# 'YYYY-MM' month strings into timestamps.
df = (
    pd.read_csv('sales-data.csv', header=None)
    .set_axis(['Month', 'Sales'], axis=1)
    .assign(Month=lambda frame: pd.to_datetime(frame['Month'], format='%Y-%m'))
)
df.head()

Unnamed: 0,Month,Sales
0,1980-01-01,154
1,1980-02-01,96
2,1980-03-01,73
3,1980-04-01,49
4,1980-05-01,36


In [6]:
# Use the Month timestamps as the index so later slicing/prediction is by date.
# Guarded so the cell is safe to re-run: once 'Month' is the index it is no
# longer a column, and calling set_index('Month') again would raise KeyError.
if df.index.name != 'Month':
    df = df.set_index('Month')
df.head()

Unnamed: 0_level_0,Sales
Month,Unnamed: 1_level_1
1980-01-01,154
1980-02-01,96
1980-03-01,73
1980-04-01,49
1980-05-01,36


### Box-Cox transformation

In [9]:
from scipy.stats import boxcox

# With lmbda fixed at 0 the Box-Cox transform is the natural log of Sales.
sales_boxcox_values = boxcox(df['Sales'], lmbda=0)

# Wrap the transformed values in a one-column frame (column label 0),
# keeping the original Month index.
data_boxcox = pd.DataFrame(sales_boxcox_values, index=df.index)
data_boxcox

Unnamed: 0_level_0,0
Month,Unnamed: 1_level_1
1980-01-01,5.036953
1980-02-01,4.564348
1980-03-01,4.290459
1980-04-01,3.891820
1980-05-01,3.583519
...,...
1986-01-01,6.442540
1986-02-01,5.730100
1986-03-01,5.780744
1986-04-01,5.513429


### 1st Differencing

In [11]:
# First-order difference of the transformed series; the first row has no
# predecessor, so its NaN difference is dropped.
data_boxcox_diff = data_boxcox.diff().dropna()

In [20]:
# Sanity check: (rows, columns) remaining after differencing and dropping the leading NaN row.
data_boxcox_diff.shape

(76, 1)

In [26]:
#splitting into train and test data
train_len = 60

# Raw sales: first `train_len` rows for training, the remainder for testing.
train, test = df.iloc[:train_len], df.iloc[train_len:]

# Same positional split for the Box-Cox transformed series.
train_data_boxcox, test_data_boxcox = (
    data_boxcox.iloc[:train_len],
    data_boxcox.iloc[train_len:],
)

# The differenced series lost its first row, so its split point is one
# position earlier to keep the same dates in train and test.
diff_split = train_len - 1
train_data_boxcox_diff = data_boxcox_diff.iloc[:diff_split]
test_data_boxcox_diff = data_boxcox_diff.iloc[diff_split:]

### ARMA Method

In [36]:
from statsmodels.tsa.arima.model import ARIMA

# order=(4, 0, 3): AR order 4, no differencing inside the model (the training
# series was already differenced), MA order 3.
model = ARIMA(endog=train_data_boxcox_diff, order=(4, 0, 3))
model_fit = model.fit()

  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


In [37]:
## recovering original time series
y_hat_arma = data_boxcox_diff.copy()
y_hat_arma['arma_boxcox_diff'] = model_fit.predict(data_boxcox_diff.index.min(),data_boxcox_diff.index.max())
y_hat_arma['arma_forecast'] = np.exp(y_hat_arma['arma_boxcox_diff'].cumsum().add(data_boxcox[0]))

In [38]:
## rmse and mape values
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(np.mean(mean_squared_error(test['Sales'],y_hat_arma['arma_forecast'][test.index.min():]))).round(2)
mape = np.round((np.mean(np.abs(test['Sales']-y_hat_arma['arma_forecast'][test.index.min():])/test['Sales']))*100,2)
results = pd.DataFrame({
    'Method':['ARMA Method'],
    'MAPE':mape,
    'RMSE':rmse
})
results

Unnamed: 0,Method,MAPE,RMSE
0,ARMA Method,566.59,3828.08
