In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [4]:
df = pd.read_csv('UNRATE.csv',header=None)
df.columns = ['Month','Unrate']
df['Month'] = pd.to_datetime(df['Month'],format ='%d-%m-%Y')

In [5]:
df.head()

Unnamed: 0,Month,Unrate
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5


In [7]:
df = df.set_index('Month')
df

Unnamed: 0_level_0,Unrate
Month,Unnamed: 1_level_1
1948-01-01,3.4
1948-02-01,3.8
1948-03-01,4.0
1948-04-01,3.9
1948-05-01,3.5
...,...
2019-11-01,3.5
2019-12-01,3.5
2020-01-01,3.6
2020-02-01,3.5


In [8]:
## splitting the dataset
train_len = 612
train = df[:train_len]
test = df[train_len:]

In [9]:
!pip install statsmodels==0.10.1

Collecting statsmodels==0.10.1
  Downloading statsmodels-0.10.1.tar.gz (14.1 MB)
     ---------------------------------------- 14.1/14.1 MB 1.1 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'


  error: subprocess-exited-with-error
  
  python setup.py egg_info did not run successfully.
  exit code: 1
  
  [61 lines of output]
    req_ver = LooseVersion(SETUP_REQUIREMENTS[key])
    ver = LooseVersion(mod.__version__)
    req_ver = LooseVersion(SETUP_REQUIREMENTS[key])
  
  Error compiling Cython file:
  ------------------------------------------------------------
  ...
      kfilter._tmp2[0] = scalar * kfilter._forecast_error[0]
      blas.dcopy(&model._k_endogstates, model._design, &inc, kfilter._tmp3, &inc)
      blas.dscal(&model._k_endogstates, &scalar, kfilter._tmp3, &inc)
  
      if not (kfilter.conserve_memory & MEMORY_NO_STD_FORECAST > 0):
          kfilter._standardized_forecast_error[0] = kfilter._forecast_error[0] * scalar**0.5
                                                                               ^
  ------------------------------------------------------------
  
  statsmodels\tsa\statespace\_filters\_inversions.pyx:421:77: Cannot assign type 'npy_float64

In [10]:
## Boxcox transformation
from scipy.stats import boxcox
data_boxcox = pd.Series(boxcox(df['Unrate'],lmbda=0),index=df.index)

In [13]:
## differencing
data_boxcox_diff = data_boxcox.diff()
data_boxcox_diff

Month
1948-01-01         NaN
1948-02-01    0.111226
1948-03-01    0.051293
1948-04-01   -0.025318
1948-05-01   -0.108214
                ...   
2019-11-01   -0.028171
2019-12-01    0.000000
2020-01-01    0.028171
2020-02-01   -0.028171
2020-03-01    0.228842
Length: 867, dtype: float64

In [14]:
data_boxcox_diff.dropna(inplace=True)

### Splitting into train and test data

In [15]:
train_data_boxcox = data_boxcox[:train_len]
test_data_boxcox = data_boxcox[train_len:]

train_data_boxcox_diff = data_boxcox_diff[:train_len]
test_data_boxcox_diff = data_boxcox_diff[train_len:]

## Auto Regressive Model

In [41]:
from statsmodels.tsa.arima.model import ARIMA
model = ARIMA(train_data_boxcox_diff,order=(12,0,0))
model_fit = model.fit()
print(model_fit.params)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


const     0.000415
ar.L1     0.061048
ar.L2     0.162039
ar.L3     0.118992
ar.L4     0.090119
ar.L5     0.106859
ar.L6     0.007711
ar.L7    -0.030006
ar.L8     0.032072
ar.L9     0.007941
ar.L10   -0.125104
ar.L11    0.046844
ar.L12   -0.148100
sigma2    0.001472
dtype: float64


In [42]:
## recovering the original time series
y_hat_ar = data_boxcox_diff.copy()
y_hat_ar['ar_forecast_boxcox_diff'] = model_fit.predict(data_boxcox_diff.index.min(),data_boxcox_diff.index.max())
y_hat_ar['ar_forecast'] = np.exp(y_hat_ar['ar_forecast_boxcox_diff'].cumsum().add(data_boxcox[0]))

In [44]:
## rmse and mape values
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(np.mean(mean_squared_error(test['Unrate'],y_hat_ar['ar_forecast'][test.index.min():]))).round(2)
mape = np.round(np.mean(np.abs(test['Unrate'] - y_hat_ar['ar_forecast'][test.index.min():])/test['Unrate']*100),2)
results = pd.DataFrame({
    'Method':['AR Method'],
    'RMSE':rmse,
    'MAPE':mape
})
results

Unnamed: 0,Method,RMSE,MAPE
0,AR Method,2.16,21.88


In [45]:
## Predicting the unemployment reate of June 1948
y_hat_ar['ar_forecast'].head()

1948-02-01    3.401410
1948-03-01    3.452986
1948-04-01    3.559521
1948-05-01    3.651311
1948-06-01    3.670327
Freq: MS, Name: predicted_mean, dtype: float64

### Moving Average

In [46]:
from statsmodels.tsa.arima.model import ARIMA
model = ARIMA(train_data_boxcox_diff,order=(0,0,5))
model_fit = model.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [47]:
## recovering original series
y_hat_ma = data_boxcox_diff.copy()
y_hat_ma['ma_forecast_boxcox_diff'] = model_fit.predict(data_boxcox_diff.index.min(),data_boxcox_diff.index.max())
y_hat_ma['ma_forecast'] = np.exp(y_hat_ma['ma_forecast_boxcox_diff'].cumsum().add(data_boxcox[0]))

In [49]:
## rmse and mape values
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(np.mean(mean_squared_error(test['Unrate'],y_hat_ma['ma_forecast'][test.index.min():]))).round(2)
mape = np.round(np.mean(np.abs(test['Unrate'] - y_hat_ma['ma_forecast'][test.index.min():])/test['Unrate']*100),2)
results.loc[len(results)] = ['MA Method',rmse,mape]

In [50]:
results

Unnamed: 0,Method,RMSE,MAPE
0,AR Method,2.16,21.88
1,MA Method,2.11,21.67


In [54]:
## unemployment rate for 1948 June
y_hat_ma['ma_forecast'].loc['1948-06-01']

3.6104083910894076

### ARIMA

In [55]:
model = ARIMA(train_data_boxcox,order= (12,1,5))
model_fit = model.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [56]:
y_hat_arima = data_boxcox_diff.copy()
y_hat_arima['arima_forecast_boxcox_diff'] = model_fit.predict(data_boxcox_diff.index.min(),data_boxcox_diff.index.max())
y_hat_arima['arima_forecast'] = np.exp(y_hat_arima['arima_forecast_boxcox_diff'])

In [57]:
## rmse and mape values
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(np.mean(mean_squared_error(test['Unrate'],y_hat_arima['arima_forecast'][test.index.min():]))).round(2)
mape = np.round(np.mean(np.abs(test['Unrate'] - y_hat_arima['arima_forecast'][test.index.min():])/test['Unrate']*100),2)
results.loc[len(results)] = ['ARIMA Method',rmse,mape]
results

Unnamed: 0,Method,RMSE,MAPE
0,AR Method,2.16,21.88
1,MA Method,2.11,21.67
2,ARIMA Method,2.28,22.58


### SARIMA

In [58]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
model = SARIMAX(train_data_boxcox,order=(2,1,4),seasonal_order=(2,1,4,12))
model_fit = model.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [61]:
y_hat_sarima = data_boxcox_diff.copy()
y_hat_sarima['sarima_forecast_boxcox_diff'] = model_fit.predict(data_boxcox_diff.index.min(),data_boxcox_diff.index.max())
y_hat_sarima['sarima_forecast'] = np.exp(y_hat_sarima['sarima_forecast_boxcox_diff'])

In [62]:
## rmse and mape values
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(np.mean(mean_squared_error(test['Unrate'],y_hat_sarima['sarima_forecast'][test.index.min():]))).round(2)
mape = np.round(np.mean(np.abs(test['Unrate'] - y_hat_sarima['sarima_forecast'][test.index.min():])/test['Unrate']*100),2)
results.loc[len(results)] = ['SARIMA Method',rmse,mape]
results

Unnamed: 0,Method,RMSE,MAPE
0,AR Method,2.16,21.88
1,MA Method,2.11,21.67
2,ARIMA Method,2.28,22.58
3,SARIMA Method,2.25,22.1
