### MOVING AVERAGE Model
#### STEPS
1. Build a forecasting model (AR or Persistence Model)
2. Find residual/ Forecast errors
3. Build a forecast model on residuals
4. Use forecasted residuals to update the initial forecast

<b>*if initial forecasting in AR, this technique is called ARMA*<b>
    

In [3]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
df = pd.read_csv('daily-min-temperatures.csv', header=0, parse_dates=[0])


In [5]:
df.head()

Unnamed: 0,Date,Temp
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


In [6]:
df['t'] = df.Temp.shift(1)

In [7]:
df['Resid'] = df.Temp - df.t

In [8]:
df.head()

Unnamed: 0,Date,Temp,t,Resid
0,1981-01-01,20.7,,
1,1981-01-02,17.9,20.7,-2.8
2,1981-01-03,18.8,17.9,0.9
3,1981-01-04,14.6,18.8,-4.2
4,1981-01-05,15.8,14.6,1.2


In [9]:
train, test = df.Resid[1:df.shape[0]-7], df.Resid[df.shape[0]-7:]

In [10]:
train.head()

1   -2.8
2    0.9
3   -4.2
4    1.2
5    0.0
Name: Resid, dtype: float64

In [11]:
test.head()

3643    2.9
3644    1.7
3645   -0.6
3646   -0.4
3647   -0.1
Name: Resid, dtype: float64

In [20]:
from statsmodels.tsa.ar_model import AutoReg
model = AutoReg(train, lags=29)
model_fit = model.fit()

In [21]:
model_fit.params

const       -0.006415
Resid.L1    -0.407139
Resid.L2    -0.496194
Resid.L3    -0.445747
Resid.L4    -0.403706
Resid.L5    -0.362528
Resid.L6    -0.334810
Resid.L7    -0.288468
Resid.L8    -0.274281
Resid.L9    -0.235333
Resid.L10   -0.234799
Resid.L11   -0.228532
Resid.L12   -0.208608
Resid.L13   -0.180324
Resid.L14   -0.178425
Resid.L15   -0.152265
Resid.L16   -0.143617
Resid.L17   -0.132501
Resid.L18   -0.097075
Resid.L19   -0.104927
Resid.L20   -0.079371
Resid.L21   -0.059348
Resid.L22   -0.045921
Resid.L23   -0.063197
Resid.L24   -0.063441
Resid.L25   -0.045061
Resid.L26   -0.023578
Resid.L27   -0.011882
Resid.L28   -0.003789
Resid.L29   -0.003422
dtype: float64

In [44]:
pred_resid = model_fit.predict(start=len(train), end=len(train)+len(test)-1)

In [45]:
pred_resid

3643    1.264192
3644    0.510869
3645   -0.251687
3646   -0.122054
3647    0.077509
3648    0.055600
3649         NaN
dtype: float64

In [46]:
predictions = df.t[df.shape[0]-7] + pred_resid

In [47]:
predictions

3643    11.264192
3644    10.510869
3645     9.748313
3646     9.877946
3647    10.077509
3648    10.055600
3649          NaN
dtype: float64

In [52]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(test[:-1], predictions[:-1])
mse # *** #

87.60117245144413

ARIMA Model