##### Moving average

In [1]:
import pandas as pd

# Load the daily minimum temperature data; the first column (Date) is parsed as datetime.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv(
    r'C:\E\E_drive_copy\sample_data\ts_ud\Resources\Data + Code\daily-min-temperatures.csv',
    parse_dates=[0],
)

In [2]:
df.head()

Unnamed: 0,Date,Temp
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


In [3]:
df.shape

(3650, 2)

In [4]:
# Lag the temperature by one row: each row now carries the previous row's Temp
# (the first row has no predecessor and becomes NaN). This is the naive forecast.
df['Temp_shift1'] = df['Temp'].shift(periods=1)

In [5]:
df.head()

Unnamed: 0,Date,Temp,Temp_shift1
0,1981-01-01,20.7,
1,1981-01-02,17.9,20.7
2,1981-01-03,18.8,17.9
3,1981-01-04,14.6,18.8
4,1981-01-05,15.8,14.6


In [6]:
# First difference: actual temperature minus the lag-1 (naive forecast) value.
df['1st_diff'] = df['Temp'].sub(df['Temp_shift1'])

In [7]:
df.head()

# Temp_shift1 is the naive forecast (previous row's Temp); 1st_diff is its residual (Temp - Temp_shift1)

Unnamed: 0,Date,Temp,Temp_shift1,1st_diff
0,1981-01-01,20.7,,
1,1981-01-02,17.9,20.7,-2.8
2,1981-01-03,18.8,17.9,0.9
3,1981-01-04,14.6,18.8,-4.2
4,1981-01-05,15.8,14.6,1.2


In [8]:
# Split the first-difference series (the residual of the naive forecast) by position:
#   - row 0 is skipped because its difference is NaN (no previous value),
#   - the last 7 rows are held out as the test set.
train = df['1st_diff'].iloc[1:len(df) - 7]
test = df['1st_diff'].iloc[len(df) - 7:]

In [9]:
train

1      -2.8
2       0.9
3      -4.2
4       1.2
5       0.0
       ... 
3638    0.7
3639   -2.3
3640    0.1
3641    0.7
3642   -3.9
Name: 1st_diff, Length: 3642, dtype: float64

In [10]:
test

3643    2.9
3644    1.7
3645   -0.6
3646   -0.4
3647   -0.1
3648    2.2
3649   -2.7
Name: 1st_diff, dtype: float64

In [11]:
# checking AR model

from statsmodels.tsa.ar_model import AutoReg

In [13]:
# Fit an autoregressive model with a single lag on the training residuals.
ar_model = AutoReg(endog=train, lags=1).fit()



In [14]:
ar_model.params

intercept     -0.002507
1st_diff.L1   -0.180678
dtype: float64

In [17]:
# Forecast the residuals for the held-out period, starting at the first
# position after the training data.
# NOTE(review): `end` is inclusive, so this returns len(test) + 1 forecasts;
# the following cells discard the extra trailing value with pred[:-1].
pred = ar_model.predict(start=train.shape[0], end=train.shape[0] + test.shape[0])

In [19]:
pred.iloc[:-1]  # forecasted residuals (the extra trailing forecast is dropped)

3643   -0.129368
3644    0.020867
3645   -0.006277
3646   -0.001373
3647   -0.002259
3648   -0.002099
3649   -0.002128
dtype: float64

In [34]:
# Naive (lag-1) forecasts for the last 7 rows, i.e. the test period.
df['Temp_shift1'].iloc[len(df) - 7:]

3643    10.0
3644    12.9
3645    14.6
3646    14.0
3647    13.6
3648    13.5
3649    15.7
Name: Temp_shift1, dtype: float64

In [31]:
# Forecast of the notebook's "MA model": the naive forecast (the shift-1 column,
# i.e. the previous row's temperature) plus the forecasted residual for the same row.
# The two series are added by index label; the extra trailing value in `pred` is dropped.
pred_dif = df['Temp_shift1'].iloc[len(df) - 7:].add(pred.iloc[:-1])

In [32]:
pred_dif

3643     9.870632
3644    12.920867
3645    14.593723
3646    13.998627
3647    13.597741
3648    13.497901
3649    15.697872
dtype: float64

In [33]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [39]:
# Mean absolute error of the combined forecast against the last 7 observed temperatures.
mean_absolute_error(y_true=df['Temp'].iloc[-7:], y_pred=pred_dif)

1.5283661294507185

In [40]:
# Root mean squared error: square root of the MSE over the same 7 held-out rows.
mean_squared_error(y_true=df['Temp'].iloc[-7:], y_pred=pred_dif) ** 0.5

1.8763671728296074