## ARMA Modeling: Model Selection

**Functions**

`sm.tsa.SARIMAX`

### Exercise 68
Perform a model selection exercise on the term premium using

1. General-to-Specific
2. Specific-to-General
3. Minimizing an Information Criterion

In [1]:
import pandas as pd
# Read the "term_premium" table from the HDF5 store and keep its TERM column,
# which is the series every model below is fitted to.
data = pd.read_hdf(
    "data/term-premium.h5",
    key="term_premium",
)
term = data["TERM"]

In [2]:
import statsmodels.tsa.api as tsa

# Every (AR, MA) order with both lag lengths in 0..4, in row-major order.
grid = [(p, q) for p in range(5) for q in range(5)]

# One (AIC, BIC) pair per fitted model, in the same order as `grid`.
criteria = []
for ar, ma in grid:
    print(f"AR: {ar}, MA: {ma}")
    # ARMA(ar, ma) with a constant; d=0, so no differencing is applied.
    mod = tsa.SARIMAX(term, order=(ar, 0, ma), trend="c")
    res = mod.fit()
    criteria.append((res.aic, res.bic))

# Rows are indexed by (AR, MA); columns hold the two information criteria.
ic = pd.DataFrame(
    criteria,
    index=pd.MultiIndex.from_tuples(grid, names=["AR", "MA"]),
    columns=["AIC", "BIC"],
)
ic

AR: 0, MA: 0
AR: 0, MA: 1
AR: 0, MA: 2


  warn('Non-invertible starting MA parameters found.'


AR: 0, MA: 3


AR: 0, MA: 4


AR: 1, MA: 0


AR: 1, MA: 1


AR: 1, MA: 2


AR: 1, MA: 3


AR: 1, MA: 4


AR: 2, MA: 0


AR: 2, MA: 1


AR: 2, MA: 2


AR: 2, MA: 3


AR: 2, MA: 4


AR: 3, MA: 0


AR: 3, MA: 1


AR: 3, MA: 2


  warn('Non-stationary starting autoregressive parameters'




AR: 3, MA: 3




AR: 3, MA: 4




AR: 4, MA: 0


AR: 4, MA: 1


AR: 4, MA: 2




AR: 4, MA: 3




AR: 4, MA: 4




Unnamed: 0_level_0,Unnamed: 1_level_0,AIC,BIC
AR,MA,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2427.365154,2436.764155
0,1,1451.02888,1465.127381
0,2,890.205626,909.003627
0,3,545.806184,569.303685
0,4,382.344827,410.54183
1,0,88.835331,102.933832
1,1,-15.051811,3.74619
1,2,-17.770684,5.726818
1,3,-17.682174,10.514828
1,4,-18.859514,14.036988


In [3]:
# Rank every fitted (AR, MA) order by each criterion; the smallest value comes first.
aic = ic.sort_values(by="AIC")
bic = ic.sort_values(by="BIC")

# The label of the top row is the (AR, MA) pair preferred by that criterion.
ar, ma = aic.iloc[0].name
print(f"AIC selects AR {ar}, MA {ma}")

ar, ma = bic.iloc[0].name
print(f"BIC selects AR {ar}, MA {ma}")

AIC selects AR 3, MA 3
BIC selects AR 1, MA 1


In [4]:
# Fit ARMA(4, 4) with a constant (the largest order in the grid above) and
# show the t-statistic of each estimated parameter.
# NOTE(review): presumably the starting point of the general-to-specific search.
res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 4),
    trend="c",
).fit()
res.tvalues



intercept     3.054304
ar.L1         2.714447
ar.L2        -1.611729
ar.L3         1.422727
ar.L4         3.512530
ma.L1         5.386445
ma.L2         5.830310
ma.L3         4.480061
ma.L4         1.493538
sigma2       36.659584
dtype: float64

In [5]:
# Fit ARMA(4, 3) with a constant, keep it as `gts_res`, and display the full
# estimation summary.
# NOTE(review): the name suggests this is the general-to-specific selection.
gts_res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 3),
    trend="c",
).fit()
gts_res.summary()

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'




0,1,2,3
Dep. Variable:,TERM,No. Observations:,812.0
Model:,"SARIMAX(4, 0, 3)",Log Likelihood,20.406
Date:,"Mon, 07 Dec 2020",AIC,-22.813
Time:,13:44:33,BIC,19.483
Sample:,04-01-1953,HQIC,-6.576
,- 11-01-2020,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1360,0.038,3.590,0.000,0.062,0.210
ar.L1,-0.1518,0.060,-2.547,0.011,-0.269,-0.035
ar.L2,0.0614,0.060,1.017,0.309,-0.057,0.180
ar.L3,0.7837,0.059,13.322,0.000,0.668,0.899
ar.L4,0.1612,0.059,2.744,0.006,0.046,0.276
ma.L1,1.4982,0.058,26.016,0.000,1.385,1.611
ma.L2,1.3925,0.083,16.707,0.000,1.229,1.556
ma.L3,0.5465,0.056,9.723,0.000,0.436,0.657
sigma2,0.0554,0.001,38.575,0.000,0.053,0.058

0,1,2,3
Ljung-Box (L1) (Q):,0.01,Jarque-Bera (JB):,3550.35
Prob(Q):,0.92,Prob(JB):,0.0
Heteroskedasticity (H):,0.71,Skew:,0.68
Prob(H) (two-sided):,0.0,Kurtosis:,13.15


In [6]:
# Fit AR(1) with a constant and show the parameter t-statistics.
# NOTE(review): appears to be the first step of the specific-to-general search.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept      2.994413
ar.L1        153.773330
sigma2        57.831764
dtype: float64

In [7]:
# Fit MA(1) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(0, 0, 1),
    trend="c",
).fit()
res.tvalues

  warn('Non-invertible starting MA parameters found.'


intercept    24.061317
ma.L1        73.865350
sigma2       21.763449
dtype: float64

In [8]:
# Fit AR(2) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept     4.136973
ar.L1        92.943054
ar.L2       -22.097042
sigma2       49.974398
dtype: float64

In [9]:
# Fit ARMA(1, 1) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 1),
    trend="c",
).fit()
res.tvalues


intercept      3.948588
ar.L1        116.642232
ma.L1         33.614000
sigma2        42.834021
dtype: float64

In [10]:
# Fit ARMA(2, 1) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.644060
ar.L1        18.314437
ar.L2         6.061021
ma.L1        16.702826
sigma2       38.381224
dtype: float64

In [11]:
# Fit ARMA(1, 2) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 2),
    trend="c",
).fit()
res.tvalues


intercept      3.615044
ar.L1        116.674005
ma.L1         25.845690
ma.L2         -3.657096
sigma2        37.608982
dtype: float64

In [12]:
# Fit ARMA(3, 1) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(3, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.692895
ar.L1        11.741746
ar.L2         4.770352
ar.L3        -1.868836
ma.L1        12.630009
sigma2       37.700351
dtype: float64

In [13]:
# Fit ARMA(2, 2) with a constant and show the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 2),
    trend="c",
).fit()
res.tvalues

intercept     3.173498
ar.L1         2.130598
ar.L2         3.043375
ma.L1         4.925914
ma.L2         1.637967
sigma2       37.886769
dtype: float64

In [14]:
# Fit ARMA(2, 1) with a constant, keep it as `stg_res`, and display the full
# estimation summary.
# NOTE(review): the name suggests this is the specific-to-general selection.
stg_res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
stg_res.summary()

0,1,2,3
Dep. Variable:,TERM,No. Observations:,812.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,14.712
Date:,"Mon, 07 Dec 2020",AIC,-19.423
Time:,13:44:36,BIC,4.074
Sample:,04-01-1953,HQIC,-10.403
,- 11-01-2020,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0501,0.014,3.644,0.000,0.023,0.077
ar.L1,0.7126,0.039,18.314,0.000,0.636,0.789
ar.L2,0.2342,0.039,6.061,0.000,0.158,0.310
ma.L1,0.6228,0.037,16.703,0.000,0.550,0.696
sigma2,0.0562,0.001,38.381,0.000,0.053,0.059

0,1,2,3
Ljung-Box (L1) (Q):,0.06,Jarque-Bera (JB):,3141.78
Prob(Q):,0.81,Prob(JB):,0.0
Heteroskedasticity (H):,0.72,Skew:,0.59
Prob(H) (two-sided):,0.01,Kurtosis:,12.56
