In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import statsmodels.api as sm
import matplotlib.pyplot as plt
from datetime import datetime
import requests
from io import BytesIO
# Register pandas' matplotlib converters up front (avoids warnings when
# plotting), then set notebook-wide defaults: 16x8 figures, 14pt font.
pd.plotting.register_matplotlib_converters()
plt.rcParams["figure.figsize"] = (16, 8)
plt.rcParams["font.size"] = 14

In [12]:
# Download the friedman2 Stata example dataset and load it into a DataFrame.
# The request is bounded by a timeout and the HTTP status is checked, so a
# stalled connection or an error response fails here with a clear exception
# instead of hanging the kernel or reaching the Stata parser as garbage bytes.
FRIEDMAN2_URL = 'https://www.stata-press.com/data/r12/friedman2.dta'
response = requests.get(FRIEDMAN2_URL, timeout=30)
response.raise_for_status()
friedman2 = response.content
data = pd.read_stata(BytesIO(friedman2))

# Index the rows by observation date; `time` also remains a regular column.
data.index = data.time
data


Unnamed: 0_level_0,m2,time,m1,consump,pc92
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1946-01-01,,1946-01-01,,134.899994,
1946-04-01,,1946-04-01,,140.100006,
1946-07-01,,1946-07-01,,148.899994,
1946-10-01,,1946-10-01,,153.199997,
1947-01-01,,1947-01-01,,156.600006,912.099976
...,...,...,...,...,...
1997-07-01,3979.270020,1997-07-01,1063.550049,5540.299805,4947.000000
1997-10-01,4046.389893,1997-10-01,1076.040039,5593.200195,4981.000000
1998-01-01,4133.879883,1998-01-01,1081.109985,5676.500000,5055.100098
1998-04-01,4196.100098,1998-04-01,1074.520020,5773.700195,5130.200195


In [13]:
# Declare the quarterly-start frequency on the date index. The timestamps
# themselves are unchanged; only the frequency metadata is attached.
data.index = pd.DatetimeIndex(data.index, freq="QS-OCT")
data


Unnamed: 0_level_0,m2,time,m1,consump,pc92
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1946-01-01,,1946-01-01,,134.899994,
1946-04-01,,1946-04-01,,140.100006,
1946-07-01,,1946-07-01,,148.899994,
1946-10-01,,1946-10-01,,153.199997,
1947-01-01,,1947-01-01,,156.600006,912.099976
...,...,...,...,...,...
1997-07-01,3979.270020,1997-07-01,1063.550049,5540.299805,4947.000000
1997-10-01,4046.389893,1997-10-01,1076.040039,5593.200195,4981.000000
1998-01-01,4133.879883,1998-01-01,1081.109985,5676.500000,5055.100098
1998-04-01,4196.100098,1998-04-01,1074.520020,5773.700195,5130.200195


In [17]:
# Dependent variable: the `consump` column restricted to 1959 through 1981.
endog = data['consump'].loc['1959':'1981']
endog

time
1959-01-01     310.399994
1959-04-01     316.399994
1959-07-01     321.700012
1959-10-01     323.799988
1960-01-01     327.299988
                 ...     
1980-10-01    1836.800049
1981-01-01    1890.300049
1981-04-01    1923.500000
1981-07-01    1967.400024
1981-10-01    1983.900024
Freq: QS-OCT, Name: consump, Length: 92, dtype: float32

In [18]:
# Regressors: the `m2` column over the same 1959-1981 window, with a
# constant (intercept) column added alongside it.
m2_window = data['m2'].loc['1959':'1981']
exog = sm.add_constant(m2_window)
exog


Unnamed: 0_level_0,const,m2
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1959-01-01,1.0,289.149994
1959-04-01,1.0,294.049988
1959-07-01,1.0,296.730011
1959-10-01,1.0,297.799988
1960-01-01,1.0,299.350006
...,...,...
1980-10-01,1.0,1601.099976
1981-01-01,1.0,1638.000000
1981-04-01,1.0,1670.550049
1981-07-01,1.0,1708.380005


In [19]:
# Fit a SARIMAX model of `endog` on `exog` with order (1, 0, 1) and print
# the estimation summary.
arma_order = (1, 0, 1)
mod = sm.tsa.statespace.SARIMAX(endog=endog, exog=exog, order=arma_order)
res = mod.fit(disp=False)
print(res.summary())

                               SARIMAX Results                                
Dep. Variable:                consump   No. Observations:                   92
Model:               SARIMAX(1, 0, 1)   Log Likelihood                -340.508
Date:                Sat, 01 May 2021   AIC                            691.015
Time:                        12:33:29   BIC                            703.624
Sample:                    01-01-1959   HQIC                           696.105
                         - 10-01-1981                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -36.0429     56.698     -0.636      0.525    -147.168      75.082
m2             1.1220      0.036     30.801      0.000       1.051       1.193
ar.L1          0.9349      0.041     22.716      0.0

In [22]:
# Dynamic prediction over the estimation window (1959 through 1981).
#
# No `exog` is passed: every requested date lies inside the fitted sample, so
# the model already holds the regressors it was estimated with. `predict`
# only needs `exog` for periods beyond the end of the sample, and the array
# previously supplied here (m2 alone, without the constant column) did not
# match the two-column design (`const`, `m2`) used at fit time.
#
# dynamic=True is kept as in the original call.
predictions = res.predict(start='1959', end='1981', dynamic=True)
predictions

time
1959-01-01     288.379601
1959-04-01     293.877330
1959-07-01     296.884281
1959-10-01     298.084781
1960-01-01     299.823882
                 ...     
1980-10-01    1760.370458
1981-01-01    1801.771806
1981-04-01    1838.292539
1981-07-01    1880.737259
1981-10-01    1934.379387
Freq: QS-OCT, Name: predicted_mean, Length: 92, dtype: float64