In [1]:
# ECON 662D1 Midterm - Jean-Sébastien Matte 260913682

In [2]:
# Import modules
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib
import matplotlib.pyplot as plt

In [3]:
# Load the midterm data set into a two-column frame (Ct, Yt)
# NOTE(review): in_dir is an absolute, machine-specific path; it is kept as a plain
# string because the plotting cell concatenates onto it when saving the figure.
in_dir = '/Users/jsmatte/github/ECON662D1/Midterm'
in_file = in_dir + '/e662.midterm.20.dat.txt'

# The file is read as space-separated with no header row; the columns labelled
# 1, 2 and 3 are discarded and the two that remain are named Ct and Yt.
data = pd.read_csv(in_file, sep=' ', header=None).drop(columns=[1, 2, 3])
data.columns = ['Ct', 'Yt']
print(data)

          Ct       Yt
0     554046   612996
1     547349   623964
2     547376   629424
3     545467   636492
4     534089   638328
..       ...      ...
108  1118489  1761120
109  1132225  1784600
110  1143236  1807972
111  1152568  1831992
112  1155482  1848904

[113 rows x 2 columns]


In [4]:
# Append the natural logarithms of Ct and Yt as the new columns ct and yt
for level_col, log_col in (('Ct', 'ct'), ('Yt', 'yt')):
    data[log_col] = np.log(data[level_col])
print(data)

          Ct       Yt         ct         yt
0     554046   612996  13.225003  13.326114
1     547349   623964  13.212842  13.343848
2     547376   629424  13.212891  13.352560
3     545467   636492  13.209398  13.363727
4     534089   638328  13.188318  13.366608
..       ...      ...        ...        ...
108  1118489  1761120  13.927489  14.381461
109  1132225  1784600  13.939695  14.394705
110  1143236  1807972  13.949373  14.407716
111  1152568  1831992  13.957503  14.420914
112  1155482  1848904  13.960028  14.430104

[113 rows x 4 columns]


In [5]:
# Model (2)
# OLS of the first difference of ct on [1, lagged ct, first difference of yt, lagged yt].
# All sizes are derived from the data instead of a hard-coded row count.

ct_vals = data.ct.values
yt_vals = data.yt.values

# create data for \Delta ct and \Delta yt
# np.diff(a)[i] == a[i + 1] - a[i], i.e. the same differences the element-wise loop produced.
Dct_array = np.diff(ct_vals).tolist()
Dct = np.array(Dct_array).reshape(-1, 1)  # column vector, one row per difference
print('Dct.shape ->', Dct.shape)

Dyt_array = np.diff(yt_vals).tolist()
Dyt = np.array(Dyt_array).reshape(-1, 1)
print('Dyt.shape ->', Dyt.shape)

# create matrix X of the regressors
# Row i holds [1, ct[i], yt[i + 1] - yt[i], yt[i]] and is paired with ct[i + 1] - ct[i] in Dct.
n_obs = len(Dct_array)
X2 = np.column_stack([np.ones(n_obs), ct_vals[:-1], Dyt_array, yt_vals[:-1]])

# Regression Model
model2 = sm.OLS(Dct, X2).fit()
print(model2.summary())
print()
print('Residual Std. Error: ',  np.sqrt(model2.scale))

Dct.shape -> (112, 1)
Dyt.shape -> (112, 1)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.114
Model:                            OLS   Adj. R-squared:                  0.089
Method:                 Least Squares   F-statistic:                     4.614
Date:                Thu, 22 Oct 2020   Prob (F-statistic):            0.00446
Time:                        09:56:33   Log-Likelihood:                 423.57
No. Observations:                 112   AIC:                            -839.1
Df Residuals:                     108   BIC:                            -828.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const   

In [6]:
# Turn the coefficients estimated for model (2) into the parameters used for model (1)
params = model2.params
# Coefficients come back in the column order of X2: constant, lagged ct, \Delta yt, lagged yt
const_hat, lag_c_hat, dy_hat, lag_y_hat = params
alpha = const_hat
beta = lag_c_hat + 1           # lagged-ct coefficient plus one
gamma0 = dy_hat
gamma1 = lag_y_hat - dy_hat    # lagged-yt coefficient net of gamma0
print(alpha, beta, gamma0, gamma1)

# Square root of the fitted scale of model (2); used as the shock standard deviation in the simulation
stdev = np.sqrt(model2.scale)
print(stdev)

0.21622606362067193 0.9364768899336617 0.3503113044205379 -0.30351571627817325
0.005613301235072004


In [7]:
# Simulation of model (1)
# Recursion: cts[t] = alpha + beta * cts[t - 1] + gamma0 * yt[t] + gamma1 * yt[t - 1] + u,
# with u drawn from N(0, stdev^2), started from the first observed ct and driven by the observed yt.

# Seeded, local generator: a Restart & Run All reproduces the same simulated path,
# and the global NumPy random state is left untouched.
SIM_SEED = 662
rng = np.random.default_rng(SIM_SEED)

# One simulated value per row of the data set (previously a hard-coded 113)
iterates = len(data)
cts = [data.ct.values[0]]
for t in range(1, iterates):
    u = rng.normal(0, stdev)
    temp_cts = alpha + (beta * cts[t - 1]) + (gamma0 * data.yt.values[t]) + (gamma1 * data.yt.values[t - 1]) + u
    cts.append(temp_cts)
print(len(cts))

113


In [8]:
# Model (1) from simulated data
# OLS of the simulated ct on [1, lagged simulated ct, yt, lagged yt].

# the regressands are now the simulated c_t, omitting the first entry
c_ts = cts[1:]

# exogenous data, omitting the first entry
y_t = data.yt.values[1:]
print('y_t.shape ->', y_t.shape)

# create matrix X of the regressors
# Row i holds [1, cts[i], yt[i + 1], yt[i]] and is paired with cts[i + 1].
# The row count follows the inputs (previously a hard-coded 112); column_stack
# raises if the simulated and observed series ever differ in length.
X1s = np.column_stack([np.ones(len(c_ts)), cts[:-1], y_t, data.yt.values[:-1]])

# Regression Model
model1s = sm.OLS(c_ts, X1s).fit()
print(model1s.summary())
print()
print('Residual Std. Error: ',  np.sqrt(model1s.scale))

y_t.shape -> (112,)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 7.962e+04
Date:                Thu, 22 Oct 2020   Prob (F-statistic):          1.98e-180
Time:                        09:56:33   Log-Likelihood:                 434.61
No. Observations:                 112   AIC:                            -861.2
Df Residuals:                     108   BIC:                            -850.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2861      0.126

In [9]:
# Model (1)
# OLS of the observed ct on [1, lagged ct, yt, lagged yt].

ct_obs = data.ct.values
yt_obs = data.yt.values

# the real regressand, omitting the first entry
c_t = ct_obs[1:]
print('c_t.shape ->', c_t.shape)

# create matrix X of the regressors
# Row i holds [1, ct[i], yt[i + 1], yt[i]] and is paired with ct[i + 1].
# Everything is sliced from `data` directly, so this cell neither relies on a
# variable left over from another cell nor on a hard-coded row count.
X1 = np.column_stack([np.ones(len(c_t)), ct_obs[:-1], yt_obs[1:], yt_obs[:-1]])

# Regression Model
model1 = sm.OLS(c_t, X1).fit()
print(model1.summary())
print()
print('Residual Std. Error: ',  np.sqrt(model1.scale))

c_t.shape -> (112,)
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 6.866e+04
Date:                Thu, 22 Oct 2020   Prob (F-statistic):          5.86e-177
Time:                        09:56:33   Log-Likelihood:                 423.57
No. Observations:                 112   AIC:                            -839.1
Df Residuals:                     108   BIC:                            -828.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2162      0.158

In [10]:
# Residuals: observed (or simulated) regressand minus the fitted values of each regression
pred_1 = model1.fittedvalues
pred_1s = model1s.fittedvalues
print(len(pred_1))
print(len(pred_1s))

# Model (1) fitted on the simulated series
resid_s = [simulated - fitted for simulated, fitted in zip(c_ts, pred_1s)]
print(len(resid_s))

# Model (1) fitted on the real series
resid = [observed - fitted for observed, fitted in zip(c_t, pred_1)]
print(len(resid))

# Observation index 0, 1, ... used as the x-axis of the residual plot
N = list(range(len(c_t)))

112
112
112
112


In [1]:
# Plot the residuals
%matplotlib inline
plt.figure(figsize=(11, 8))
plt.scatter(N, resid_s, c = 'blue', marker = 'x', label = 'simulated')
plt.scatter(N, resid, c = 'red', marker = 'v', label = 'real')
plt.hlines(0, N[0], N[-1])
plt.xlabel('Observation')
plt.ylabel('Residual')
# plt.title('Residual Plot of Simulated and Real Data')
plt.legend(loc = 'lower right')
plt.savefig(str(in_dir + '/fig1.eps'))

NameError: name 'plt' is not defined