In [1]:
from this_settings import *

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


## 1. Load data

In [2]:
## Import cleaned data
data = pd.read_parquet(directory_cleandata+'data_a.parquet')
data = data[(data['year']>=1946) &
            (data['year']<=2020)].copy()

## Demean the data to match Mankiw and Shapiro
## 
data.set_index('year',
               inplace=True)
data = data.copy()-data.mean()
data.reset_index(inplace=True)

## 2. Part a

In [3]:
## Regress returns on DP
reg_r = sm.OLS(endog=data['log_ret'],
               #exog=sm.add_constant(data['log_pd_c_tm1']),
               exog=data['log_pd_c_tm1'],
               missing='drop')\
        .fit()

## Regress PD on lagged PD
reg_pd = sm.OLS(endog=data['log_pd_c'],
                #exog=sm.add_constant(data['log_pd_c_tm1']),
                exog=data['log_pd_c_tm1'],
                missing='drop')\
         .fit()

## Grab coefficients
b_r_data = reg_r.params['log_pd_c_tm1']
phi_data = reg_pd.params['log_pd_c_tm1']

In [4]:
## Get coefficients
print(reg_r.summary())

                                 OLS Regression Results                                
Dep. Variable:                log_ret   R-squared (uncentered):                   0.062
Model:                            OLS   Adj. R-squared (uncentered):              0.050
Method:                 Least Squares   F-statistic:                              4.854
Date:                Mon, 11 Apr 2022   Prob (F-statistic):                      0.0307
Time:                        03:10:23   Log-Likelihood:                          32.241
No. Observations:                  74   AIC:                                     -62.48
Df Residuals:                      73   BIC:                                     -60.18
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------

In [5]:
## Get coefficients
print(reg_pd.summary())

                                 OLS Regression Results                                
Dep. Variable:               log_pd_c   R-squared (uncentered):                   0.880
Model:                            OLS   Adj. R-squared (uncentered):              0.879
Method:                 Least Squares   F-statistic:                              537.0
Date:                Mon, 11 Apr 2022   Prob (F-statistic):                    2.20e-35
Time:                        03:10:23   Log-Likelihood:                          36.815
No. Observations:                  74   AIC:                                     -71.63
Df Residuals:                      73   BIC:                                     -69.33
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------

In [6]:
## Get covariance matrix of residuals
e_data = pd.DataFrame({'e_r':reg_r.resid,
                       'e_pd':reg_pd.resid})
e_data_vcv = e_data.cov()

In [7]:
e_data_vcv

Unnamed: 0,e_r,e_pd
e_r,0.024826,0.006937
e_pd,0.006937,0.021936


## 3. Part b

In [8]:
M=10000
N=75
p = e_data.corr().iloc[0,1]
e_sim_cov = np.array([[1,p],
                      [p,1]])

In [10]:
%%time
q3b = mankiw_shapiro(M=M,
                     N=N,
                     e_sim_cov=e_sim_cov,
                     phi_data=phi_data)

CPU times: user 20min 45s, sys: 42.3 s, total: 21min 27s
Wall time: 6min 5s


In [None]:
q3b['stats'].to_parquet('../Output/q3b_stats.parquet')
q3b['results'].to_parquet('../Output/q3b_results.parquet')

In [19]:
q3b['stats']

Unnamed: 0,b_r_sim,phi_sim
mean,0.038276,0.923159
std,0.045622,0.048446


In [20]:
%%time
q3b_cov = mankiw_shapiro(M=M,
                     N=N,
                     e_sim_cov=e_data_vcv,
                     phi_data=phi_data)

KeyboardInterrupt: 

## 4. Part c

In [15]:
e_sim_cov0 = np.array([[1,0],
                       [0,1]])

In [16]:
%%time
q3c = mankiw_shapiro(M=M,
                     N=N,
                     e_sim_cov=e_sim_cov0,
                     phi_data=phi_data)

CPU times: user 20min 7s, sys: 37.5 s, total: 20min 44s
Wall time: 5min 35s


In [17]:
print('here')

here


In [18]:
#q3c['stats'].to_parquet('../Output/q3c_stats.parquet')
#q3c['results'].to_parquet('../Output/q3c_results.parquet')

In [8]:
q3c_stats = pd.read_parquet('../Output/q3c_stats.parquet')

In [9]:
q3c_stats

Unnamed: 0,b_r_sim,phi_sim
mean,3e-06,0.923749
std,0.043724,0.04833


In [212]:
##Don't forget to set the seed!
rng = np.random.default_rng(seed)

In [213]:
## Following Mankiw and Shapiro

## Draw e_r and e_pd from size-N bivarioate normal
## variances=1, covariance= corr(e_r,e_pd)
## ordered as e_r, e_pd
e_sim = rng.multivariate_normal(mean=[0,0],
                                cov=np.array([[1,p],
                                              [p,1]]),
                                size=N)
e_r_sim = e_sim[:,0]
e_pd_sim = e_sim[:,1]

## Set Y=v_t
## Set r = e_r_sim
r_sim = e_r_sim

## Generate X+t from eq 3 using innovations e
## Generate pd_t from pd AR(1) using innovations e_pd
## Initial value pd_0 is random from univariate normal, mean 0 and variance 1/(1-phi^2)
## Note: initial value gets used so we keep number of observations = N in the regression
## of pd(t) on pd(t-1)
pd_sim_0 = rng.normal(loc=0,
                      scale=np.sqrt(1.0/(1.0-phi_data**2))) # SD
pd_sim_panda = pd.DataFrame({'e_pd':e_pd_sim})
pd_sim_panda['pd'] = np.nan
pd_sim_panda.loc[0,'pd'] = pd_sim_0
for t in range(1,len(pd_sim_panda)+2):
    if t==0:
        pd_sim_tm1 = pd_sim_0
    if t>0:
        pd_sim_tm1 = pd_sim_panda.loc[t-1,'pd'].copy()
    if t==len(pd_sim_panda)+1:
        pd_sim_panda.loc[t,'pd'] = np.nan
        break
    pd_sim_panda.loc[t,'pd'] = phi_data * pd_sim_tm1 + pd_sim_panda.loc[t-1,'e_pd'].copy()
pd_sim_panda['pd_tm1'] = pd_sim_panda['pd'].copy().shift()
pd_sim_panda.dropna(subset=['pd','pd_tm1'],
                    inplace=True)
pd_sim = pd_sim_panda['pd'].copy().values
pd_sim_tm1 = pd_sim_panda['pd_tm1'].copy().values

## Estimate equation 2, grab t-stat
## Estimate return regression using generated data
reg_r_sim = sm.OLS(endog=r_sim,
                   exog=pd_sim,)\
            .fit()

## Estimate pd regression using generated data
reg_pd_sim = sm.OLS(endog=pd_sim,
                    exog=pd_sim_tm1,)\
             .fit()
             
## Grab regression coefficients
b_r_sim = reg_r_sim.params[0]
phi_sim = reg_pd_sim.params[0]

In [214]:
b_r_sim

0.005840191050040267

In [215]:
b_r_sim

0.005840191050040267

In [204]:
phi_sim

0.9644423919636502

In [None]:
phi_sim

0.9644423919636502

In [168]:
print(reg_r_sim.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.008
Model:                            OLS   Adj. R-squared (uncentered):             -0.006
Method:                 Least Squares   F-statistic:                             0.5842
Date:                Sun, 10 Apr 2022   Prob (F-statistic):                       0.447
Time:                        23:18:08   Log-Likelihood:                         -93.472
No. Observations:                  75   AIC:                                      188.9
Df Residuals:                      74   BIC:                                      191.3
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [169]:
print(reg_pd_sim.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.663
Model:                            OLS   Adj. R-squared (uncentered):              0.658
Method:                 Least Squares   F-statistic:                              145.5
Date:                Sun, 10 Apr 2022   Prob (F-statistic):                    3.83e-19
Time:                        23:18:17   Log-Likelihood:                         -111.26
No. Observations:                  75   AIC:                                      224.5
Df Residuals:                      74   BIC:                                      226.8
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------