# Advanced Microeconometrics EXAM 2022:
## Part I (Project II: Economic Growth)
*9:00 am January 14th to 9:00 am January 16th, 2023*

**Hall & Kjølbye**

# Contents

1. [Data](#Data)
2. [Penalty level](#Penalty-level-and-residuals)
3. [Estimation](#Estimation)
3. [Results](#Results)

In [1]:
import pandas as pd 
import numpy as np 
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from scipy.stats import norm
import tools

## Read data 

In [2]:
dat = pd.read_csv('growth.csv')
lbldf = pd.read_csv('labels.csv', index_col='variable')
lbl_all = lbldf.label.to_dict() # as a dictionary
print(f'The data contains {dat.shape[0]} rows (countries) and {dat.shape[1]} columns (variables).')

The data contains 214 rows (countries) and 85 columns (variables).


# Collections of variables

In order to make the analysis simpler, it may be convenient to collect variables in sets that belong together naturally. 

In [3]:
# all available variables
vv_institutions = ['marketref', 'dem', 'demCGV', 'demBMR', 'demreg', 'currentinst'] 
vv_geography = [
        'tropicar','distr', 'distcr', 'distc','suitavg','temp', 'suitgini', 'elevavg', 'elevstd',
        'kgatr', 'precip', 'area', 'abslat', 'cenlong', 'area_ar', 'rough','landlock', 
        'africa',  'asia', 'oceania', 'americas' # 'europe' is the reference
]
vv_geneticdiversity = ['pdiv', 'pdiv_aa', 'pdivhmi', 'pdivhmi_aa']
vv_historical = ['pd1000', 'pd1500', 'pop1000', 'pop1500', 'ln_yst'] # these are often missing: ['pd1', 'pop1']
vv_religion = ['pprotest', 'pcatholic', 'pmuslim']
vv_danger = ['yellow', 'malfal',  'uvdamage']
vv_resources = ['oilres', 'goldm', 'iron', 'silv', 'zinc']
vv_educ = ['ls_bl', 'lh_bl'] # secondary, tertiary: we exclude 'lp_bl' (primary) to avoid rank failure 

vv_all = {'institutions': vv_institutions, 
          'geography': vv_geography, 
          'geneticdiversity': vv_geneticdiversity,
          'historical': vv_historical,
          'religion': vv_religion,
          'danger':vv_danger, 
          'resources':vv_resources,
          'education':vv_educ
         }
list_of_lists = vv_all.values()
vv_all['all'] = [v for sublist in list_of_lists for v in sublist]


In [4]:
# convenient to keep a column of ones in the dataset
dat['constant'] = np.ones((dat.shape[0],))

# Data

In [5]:
# Create new selection variables
vs = vv_all['education'] + vv_all['religion'] +\
['marketref', 'dem', 'demreg','oilres','currentinst','africa',  'asia', 
 'oceania', 'americas', 'landlock', 'area_ar', 'distc', 'precip', 'temp']
zs = vs + ['pop_growth', 'investment_rate']
xs = ['lgdp_initial', 'pop_growth', 'investment_rate'] + vs

# avoiding missings
all_vars = ['gdp_growth'] + xs
I = dat[all_vars].notnull().all(1)

# extract X
X_sel = dat.loc[I, xs]

# extract data
# g is y-variable
g = dat.loc[I,'gdp_growth'].values.reshape((-1)) * 100. #easier to read output when growth is in 100%
z_sel = X_sel.drop(['lgdp_initial'],axis=1).values #controls
y = X_sel.lgdp_initial.values #beta
i_sel = X_sel.columns.get_loc('lgdp_initial')

# make numpy arrays
X = X_sel.values

# standardize 
z_stan = tools.standardize(z_sel)
x_stan = tools.standardize(X_sel)
y_stan = tools.standardize(y)

# Post Double Lasso

## Penalty level and residuals

In [6]:
# compute penalty terms
lambda_BRTyz = tools.BRT(z_stan,y)
lambda_BCCHyz = tools.BCCH(z_stan,y)

# lasso on intial gdp
fit_BRTyz=Lasso(alpha=lambda_BRTyz).fit(z_stan,y)
fit_BCCHyz=Lasso(alpha=lambda_BCCHyz).fit(z_stan,y)

# save residuals
resyz_BRT= y - fit_BRTyz.predict(z_stan)
resyz_BCCH = y - fit_BCCHyz.predict(z_stan)

In [7]:
# compute penalty terms
lambda_BRTgx = tools.BRT(x_stan,g)
lambda_BCCHgx = tools.BCCH(x_stan,g)

# lasso on gdp growth
fit_BRTgx = Lasso(alpha=lambda_BRTgx).fit(x_stan,g) 
fit_BCCHgx = Lasso(alpha=lambda_BCCHgx).fit(x_stan,g)
coefs_BRT = fit_BRTgx.coef_
coefs_BCCH = fit_BCCHgx.coef_

# save residuals
resgxz_BRT = g-fit_BRTgx.predict(x_stan) + y_stan*coefs_BRT[0]
resgxz_BCCH = g-fit_BCCHgx.predict(x_stan) + y_stan*coefs_BCCH[0]

### Estimation

In [8]:
# save residuals
resgzz_BRT = g - fit_BRTgx.predict(x_stan)
resgzz_BCCH = g - fit_BCCHgx.predict(x_stan)

# estimate PDL
PDL_BRT = tools.PDL_ols(resyz_BRT,resgxz_BRT,y)
PDL_BCCH = tools.PDL_ols(resyz_BCCH, resgxz_BCCH,y)

# SE and CI
se_PDL_BRT, CI_PDL_BRT = tools.PDL_CI(resyz_BRT, resgzz_BRT,PDL_BRT)
se_PDL_BCCH, CI_PDL_BCCH = tools.PDL_CI(resyz_BCCH, resgzz_BCCH,PDL_BCCH)

In [9]:
CI_PDL_BRT

(-0.67, 0.15)

In [10]:
# non zero coefficients BRT

# First step
idx_1 = np.where(fit_BRTyz.coef_ != 0)[0]
idx_1 = np.ndarray.tolist(idx_1)
print('First step controls',list(np.array(zs)[idx_1]))

# Second step
idx_0 = np.where(fit_BRTgx.coef_ != 0)[0]
idx_0 = np.ndarray.tolist(idx_0)
print('Second step controls',list(np.array(xs)[idx_0]))

First step controls ['ls_bl', 'pprotest', 'pcatholic', 'demreg', 'asia', 'investment_rate']
Second step controls ['currentinst', 'asia']


In [11]:
# non zero coefficients BCCH

# First step
idx_1 = np.where(fit_BCCHyz.coef_ != 0)[0]
idx_1 = np.ndarray.tolist(idx_1)
print('First step controls',list(np.array(zs)[idx_1]))

# Second step
idx_0 = np.where(fit_BCCHgx.coef_ != 0)[0]
idx_0 = np.ndarray.tolist(idx_0)
print('Second step controls',list(np.array(xs)[idx_0]))

First step controls ['pprotest']
Second step controls []


# Results

In [12]:
estimates = np.array([PDL_BRT, PDL_BCCH]).round(4)
label_over_column = ['(1)','(2)']
label_column = np.array(['PDL', 'PDL'])
label_row = ['' ,'$\\beta$', 'se', 'p','n','$\lambda^{yz}$','$\lambda^{gx}$']
se = np.array([se_PDL_BRT,se_PDL_BCCH]).round(4)
no_controls = np.array([len(zs), len(zs)])
no_obs = np.array([len(y), len(y)])

pens_yz = np.array([lambda_BRTyz.round(4), lambda_BCCHyz.round(4)])
pens_gx = np.array([lambda_BRTgx.round(4), lambda_BCCHgx.round(4)])

data = np.row_stack((label_column ,estimates, se, no_controls,no_obs, pens_yz, pens_gx))

df = pd.DataFrame(data = data, index = label_row, columns = label_over_column)

print(df)

                    (1)      (2)
                    PDL      PDL
$\beta$         -0.2608  -0.1354
se               1.7698   1.2494
p                    21       21
n                    71       71
$\lambda^{yz}$   0.5395   0.8464
$\lambda^{gx}$   0.5618   1.1539
