# Project II: Economic Growth 

This notebook will help you get started with analyzing the growth dataset, `growth.csv`.

In [None]:
import numpy as np
import numpy.linalg as la
import pandas as pd
from sklearn.linear_model import Lasso
from scipy.stats import norm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

# autoreload
%load_ext autoreload
%autoreload 2

## Read data 

In [None]:
# NOTE(review): star-import. It is presumably what brings `dat`, `filter_data`,
# `group_data` and `investigate_data` (all used further down and not defined in this
# notebook) into scope — confirm against Project_2 and consider importing them explicitly.
from Project_2 import *
# Report the dimensions of the loaded data set.
print(f'The data contains {dat.shape[0]} rows (countries) and {dat.shape[1]} columns (variables).')

# Descriptive plots

In [None]:
# Growth against (log) initial GDP; the trailing ';' suppresses the Axes repr.
dat.plot.scatter(
    x='lgdp_initial',
    y='gdp_growth',
    xlabel='Log of initial GDP, 1970',
    ylabel='GDP growth rate',
);

In [None]:
import seaborn as sns 
# Same two axes as the previous plot, with points coloured by the `pdivhmi` column.
sns.scatterplot(data=dat, x='lgdp_initial', y='gdp_growth', hue='pdivhmi');

## Data-inspection
We begin by counting the number of observations for each variable, conditional on 'gdp_growth' and 'lgdp_initial' being non-missing

In [None]:
# `filter_data` comes from Project_2 and is not visible here; presumably it produces the
# per-variable observation counts described in the text above — verify in Project_2.
sorted_data = filter_data(dat)

## Collection of variables
In order to ease the following analysis, we group ALL the given variables into subcategories. Based on our data-inspection we exclude a range of variables from the analysis. These are grouped in 'vv_excluded'.

In [None]:
# Variable groups used throughout the analysis (returned by the Project_2 helper)
vv_outcome, vv_key, vv_excluded, vv_all = group_data(dat)

# Flatten every group of controls into one list, stored under the key 'all'
list_of_lists = vv_all.values()
flattened_controls = []
for sublist in list_of_lists:
    for v in sublist:
        flattened_controls.append(v)
vv_all['all'] = flattened_controls

n_variables_total = len(vv_all["all"]+vv_key+vv_outcome+vv_excluded)
n_controls = len(vv_all["all"])
print(f'Variables in total: {n_variables_total}')
print(f'We include {n_controls} control variables in total.')

## Representativity of chosen variables
We check how many observations have non-missing values for ALL the included 66 variables (our two key variables and 64 control variables). We compare this to the corresponding number of observations if all variables (except the two key variables) are excluded.

In [None]:
# Column sets: outcome + key regressor only, and those plus every included control
key_var = vv_outcome + vv_key
all_included = vv_all['all'] + vv_outcome + vv_key

# A row counts only if every column of the set is observed
complete_on_key = dat[key_var].notna().all(axis=1)
non_missing_key_var = complete_on_key.sum()
print(f'Non-missing values in key variables: {non_missing_key_var}')

complete_on_all = dat[all_included].notna().all(axis=1)
non_missing_all_included = complete_on_all.sum()
print(f'Non-missing values in all included variables: {non_missing_all_included}')

We then check how the observations with non-missing values are distributed across the different geographic regions to get an idea about how representative the remaining observations are. We derive this geographic distribution for all observations ('All_share'), those observations with non-missing values for our key variables ('Key_share'), and those with non-missing values for all the included variables ('Included_share').

In [None]:
# Region columns handed to the Project_2 helper.
regions = ['africa', 'americas', 'asia', 'europe', 'oceania']
# `investigate_data` is defined in Project_2; presumably it returns the regional share
# table described in the text above ('All_share', 'Key_share', 'Included_share') — verify there.
table = investigate_data(regions, dat, vv_outcome, vv_key, vv_all)
print(table)

We conclude that the included variables resemble the geographic distribution of the full data set to a reasonable extent, i.e. there is no extreme regional bias in our included variables.

## Preparing data for analysis
We create a new data set consisting only of the non-missing observations and the included variables. We add a constant to the data (mainly relevant for OLS analysis), and base our analysis on this new dataset.

In [None]:
# Keep a column of ones in the dataset (used as the intercept in OLS)
dat['constant'] = np.ones(len(dat))
vv_constant = ['constant']
print(dat.shape)

# Keep only rows where outcome, key regressor and every included control are observed
required_columns = vv_outcome + vv_key + vv_all['all']
included_rows = dat[required_columns].notna().all(axis=1)
data = dat.loc[included_rows]
print(data.shape)

In [None]:
# Restrict to the columns used in the analysis: controls, outcome, key regressor, constant
analysis_columns = vv_all['all'] + vv_outcome + vv_key + vv_constant
data = data.loc[:, analysis_columns]
print(data.shape)

n_missing_cells = data.isna().sum().sum()
print(f'There are {n_missing_cells} missing observations in the data.')

In [None]:
# Outcome (kept in decimals; it is not multiplied by 100) and key regressor as 1-D objects
g = data[vv_outcome].squeeze()
y0 = data[vv_key].squeeze()

# Controls; a degree-1 expansion without bias column just yields a plain ndarray of the controls
Z_basic = data[vv_all['all']]
poly = PolynomialFeatures(degree=1, include_bias=False)
Z = poly.fit_transform(Z_basic)

# Full regressor matrix with y0 as the first column
X = np.column_stack((y0,Z))
N = X.shape[0]

def standardize(X):
    """Return X with each column centred on its mean and scaled by its sample std.

    The standard deviation uses ddof=1; both statistics are taken along axis 0.
    """
    centred = X - np.mean(X, axis=0)
    scale = np.std(X, axis=0, ddof=1)
    return centred/scale

# Standardized versions of the full regressor matrix, the controls and the key regressor
X_tilde, Z_tilde, y0_tilde = (standardize(arr) for arr in (X, Z, y0))

In [None]:
# Column labels: Z_names for the controls, X_names with the key regressor prepended
# (same ordering as the columns of Z and X)
Z_names = Z_basic.columns
X_names = Z_names.insert(loc=0, item=y0.name)
print(X_names)

# Data analysis

## OLS estimation
NOTE: Using "original" variables (not standardized)

### Simple analysis

In [None]:
# Regressors for the simple OLS: a constant and y0; outcome as a column vector
simple_y0 = np.column_stack((np.ones(N),y0))
simple_g = np.array(g).reshape(-1,1)

# OLS estimate (X'X)^{-1} X'g
gram_inv_simple = la.inv(simple_y0.T @ simple_y0)
betas_simpleOLS = gram_inv_simple @ simple_y0.T @ simple_g

# Displayed as the cell output
pd.DataFrame(betas_simpleOLS, index=['constant', 'lgdp_initial'], columns=['gdp_growth'])

NOTE: It is not possible to use all the included variables with OLS as the rank condition is broken when p>n (as is the case here)

### Adding more controls

In [None]:
# Regressor matrix: a constant followed by y0 and all control variables
X_OLS = np.column_stack((np.ones(N), X))
g_OLS = np.array(g).reshape(-1,1)

# Check the rank condition on the matrix that is actually inverted (X_OLS, including the
# constant) and do so BEFORE inverting: X alone can have full column rank while X_OLS
# does not, e.g. when a set of dummies sums to the constant.
K = X_OLS.shape[1]
rank_X_OLS = np.linalg.matrix_rank(X_OLS)
assert rank_X_OLS == K, f'X_OLS does not have full column rank (rank {rank_X_OLS}, K = {K})'

# OLS estimate (X'X)^{-1} X'g
betas_OLS = la.inv(X_OLS.T @ X_OLS) @ X_OLS.T @ g_OLS

In [None]:
# Row labels in the column order of X_OLS: constant, key regressor, controls
xs = vv_constant + vv_key + vv_all['all']

ols_table = pd.DataFrame({'β': betas_OLS[:,0]}, index=xs)
ols_table.round(3)

## Lasso estimation

### Based on BRT penalty

#### Post-single Lasso

Step 1: Calculate BRT penalty

In [None]:
# Make a function that calculates BRT.
def BRT(X_tilde,g):
    """Return the BRT Lasso penalty for regressing g on X_tilde.

    Computed as c * sigma / sqrt(N) * Phi^{-1}(1 - alpha / (2p)) with c = 1.1,
    alpha = 0.05, sigma the sample standard deviation of g (ddof=1) and
    (N, p) the shape of X_tilde.
    """
    n_obs, n_regressors = X_tilde.shape
    c, alpha = 1.1, 0.05
    sigma = np.std(g, ddof=1)
    quantile = norm.ppf(1 - alpha / (2*n_regressors))
    return (c * sigma) / np.sqrt(n_obs) * quantile

# BRT penalty for the Lasso of g on the standardized regressors (y0 and Z)
penalty_BRT = BRT(X_tilde=X_tilde, g=g)
print("lambda_BRT =",penalty_BRT.round(4))

Step 2: Lasso g using y0 and Z.

In [None]:
# Lasso of g on the standardized regressors with the BRT penalty
fit_BRTgx = Lasso(alpha=penalty_BRT, max_iter=10000).fit(X_tilde,g)
coeff_BRTgx, intercept_BRTgx = fit_BRTgx.coef_, fit_BRTgx.intercept_

print('Intercept/constant: ', np.round(intercept_BRTgx,3))
print('Coefficients: ', np.round(coeff_BRTgx,3))

# Selection mask over all columns of X (position 0 is y0)
selected_variables_BRTgx = (coeff_BRTgx != 0)

# Selected controls taken from the unstandardized Z; the mask skips the y0 position
Z_J_BRTgx = Z[:,selected_variables_BRTgx[1:]]
print("The number of variables in Z_J is {}".format(Z_J_BRTgx.shape[1]))
print('Selected varriables: ', X_names[selected_variables_BRTgx].to_list())

Step 3: Regress g using y0 and Z_J

In [None]:
# Post-Lasso regressors: a constant, y0 and the controls selected by the Lasso
xx = np.column_stack((np.ones(N),y0,Z_J_BRTgx))
yy = np.array(g).reshape(-1,1)

# Calculate OLS estimate
coefs_BRT_PSL = la.inv(xx.T@xx)@xx.T@yy

# Row labels must follow the columns of xx. With a fixed two-entry index the DataFrame
# constructor raises as soon as the Lasso selects at least one control; with no selected
# controls this reduces to ['constant', 'lgdp_initial'] (same as the simple OLS above).
labels_BRT_PSL = ['constant', 'lgdp_initial'] + Z_names[coeff_BRTgx[1:] != 0].to_list()
pd.DataFrame(coefs_BRT_PSL, index=labels_BRT_PSL, columns=['gdp_growth'])

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BRT_PSL)

In [None]:
# Residuals and variance estimate sigma^2 (X'X)^{-1} of the post-Lasso OLS fit
res_BRT_PSL = yy - xx@coefs_BRT_PSL
SSR_BRT_PSL = res_BRT_PSL.T@res_BRT_PSL
dof_BRT_PSL = N-xx.shape[1]
sigma2_BRT_PSL = SSR_BRT_PSL/dof_BRT_PSL
var_BRT_PSL = sigma2_BRT_PSL*la.inv(xx.T@xx)

# Standard error of the y0 coefficient (position 1; position 0 is the constant)
se_BRT_PSL = np.sqrt(np.diagonal(var_BRT_PSL))[1]
print("se_BRT_PSL = ",se_BRT_PSL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
beta_y0_BRT_PSL = coefs_BRT_PSL[1][0]
CI_low_BRT_PSL  = beta_y0_BRT_PSL-q*se_BRT_PSL
CI_high_BRT_PSL = beta_y0_BRT_PSL+q*se_BRT_PSL
print("Confidence interval for β_y0 (BRT_PSL) = ",(CI_low_BRT_PSL.round(5),CI_high_BRT_PSL.round(5)))

**Conclusion (BRT_PSL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Post-double Lasso

Step 1 & 2: Same as for Post-single Lasso

Step 3: Lasso D using Z

In [None]:
# BRT penalty for the Lasso of y0 on the standardized controls
penalty_BRTy0z = BRT(X_tilde=Z_tilde, g=y0)

In [None]:
# Lasso of y0 on the standardized controls with the BRT penalty
fit_BRTy0z = Lasso(alpha=penalty_BRTy0z, max_iter=10000).fit(Z_tilde, y0)
coeff_BRTy0z, intercept_BRTy0z = fit_BRTy0z.coef_, fit_BRTy0z.intercept_

print('Intercept/constant: ', np.round(intercept_BRTy0z,5))
print('Coefficients: ', np.round(coeff_BRTy0z,5))

# Controls with a non-zero coefficient in this fit
selected_variables_BRTy0z = (coeff_BRTy0z != 0)
print('Selected varriables: ', Z_names[selected_variables_BRTy0z].to_list())

Step 4: Estimate ${\beta}_{y0}$ (called 'alpha' in the slides)

In [None]:
# Residual of g from the Lasso fit on (y0, Z)
res_BRTgx = g - fit_BRTgx.predict(X_tilde)
# Add the fitted y0 term back, leaving g net of the intercept and the controls only
res_BRTgxz = res_BRTgx + y0_tilde*coeff_BRTgx[0]
# Residual of y0 from the Lasso fit on Z
res_BRTy0z = y0 - fit_BRTy0z.predict(Z_tilde)

# beta_y0 as the ratio of the two inner products
num = res_BRTy0z.dot(res_BRTgxz)
denom = res_BRTy0z.dot(y0)
coef_BRT_PDL = num/denom

print("Coefficient for β_y0 (BRT_PDL) = ",coef_BRT_PDL.round(5))

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BRT_PDL)

In [None]:
# Variance: mean of the products of squared residuals over the squared mean of res_BRTy0z**2
num = (res_BRTy0z**2).dot(res_BRTgx**2)/N
denom = (res_BRTy0z.dot(res_BRTy0z)/N)**2
sigma2_BRT_PDL = num/denom

# Standard error
se_BRT_PDL = np.sqrt(sigma2_BRT_PDL/N)
print("se_BRT_PDL = ",se_BRT_PDL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
CI_low_BRT_PDL, CI_high_BRT_PDL = coef_BRT_PDL-q*se_BRT_PDL, coef_BRT_PDL+q*se_BRT_PDL
print("Confidence interval for β_y0 (BRT_PDL) = ",(CI_low_BRT_PDL.round(5),CI_high_BRT_PDL.round(5)))



**Conclusion (BRT_PDL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Based on BCCH penalty

#### Post-single Lasso

Step 1: Calculate BCCH penalty

In [None]:
def BCCH(X_tilde,g):
    """Return the BCCH Lasso penalty for regressing g on X_tilde.

    Two steps: a pilot penalty scaled by the demeaned outcome, then an updated penalty
    scaled by the residuals of a Lasso fit that uses the pilot penalty.

    Parameters
    ----------
    X_tilde : 2-D array of (standardized) regressors, shape (n, p).
    g : 1-D outcome with n entries.

    Returns
    -------
    The updated penalty, to be passed as `alpha` to sklearn's Lasso.
    """
    # Dimensions must come from the matrix passed in, not from the notebook-level X:
    # otherwise a call such as BCCH(Z_tilde, y0) uses the wrong number of regressors p.
    n,p = X_tilde.shape
    c = 1.1
    alpha = 0.05
    quantile = norm.ppf(1-alpha/(2*p))

    # BCCH pilot penalty
    gXscale = (np.max((X_tilde.T ** 2) @ ((g-np.mean(g)) ** 2) / n)) ** 0.5
    penalty_pilot = c / np.sqrt(n) * quantile * gXscale # Note: Have divided by 2 due to Python definition of Lasso

    # Predicted values from the pilot Lasso (max_iter matches the other Lasso fits in this notebook)
    pred = Lasso(alpha=penalty_pilot, max_iter=10000).fit(X_tilde,g).predict(X_tilde)

    # Updated penalty, scaled by the pilot residuals
    eps = g - pred #eps: epsilon/residuals
    epsXscale = (np.max((X_tilde.T ** 2) @ (eps ** 2) / n)) ** 0.5
    penalty_BCCH = c / np.sqrt(n) * quantile * epsXscale

    return penalty_BCCH

# BCCH penalty for the Lasso of g on the standardized regressors (y0 and Z)
penalty_BCCH = BCCH(X_tilde=X_tilde, g=g)
print("lambda_BCCH =",penalty_BCCH.round(4))

Step 2: Lasso g using y0 and Z.

In [None]:
# Lasso of g on the standardized regressors with the BCCH penalty
fit_BCCHgx = Lasso(alpha=penalty_BCCH, max_iter=10000).fit(X_tilde,g)
coeff_BCCHgx, intercept_BCCHgx = fit_BCCHgx.coef_, fit_BCCHgx.intercept_

print('Intercept/constant: ', np.round(intercept_BCCHgx,3))
print('Coefficients: ', np.round(coeff_BCCHgx,3))

# Selection mask over all columns of X (position 0 is y0)
selected_variables_BCCHgx = (coeff_BCCHgx != 0)

# Selected controls taken from the unstandardized Z; the mask skips the y0 position
Z_J_BCCHgx = Z[:,selected_variables_BCCHgx[1:]]
print("The number of variables in Z_J is {}".format(Z_J_BCCHgx.shape[1]))
print('Selected varriables: ', X_names[selected_variables_BCCHgx].to_list())

Steps 3 and 4 yield the same result as for BRT_PSL, since BCCH_PSL returns the same Z_J (containing no control variables).

**Conclusion (BCCH_PSL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Post-double Lasso

Step 1 & 2: Same as for Post-single Lasso

Step 3: Lasso D using Z

In [None]:
# BCCH penalty for the Lasso of y0 on the standardized controls
penalty_BCCHy0z = BCCH(X_tilde=Z_tilde, g=y0)

In [None]:
# Lasso of y0 on the standardized controls with the BCCH penalty
fit_BCCHy0z = Lasso(alpha=penalty_BCCHy0z, max_iter=10000).fit(Z_tilde, y0)
coeff_BCCHy0z, intercept_BCCHy0z = fit_BCCHy0z.coef_, fit_BCCHy0z.intercept_

print('Intercept/constant: ', np.round(intercept_BCCHy0z,5))
print('Coefficients: ', np.round(coeff_BCCHy0z,5))

# Controls with a non-zero coefficient in this fit
selected_variables_BCCHy0z = (coeff_BCCHy0z != 0)
print('Selected varriables: ', Z_names[selected_variables_BCCHy0z].to_list())

Step 4: Estimate ${\beta}_{y0}$ (called 'alpha' in the slides)

In [None]:
# Residual of g from the Lasso fit on (y0, Z)
res_BCCHgx = g - fit_BCCHgx.predict(X_tilde)
# Add the fitted y0 term back, leaving g net of the intercept and the controls only
res_BCCHgxz = res_BCCHgx + y0_tilde*coeff_BCCHgx[0]
# Residual of y0 from the Lasso fit on Z
res_BCCHy0z = y0 - fit_BCCHy0z.predict(Z_tilde)

# beta_y0 as the ratio of the two inner products
num = res_BCCHy0z.dot(res_BCCHgxz)
denom = res_BCCHy0z.dot(y0)
coef_BCCH_PDL = num/denom

print("Coefficient for β_y0 (BCCH_PDL) = ",coef_BCCH_PDL.round(5))

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BCCH_PDL)

In [None]:
# Variance: mean of the products of squared residuals over the squared mean of res_BCCHy0z**2
num = (res_BCCHy0z**2).dot(res_BCCHgx**2)/N
denom = (res_BCCHy0z.dot(res_BCCHy0z)/N)**2
sigma2_BCCH_PDL = num/denom

# Standard error
se_BCCH_PDL = np.sqrt(sigma2_BCCH_PDL/N)
print("se_BCCH_PDL = ",se_BCCH_PDL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
CI_low_BCCH_PDL, CI_high_BCCH_PDL = coef_BCCH_PDL-q*se_BCCH_PDL, coef_BCCH_PDL+q*se_BCCH_PDL
print("Confidence interval for β_y0 (BCCH_PDL) = ",(CI_low_BCCH_PDL.round(5),CI_high_BCCH_PDL.round(5)))



**Conclusion (BCCH_PDL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

# Analysis 2 (based on dataset with fewer variables / more observations)

## Preparing data for analysis
First we create a dataset (data2) consisting of those observations which have non-missing values for gdp_growth, lgdp_initial and investment_rate (these variables are explicitly mentioned in the assignment text).
We then subset data2 such that it includes only variables with zero missing values.

In [None]:
# Rows where the outcome, the key regressor and investment_rate are all observed
required_columns2 = vv_outcome + vv_key + ['investment_rate']
included_rows2 = dat[required_columns2].notna().all(axis=1)

# Keep those rows, then drop every column that still has a missing value
data2 = dat.loc[included_rows2].dropna(axis=1)

n_rows2, n_cols2 = data2.shape
print(f'The number of observations left in data2 is {n_rows2}.')
print(f'The number of variables with no missing values is {n_cols2}.')
print(f'The remaining variables are: {data2.columns.to_list()}')

In [None]:
# Outcome (kept in decimals; it is not multiplied by 100) and key regressor as 1-D objects
g = data2[vv_outcome].squeeze()
y0 = data2[vv_key].squeeze()

# Controls: everything in data2 except the columns listed below
columns_to_drop = [
    "gdp_growth", "lgdp_initial",  # outcome and key explanatory variable
    "code", "constant", "gdp_initial", "lpop_initial", "pother", "europe",  # irrelevant/perfectly correlated/reference variables
]
Z_basic = data2.drop(columns=columns_to_drop)
poly = PolynomialFeatures(degree=1, include_bias=False)
Z = poly.fit_transform(Z_basic)

# Full regressor matrix with y0 as the first column
X = np.column_stack((y0,Z))
N = X.shape[0]

def standardize(X):
    """Return X with each column centred on its mean and scaled by its sample std.

    The standard deviation uses ddof=1; both statistics are taken along axis 0.
    """
    centred = X - np.mean(X, axis=0)
    scale = np.std(X, axis=0, ddof=1)
    return centred/scale

# Standardized versions of the full regressor matrix, the controls and the key regressor
X_tilde, Z_tilde, y0_tilde = (standardize(arr) for arr in (X, Z, y0))

n_controls2 = Z.shape[1]
print(f'The number of variables in Z is {n_controls2}.')

In [None]:
# Column labels: Z_names for the controls, X_names with the key regressor prepended
# (same ordering as the columns of Z and X)
Z_names = Z_basic.columns
X_names = Z_names.insert(loc=0, item=y0.name)
print(X_names)

## Lasso estimation

### Based on BRT penalty

#### Post-single Lasso

Step 1: Calculate BRT penalty

In [None]:
# Make a function that calculates BRT.
def BRT(X_tilde,g):
    """Return the BRT Lasso penalty for regressing g on X_tilde.

    Computed as c * sigma / sqrt(N) * Phi^{-1}(1 - alpha / (2p)) with c = 1.1,
    alpha = 0.05, sigma the sample standard deviation of g (ddof=1) and
    (N, p) the shape of X_tilde.
    """
    n_obs, n_regressors = X_tilde.shape
    c, alpha = 1.1, 0.05
    sigma = np.std(g, ddof=1)
    quantile = norm.ppf(1 - alpha / (2*n_regressors))
    return (c * sigma) / np.sqrt(n_obs) * quantile

# BRT penalty for the Lasso of g on the standardized regressors (y0 and Z)
penalty_BRT = BRT(X_tilde=X_tilde, g=g)
print("lambda_BRT =",penalty_BRT.round(4))

Step 2: Lasso g using y0 and Z.

In [None]:
# Lasso of g on the standardized regressors with the BRT penalty
fit_BRTgx = Lasso(penalty_BRT, max_iter=10000).fit(X_tilde,g)
coeff_BRTgx = fit_BRTgx.coef_
# Intercept of THIS fit. Reading it from fit_BCCHgx would report the intercept of the
# BCCH fit estimated on the first data set, which is unrelated to the model fitted here.
intercept_BRTgx = fit_BRTgx.intercept_

print('Intercept/constant: ', np.round(intercept_BRTgx,3))
print('Coefficients: ', np.round(coeff_BRTgx,3))
Z_J_BRTgx = Z[:,coeff_BRTgx[1:]!=0] # Note: We use Z and not Z_stan

# Display number of variables in Z_J
print("The number of variables in Z_J is {}".format(Z_J_BRTgx.shape[1]))
# Selection mask over all columns of X (position 0 is y0)
selected_variables_BRTgx = (coeff_BRTgx != 0)
print('Selected varriables: ', X_names[selected_variables_BRTgx].to_list())

Step 3: Regress g using y0 and Z_J

In [None]:
# Post-Lasso regressors: a constant, y0 and the controls selected by the Lasso
xx = np.column_stack((np.ones(N),y0,Z_J_BRTgx))
yy = np.array(g).reshape(-1,1)

# Calculate OLS estimate
coefs_BRT_PSL = la.inv(xx.T@xx)@xx.T@yy

# Row labels must follow the columns of xx: constant, y0, then the selected CONTROLS.
# The mask is therefore applied to Z_names (controls only). Masking X_names would repeat
# 'lgdp_initial' whenever y0 itself has a non-zero Lasso coefficient and make the index
# one entry too long. The index is passed as a flat list, not a list wrapped in a list.
labels_BRT_PSL = ['constant', 'lgdp_initial'] + Z_names[coeff_BRTgx[1:] != 0].to_list()
pd.DataFrame(coefs_BRT_PSL, index=labels_BRT_PSL, columns=['gdp_growth'])

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BRT_PSL)

In [None]:
# Residuals and variance estimate sigma^2 (X'X)^{-1} of the post-Lasso OLS fit
res_BRT_PSL = yy - xx@coefs_BRT_PSL
SSR_BRT_PSL = res_BRT_PSL.T@res_BRT_PSL
dof_BRT_PSL = N-xx.shape[1]
sigma2_BRT_PSL = SSR_BRT_PSL/dof_BRT_PSL
var_BRT_PSL = sigma2_BRT_PSL*la.inv(xx.T@xx)

# Standard error of the y0 coefficient (position 1; position 0 is the constant)
se_BRT_PSL = np.sqrt(np.diagonal(var_BRT_PSL))[1]
print("se_BRT_PSL = ",se_BRT_PSL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
beta_y0_BRT_PSL = coefs_BRT_PSL[1][0]
CI_low_BRT_PSL  = beta_y0_BRT_PSL-q*se_BRT_PSL
CI_high_BRT_PSL = beta_y0_BRT_PSL+q*se_BRT_PSL
print("Confidence interval for β_y0 (BRT_PSL) = ",(CI_low_BRT_PSL.round(5),CI_high_BRT_PSL.round(5)))

**Conclusion (BRT_PSL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Post-double Lasso

Step 1 & 2: Same as for Post-single Lasso

Step 3: Lasso D using Z

In [None]:
# BRT penalty for the Lasso of y0 on the standardized controls
penalty_BRTy0z = BRT(X_tilde=Z_tilde, g=y0)

In [None]:
# Lasso of y0 on the standardized controls with the BRT penalty
fit_BRTy0z = Lasso(alpha=penalty_BRTy0z, max_iter=10000).fit(Z_tilde, y0)
coeff_BRTy0z, intercept_BRTy0z = fit_BRTy0z.coef_, fit_BRTy0z.intercept_

print('Intercept/constant: ', np.round(intercept_BRTy0z,5))
print('Coefficients: ', np.round(coeff_BRTy0z,5))

# Controls with a non-zero coefficient in this fit
selected_variables_BRTy0z = (coeff_BRTy0z != 0)
print('Selected varriables: ', Z_names[selected_variables_BRTy0z].to_list())

Step 4: Estimate ${\beta}_{y0}$ (called 'alpha' in the slides)

In [None]:
# Residual of g from the Lasso fit on (y0, Z)
res_BRTgx = g - fit_BRTgx.predict(X_tilde)
# Add the fitted y0 term back, leaving g net of the intercept and the controls only
res_BRTgxz = res_BRTgx + y0_tilde*coeff_BRTgx[0]
# Residual of y0 from the Lasso fit on Z
res_BRTy0z = y0 - fit_BRTy0z.predict(Z_tilde)

# beta_y0 as the ratio of the two inner products
num = res_BRTy0z.dot(res_BRTgxz)
denom = res_BRTy0z.dot(y0)
coef_BRT_PDL = num/denom

print("Coefficient for β_y0 (BRT_PDL) = ",coef_BRT_PDL.round(5))

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BRT_PDL)

In [None]:
# Variance: mean of the products of squared residuals over the squared mean of res_BRTy0z**2
num = (res_BRTy0z**2).dot(res_BRTgx**2)/N
denom = (res_BRTy0z.dot(res_BRTy0z)/N)**2
sigma2_BRT_PDL = num/denom

# Standard error
se_BRT_PDL = np.sqrt(sigma2_BRT_PDL/N)
print("se_BRT_PDL = ",se_BRT_PDL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
CI_low_BRT_PDL, CI_high_BRT_PDL = coef_BRT_PDL-q*se_BRT_PDL, coef_BRT_PDL+q*se_BRT_PDL
print("Confidence interval for β_y0 (BRT_PDL) = ",(CI_low_BRT_PDL.round(5),CI_high_BRT_PDL.round(5)))



**Conclusion (BRT_PDL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Based on BCCH penalty

#### Post-single Lasso

Step 1: Calculate BCCH penalty

In [None]:
def BCCH(X_tilde,g):
    """Return the BCCH Lasso penalty for regressing g on X_tilde.

    Two steps: a pilot penalty scaled by the demeaned outcome, then an updated penalty
    scaled by the residuals of a Lasso fit that uses the pilot penalty.

    Parameters
    ----------
    X_tilde : 2-D array of (standardized) regressors, shape (n, p).
    g : 1-D outcome with n entries.

    Returns
    -------
    The updated penalty, to be passed as `alpha` to sklearn's Lasso.
    """
    # Dimensions must come from the matrix passed in, not from the notebook-level X:
    # otherwise a call such as BCCH(Z_tilde, y0) uses the wrong number of regressors p.
    n,p = X_tilde.shape
    c = 1.1
    alpha = 0.05
    quantile = norm.ppf(1-alpha/(2*p))

    # BCCH pilot penalty
    gXscale = (np.max((X_tilde.T ** 2) @ ((g-np.mean(g)) ** 2) / n)) ** 0.5
    penalty_pilot = c / np.sqrt(n) * quantile * gXscale # Note: Have divided by 2 due to Python definition of Lasso

    # Predicted values from the pilot Lasso (max_iter matches the other Lasso fits in this notebook)
    pred = Lasso(alpha=penalty_pilot, max_iter=10000).fit(X_tilde,g).predict(X_tilde)

    # Updated penalty, scaled by the pilot residuals
    eps = g - pred #eps: epsilon/residuals
    epsXscale = (np.max((X_tilde.T ** 2) @ (eps ** 2) / n)) ** 0.5
    penalty_BCCH = c / np.sqrt(n) * quantile * epsXscale

    return penalty_BCCH

# BCCH penalty for the Lasso of g on the standardized regressors (y0 and Z)
penalty_BCCH = BCCH(X_tilde=X_tilde, g=g)
print("lambda_BCCH =",penalty_BCCH.round(4))

Step 2: Lasso g using y0 and Z.

In [None]:
# Lasso of g on the standardized regressors with the BCCH penalty
fit_BCCHgx = Lasso(alpha=penalty_BCCH, max_iter=10000).fit(X_tilde,g)
coeff_BCCHgx, intercept_BCCHgx = fit_BCCHgx.coef_, fit_BCCHgx.intercept_

print('Intercept/constant: ', np.round(intercept_BCCHgx,3))
print('Coefficients: ', np.round(coeff_BCCHgx,3))

# Selection mask over all columns of X (position 0 is y0)
selected_variables_BCCHgx = (coeff_BCCHgx != 0)

# Selected controls taken from the unstandardized Z; the mask skips the y0 position
Z_J_BCCHgx = Z[:,selected_variables_BCCHgx[1:]]
print("The number of variables in Z_J is {}".format(Z_J_BCCHgx.shape[1]))
print('Selected varriables: ', X_names[selected_variables_BCCHgx].to_list())

Steps 3 and 4 yield the same result as for BRT_PSL, since BCCH_PSL returns the same Z_J (containing no control variables).

**Conclusion (BCCH_PSL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**

### Post-double Lasso

Step 1 & 2: Same as for Post-single Lasso

Step 3: Lasso D using Z

In [None]:
# BCCH penalty for the Lasso of y0 on the standardized controls
penalty_BCCHy0z = BCCH(X_tilde=Z_tilde, g=y0)

In [None]:
# Lasso of y0 on the standardized controls with the BCCH penalty
fit_BCCHy0z = Lasso(alpha=penalty_BCCHy0z, max_iter=10000).fit(Z_tilde, y0)
coeff_BCCHy0z, intercept_BCCHy0z = fit_BCCHy0z.coef_, fit_BCCHy0z.intercept_

print('Intercept/constant: ', np.round(intercept_BCCHy0z,5))
print('Coefficients: ', np.round(coeff_BCCHy0z,5))

# Controls with a non-zero coefficient in this fit
selected_variables_BCCHy0z = (coeff_BCCHy0z != 0)
print('Selected varriables: ', Z_names[selected_variables_BCCHy0z].to_list())

Step 4: Estimate ${\beta}_{y0}$ (called 'alpha' in the slides)

In [None]:
# Residual of g from the Lasso fit on (y0, Z)
res_BCCHgx = g - fit_BCCHgx.predict(X_tilde)
# Add the fitted y0 term back, leaving g net of the intercept and the controls only
res_BCCHgxz = res_BCCHgx + y0_tilde*coeff_BCCHgx[0]
# Residual of y0 from the Lasso fit on Z
res_BCCHy0z = y0 - fit_BCCHy0z.predict(Z_tilde)

# beta_y0 as the ratio of the two inner products
num = res_BCCHy0z.dot(res_BCCHgxz)
denom = res_BCCHy0z.dot(y0)
coef_BCCH_PDL = num/denom

print("Coefficient for β_y0 (BCCH_PDL) = ",coef_BCCH_PDL.round(5))

Calculating the 95 % confidence interval for ${\beta}_{y0}$ (BCCH_PDL)

In [None]:
# Variance: mean of the products of squared residuals over the squared mean of res_BCCHy0z**2
num = (res_BCCHy0z**2).dot(res_BCCHgx**2)/N
denom = (res_BCCHy0z.dot(res_BCCHy0z)/N)**2
sigma2_BCCH_PDL = num/denom

# Standard error
se_BCCH_PDL = np.sqrt(sigma2_BCCH_PDL/N)
print("se_BCCH_PDL = ",se_BCCH_PDL.round(5))

# Normal quantile for a two-sided 95% interval
q = norm.ppf(1-0.025)

# Interval: point estimate -/+ q standard errors
CI_low_BCCH_PDL, CI_high_BCCH_PDL = coef_BCCH_PDL-q*se_BCCH_PDL, coef_BCCH_PDL+q*se_BCCH_PDL
print("Confidence interval for β_y0 (BCCH_PDL) = ",(CI_low_BCCH_PDL.round(5),CI_high_BCCH_PDL.round(5)))



**Conclusion (BCCH_PDL): data is not consistent with beta convergence (${\beta}_{y0}$ is insignificant)**