# AME Project 1

In [1]:
# a. import packeages 
import pandas as pd 
import numpy as np
from numpy import linalg as la
from tabulate import tabulate
from scipy.stats import chi2

# b. import LinearModels.py file 
import LinearModels as lm

# c. supress Future Warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# d. set autoreloads 
%load_ext autoreload
%autoreload 2

In [2]:
# a. import data
dat = pd.read_csv('firms.csv')
dat = dat[dat.year <= 1970].reset_index(drop=True)

In [3]:
# b. defining dimentions 
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=3


In [4]:
# a. making the data narrays
# i. y
y = dat.ldsa.values.reshape((N*T,1))

# ii. x (labour, capital)
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
x = np.hstack([l, k])   

# iii. set labels
label_y = 'Output'
label_x = ['Labor','Capital']

## Estimators

### Pooled OLS (PLOS)

In [5]:
# a. estimate coefficients
ols_result = lm.estimate(y, x, '', T=T, robust = True)

# b. print table
lm.print_table((label_y, label_x), ols_result, title="OLS", floatfmt='.4f')

OLS
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.6860  0.0208     32.9632  0.0000 (***)
Capital  0.2768  0.0183     15.1375  0.0000 (***)
R² = 0.917
σ² = 0.127


### Fixed Effects (FE) 

In [6]:
# a. create demeaning matrix and use it
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T,(T,T))
    return Q_T
Q_T = demeaning_matrix(T)

# b. demean data
y_demean = lm.perm(Q_T, y)
x_demean = lm.perm(Q_T, x)

# c. create function to check rank of demeaned matrix, and return its eigenvalues
def check_rank(x):
    print(f'Rank of demeaned x: {la.matrix_rank(x)}')
    lambdas, V = la.eig(x.T@x)
    np.set_printoptions(suppress=True)  # This is just to print nicely.
    print(f'Eigenvalues of x: {lambdas.round(decimals=0)}')

# d. check rank of demeaned x
check_rank(x_demean)

# e. estimate FE using the demeaned variables
fe_result = lm.estimate(y_demean, x_demean, transform='fe', T=T, robust=True)

# f. print results
print('\n')
lm.print_table((label_y, label_x), fe_result, title='FE regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of x: [9. 5.]


FE regression
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.6004  0.0497     12.0916  0.0000 (***)
Capital  0.0502  0.0477      1.0533  0.2924
R² = 0.284
σ² = 0.008


### First-difference (FD)

In [7]:
# a. create transformation matrix function and use it
def fd_matrix(T):
    D_T = np.eye(T, k = 0) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T
D_T = fd_matrix(T)

# b. transform the data
y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# c. check rank of x_diff
check_rank(x_diff)

# d. estimate FD using the transformed variables
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T-1, robust=True)

# e. Print results
print('\n')
lm.print_table((label_y, label_x), fd_result, title='FD regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of x: [11.  7.]


FD regression
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.5509  0.0491     11.2173  0.0000 (***)
Capital  0.0381  0.0493      0.7723  0.4401
R² = 0.217
σ² = 0.013


### Between (BE)

In [8]:
# a. create transformation matrix function and use it
def mean_matrix(T):
    P_T = np.tile(1/T, (1,T))
    return P_T
P_T = mean_matrix(T)

# b. transform the data
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# c. check rank of x_mean
check_rank(x_mean)

# d. estimate 
be_result = lm.estimate(y_mean, x_mean, transform = 'be', T=T, robust=True)

# e. print results
print('\n')
lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of x: [  64. 1403.]


Between Estimator
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.6856  0.0356     19.2572  0.0000 (***)
Capital  0.2778  0.0314      8.8594  0.0000 (***)
R² = 0.921
σ² = 0.122


### Random Effects (RE)

In [9]:
# a. calculate lambda from sigma2_u and sigma2_c
sigma2_u = fe_result['sigma2']
sigma2_c = be_result['sigma2'] - sigma2_u/T
_lambda = 1 - np.sqrt((sigma2_u)/(sigma2_u + T*sigma2_c))

# b. create the transformation matrix and use it
def quasi_matrix(T,_lambda):
    return np.eye(T) - _lambda*P_T
C_T = quasi_matrix(T,_lambda)

# c. transforming data
y_re = lm.perm(C_T,y)
x_re = lm.perm(C_T,x)

# d. check rank of x_re
check_rank(x_re)

# e. estimate 
re_result = lm.estimate(y_re, x_re, transform="re", T=T, robust=True, sigma2_u = sigma2_u, sigma2_c = sigma2_c) 

# f. print results
print('\n')
lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f', _lambda = _lambda)

Rank of demeaned x: 2
Eigenvalues of x: [  9. 102.]


Random Effects
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.6912  0.0490     14.0963  0.0000 (***)
Capital  0.2477  0.0468      5.2920  0.0000 (***)
R² = 0.797
σ² = 0.008
λ = 0.851


### All Estimators

In [10]:
# a. collecting all estimated parameters and standard errors
estimator_params = np.concatenate((ols_result['b_hat'], fe_result['b_hat'], fd_result['b_hat'], re_result['b_hat']), axis = 1).round(4)
estimator_se = np.concatenate((ols_result['se'], fe_result['se'], fd_result['se'], re_result['se']), axis = 1).round(4)
estimator_ast = np.concatenate((ols_result['ast'], fe_result['ast'], fd_result['ast'], re_result['ast']), axis = 1)

# b. create model headers and row names
model_headers = ["OLS", "FE", "FD", "RE"]
row_names = np.array([["beta_L", "beta_K"]])

# c. combine estimator_params and estimator_se
combined_values = []
for i in range(estimator_params.shape[0]):
    # i. combining estimator_params and estimator_se in parenthesis
    row_values = [
        f"{estimator_params[i, j]} ({estimator_se[i, j]}) {estimator_ast[i, j]}"
        for j in range(estimator_params.shape[1])
    ]
    # ii. appending row_values
    combined_values.append(row_values)

# d. convert combined_values to a NumPy array
combined_array = np.array(combined_values)

# e. concatenate row_names and combined_array
est_comp = np.hstack((row_names.T, combined_array))

# f. create the table using tabulate
est_table = tabulate(est_comp, headers=model_headers, floatfmt='.4f')

# g. print the table
print('All estimators:')
print(est_table)
print('Note: Parentheses contain standard errors and asterisks contain significans level')

All estimators:
        OLS                  FE                   FD                   RE
------  -------------------  -------------------  -------------------  -------------------
beta_L  0.686 (0.0208) ***   0.6004 (0.0497) ***  0.5509 (0.0491) ***  0.6912 (0.049) ***
beta_K  0.2768 (0.0183) ***  0.0502 (0.0477)      0.0381 (0.0493)      0.2477 (0.0468) ***
Note: Parentheses contain standard errors and asterisks contain significans level


## Tests

### Wald Test - Constant Return to Scale

In [11]:
# a. imposing restrictions
R = np.array([[1, 1]])  
r = 1
Q = 1

# b. create function to perform Wald test
def Wald_test(params, cov, headers_col, headers_row, R=R, r=r):
    # i. Number of test of interest 
    M = params.shape[1]  
    # ii. Create empty matrix
    out = np.zeros((3, M))
    # iii. Loop over estimators
    for i in range(M):
        # o. Test statistic
        test_stat = (R @ params[:, i] - r) @ la.inv(R @ cov[i] @ R.T) @ (R @ params[:, i] - r)
        # oo. Critical value
        critical_value = chi2.ppf(0.95, df = Q)  
        # ooo. p-value
        p_value = chi2.sf(test_stat, df = Q)     
        # oooo. Store results
        out[:, i] = [test_stat, critical_value, p_value]
    # iv. Make and print table
    table = tabulate(np.hstack((headers_row.T, out)), headers_col, floatfmt=".4f")
    print('Wald Test:')
    print(table)

# c. create headers for rows and columns 
headers_row = np.array([['Test value', 'Critical value', 'p-value']])
headers_col = ["POLS", "FE", "FD", "RE"]

# d. extract the covariance matrices
estimator_cov = np.array([ols_result['cov'], fe_result['cov'], fd_result['cov'], re_result['cov']])

# f. perform Wald test
Wald_test(estimator_params, estimator_cov, headers_col, headers_row)

Wald Test:
                   POLS       FE       FD      RE
--------------  -------  -------  -------  ------
Test value      24.8805  38.6404  40.3613  1.5202
Critical value   3.8415   3.8415   3.8415  3.8415
p-value          0.0000   0.0000   0.0000  0.2176


### Hausman

In [12]:
# a. unpack estimators and covariance matrices
b_fe = fe_result['b_hat']
b_re = re_result['b_hat']
cov_fe = fe_result['cov']
cov_re = re_result['cov']

# b. calculate the test statistic
b_diff = b_fe - b_re 
cov_diff = cov_fe - cov_re
H = b_diff.T @ la.inv(cov_diff) @ b_diff

# c. find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
K = b_fe.shape[0]
crit_val = chi2.ppf(0.95, df = K)
p_val = chi2.sf(H.item(), df = K)

# d. Print the results
print('Hausman Test:')
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

Hausman Test:
The test statistic is 95.16.
The critical value at a 5% significance level is 5.99.
The p-value is 0.00000000.


### Strict Exogeneity for FE Estimator

In [13]:
# a. remove the last observed year for lead 
F_T = np.eye(T, k = 1)[:-1]
lab_w = lm.perm(F_T, x[:, 0].reshape(-1, 1))

# b. remove the last observed year
I_T =  np.eye(T, k = 0)[:-1]
x_exo = lm.perm(I_T, x)
y_exo = lm.perm(I_T, y)

# c. add lab_lead and cap_lead to x_exo
x_exo = np.hstack((x_exo, lab_w))

# d. within transform the data
Q_T = demeaning_matrix(T-1)
yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

# e. estimate FE model
exo_test = lm.estimate(yw_exo, xw_exo, transform='fe', T=T-1)

# f. print results
label_exo = label_x + ['Labor w']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test of FE', floatfmt='.4f')

# g. impose restrictions and degrees of freedom
R = np.array([[0,0,1]])
r = np.zeros((1,1))
Q = r.shape[0]

# h. calculate test statistic, critical value and p-value
stat = (R@exo_test['b_hat'] - r).T@la.inv((R@exo_test['cov']@R.T))@(R@exo_test['b_hat'] - r)
crit_val = chi2.ppf(0.95, df = Q)
p_val = chi2.sf(stat.item(), df = Q)

# i. print results
print('\n')
print(f'The test statistic is {stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

Exogeneity test of FE
Dependent variable: Output

           Beta      Se    t-values  p-value
-------  ------  ------  ----------  ------------
Labor    0.4797  0.0507      9.4576  0.0000 (***)
Capital  0.0835  0.0616      1.3564  0.1753
Labor w  0.1608  0.0501      3.2126  0.0014 (**)
R² = 0.220
σ² = 0.006


The test statistic is 10.32.
The critical value at a 5% significance level is 3.84.
The p-value is 0.00131533.


### Strict Exogeneity for FD Estimator

In [14]:
# a. remove first observation and thus create lag capital and labor
F_T = np.eye(T, k=-1)[1:] 
lab_w = lm.perm(F_T, x[:, 0].reshape(-1, 1))

# b. FD transform the data
D_T = fd_matrix(T)
yw_exo = lm.perm(D_T, y)
xw_exo = lm.perm(D_T, x)

# c. stack x and lab_w (lab_w should not be FD transformed)
xw_exo_new = np.hstack((xw_exo, lab_w))

# d. estimate model
exo_test = lm.estimate(yw_exo, xw_exo_new, transform='fd', T=T-1)

# e. print results
label_exo = label_x + ['Labor w']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test of FD', floatfmt='.4f')

# f. impose restrictions and degrees of freedom
R = np.array([[0,0,1]])
r = np.zeros((1,1))
Q = r.shape[0]

# g. calculate test statistic, critical value and p-value
stat = (R@exo_test['b_hat'] - r).T@la.inv((R@exo_test['cov']@R.T))@(R@exo_test['b_hat'] - r)
crit_val = chi2.ppf(0.95, df = Q)
p_val = chi2.sf(stat.item(), df = Q) 

# h. print results
print('\n')
print(f'The test statistic is {stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

Exogeneity test of FD
Dependent variable: Output

            Beta      Se    t-values  p-value
-------  -------  ------  ----------  ------------
Labor     0.5450  0.0368     14.8153  0.0000 (***)
Capital   0.0289  0.0437      0.6596  0.5097
Labor w  -0.0045  0.0033     -1.3614  0.1737
R² = 0.219
σ² = 0.013


The test statistic is 1.85.
The critical value at a 5% significance level is 3.84.
The p-value is 0.17339947.


### Test for serial correlation in the errors of FD
#### An auxilliary AR(1) model

In [15]:
# a. make function to calculate the serial correlation
def serial_corr(y, x, T):
    # i. calculate the residuals
    b_hat = la.inv(x.T @ x) @ (x.T @ y)
    e = y - (x @ b_hat)
    
    # ii. create a lag transformation matrix
    L_T = np.eye(T, k = -1)[1:]
    
    # iii. lag residuals
    e_l = lm.perm(L_T, e)

    # iv. create a transformation matrix that removes the first observation of each individal
    I_T = np.eye(T, k = 0)[1:]
    
    # v. remove first observation of each individual
    e = lm.perm(I_T, e)
    
    # vi. calculate the serial correlation
    return lm.estimate(e, e_l, transform='fd', T=T-1)

# b. estimate serial correlation
corr_result = serial_corr(y_diff, x_diff, T-1)

# c. print results
label_ye = 'OLS residual, e\u1d62\u209c'
label_e = ['e\u1d62\u209c\u208B\u2081']
lm.print_table((label_ye, label_e), corr_result, title='Serial Correlation', floatfmt='.4f')

Serial Correlation
Dependent variable: OLS residual, eᵢₜ

          Beta      Se    t-values  p-value
-----  -------  ------  ----------  ------------
eᵢₜ₋₁  -0.1849  0.0483     -3.8295  0.0001 (***)
R² = 0.032
σ² = 0.013
