# AME Project 1

In [2]:
# Import packeages 
import pandas as pd 
import numpy as np
from numpy import linalg as la
from tabulate import tabulate
from scipy.stats import chi2

# Import LinearModels.py file 
import LM_ex3 as lm

# Supress Future Warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Set autoreloads 
%load_ext autoreload
%autoreload 2

In [3]:
# Import data
dat = pd.read_csv('firms.csv')

In [4]:
# Defining dimentions 
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=12


In [5]:
# Making the data narrays
# a. y
y = dat.ldsa.values.reshape((N*T,1))

# b. x (constant, labour, capital)
ones = np.ones((N*T,1))
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
# i. Disregarding constant term, as data is demeaned
x = np.hstack([l, k])   

# c. set labels
label_y = 'Output'
label_x = ['Labor','Capital']

## Estimators

### Pooled OLS (PLOS)

In [6]:
# Estimate coefficients
ols_result = lm.estimate(y, x, '', T=T)

# Print table
lm.print_table((label_y, label_x), ols_result, title="Pooled OLS", floatfmt='.4f')

Pooled OLS
Dependent variable: Output

           Beta      Se    t-values
-------  ------  ------  ----------
Labor    0.6748  0.0102     66.4688
Capital  0.3100  0.0091     33.9269
R² = 0.914
σ² = 0.131


### Fixed Effects (FE) 

In [7]:
# Create transformation matrix
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T,(T,T))
    return Q_T

# Print the matrix
Q_T = demeaning_matrix(T)

# Transform the data
y_demean = lm.perm(Q_T, y)
x_demean = lm.perm(Q_T, x)

# Create function to check rank of demeaned matrix, and return its eigenvalues.
def check_rank(x):
    print(f'Rank of demeaned x: {la.matrix_rank(x)}')
    lambdas, V = la.eig(x.T@x)
    np.set_printoptions(suppress=True)  # This is just to print nicely.
    print(f'Eigenvalues of within-transformed x: {lambdas.round(decimals=0)}')

# Check rank of demeaned x
check_rank(x_demean)

# Estimate FE OLS using the demeaned variables.
fe_result = lm.estimate(y_demean, x_demean, transform='fe', T=T)

# Print results
print('\n')
lm.print_table((label_y, label_x), fe_result, title='FE regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of within-transformed x: [ 58. 214.]


FE regression
Dependent variable: Output

           Beta      Se    t-values
-------  ------  ------  ----------
Labor    0.6942  0.0147     47.2447
Capital  0.1546  0.0130     11.9311
R² = 0.477
σ² = 0.018


### First-difference (FD)

In [8]:
# create transformation matrix
def fd_matrix(T):
    D_T = np.eye(T, k = 0) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T

# use the function to create the matrix
D_T = fd_matrix(T)

# Transform the data.
y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# Check rank of x_diff
check_rank(x_diff)

# Estimate FE OLS using the demeaned variables.
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T-1)

# Print results
print('\n')
lm.print_table((label_y, label_x), fd_result, title='FD regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of within-transformed x: [47. 35.]


FD regression
Dependent variable: Output

           Beta      Se    t-values
-------  ------  ------  ----------
Labor    0.5487  0.0183     29.9635
Capital  0.0630  0.0191      3.3043
R² = 0.165
σ² = 0.014


### Between (BE)

In [9]:
# Transform the data
def mean_matrix(T):
    P_T = np.tile(1/T, (1,T))
    return P_T

P_T = mean_matrix(T)

# 
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# Estimate 
be_result = lm.estimate(y_mean, x_mean, transform = 'be', T=T)
lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')

Between Estimator
Dependent variable: Output

           Beta      Se    t-values
-------  ------  ------  ----------
Labor    0.6672  0.0343     19.4794
Capital  0.3188  0.0308     10.3348
R² = 0.923
σ² = 0.115


### Random Effects (RE)

In [10]:
# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
sigma2_u = fe_result['sigma2']
sigma2_c = be_result['sigma2'] - sigma2_u/T
_lambda = 1 - np.sqrt((sigma2_u)/(sigma2_u + T*sigma2_c))

# Create the transformation matrix
def quasi_matrix(T,_lambda):
    return np.eye(T) - _lambda*P_T
C_T = quasi_matrix(T,_lambda)

# Transforming data
y_re = lm.perm(C_T,y)
x_re = lm.perm(C_T,x)

# Estimate 
re_result = lm.estimate(y_re, x_re, transform="re", T=T) 
lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f', _lambda = _lambda)

Random Effects
Dependent variable: Output

           Beta      Se    t-values
-------  ------  ------  ----------
Labor    0.7197  0.0131     54.8368
Capital  0.1989  0.0117     17.0325
R² = 0.642
σ² = 0.018
λ = 0.887


### All Estimators

In [11]:
# Collecting all estimated parameters and standard errors
estimator_params = np.concatenate((ols_result['b_hat'], fe_result['b_hat'], fd_result['b_hat'], re_result['b_hat']), axis = 1).round(4)
estimator_se = np.concatenate((ols_result['se'], fe_result['se'], fd_result['se'], re_result['se']), axis = 1).round(4)

# Create model headers and row names
model_headers = ["OLS", "FE", "FD", "RE"]
row_names = np.array([["beta_L", "beta_K"]])

# Combine estimator_params and estimator_se
combined_values = []
for i in range(estimator_params.shape[0]):
    row_values = [
        f"{estimator_params[i, j]} ({estimator_se[i, j]})"
        for j in range(estimator_params.shape[1])
    ]
    combined_values.append(row_values)

# Convert combined_values to a NumPy array
combined_array = np.array(combined_values)

# Concatenate row_names and combined_array
est_comp = np.hstack((row_names.T, combined_array))

# Create the table using tabulate
est_table = tabulate(est_comp, headers=model_headers, floatfmt='.4f')

# Print the table
print(est_table)
print('Note: Parentheses contain standard errors')

        OLS              FE               FD               RE
------  ---------------  ---------------  ---------------  ---------------
beta_L  0.6748 (0.0102)  0.6942 (0.0147)  0.5487 (0.0183)  0.7197 (0.0131)
beta_K  0.31 (0.0091)    0.1546 (0.013)   0.063 (0.0191)   0.1989 (0.0117)
Note: Parentheses contain standard errors


## Tests

### Wald Test - Constant Return to Scale

In [12]:
# Setting up matrices
R = np.array([[1, 1]])  # Imposing restrictions
r = 1 

# Create function to perform Wald test
def Wald_test(params, cov, headers_col, headers_row, R=R, r=r):
    # a. Number of test of interest 
    M = params.shape[1]  
    # b. Create empty matrix
    out = np.zeros((3, M))
    # c. Loop over estimators
    for i in range(M):
        # i. Test statistic
        test_stat = (R @ params[:, i] - r) @ la.inv(R @ cov[i] @ R.T) @ (R @ params[:, i] - r)
        # ii. Critical value
        critical_value = chi2.ppf(0.95, r)  
        # iii. p-value
        p_value = chi2.sf(test_stat, r)     
        # iv. Store results
        out[:, i] = [test_stat, critical_value, p_value]
    # d. Make and print table
    table = tabulate(np.hstack((headers_row.T, out)), headers_col, floatfmt=".4f")
    print(table)

# Create headers for rows and columns 
headers_row = np.array([['Test value', 'Critical value', 'p-value']])
headers_col = ["POLS", "FE", "FD", "RE"]

# Extract the covariance matrices
estimator_cov = np.array([ols_result['cov'], fe_result['cov'], fd_result['cov'], re_result['cov']])

# Perform Wald test
Wald_test(estimator_params, estimator_cov, headers_col, headers_row)

                   POLS        FE        FD       RE
--------------  -------  --------  --------  -------
Test value      13.0086  135.2718  251.6338  74.2907
Critical value   3.8415    3.8415    3.8415   3.8415
p-value          0.0003    0.0000    0.0000   0.0000


### Hausman

In [13]:
# Unpack estimators and covariance matrices
b_fe = fe_result['b_hat']
b_re = re_result['b_hat']
cov_fe = fe_result['cov']
cov_re = re_result['cov']

# Calculate the test statistic
b_diff = b_fe - b_re 
cov_diff = cov_fe - cov_re
H = b_diff.T @ la.inv(cov_diff) @ b_diff

# Find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
M = estimator_params.shape[0] 
crit_val = chi2.ppf(0.95, M)
p_val = chi2.sf(H.item(), M)

# Print the results
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

The test statistic is 73.64.
The critical value at a 5% significance level is 5.99.
The p-value is 0.00000000.


### Strict Exogeneity for FE Estimator

In [26]:
# Remove the last observed year
F_T = np.eye(T, k = 1)[:-1]
lab_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))
cap_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Remove the last observed year
I_T =  np.eye(T, k = 0)[:-1]
x_exo = lm.perm(I_T, x)
y_exo = lm.perm(I_T, y)

# Add lab_lead and cap_lead to x_exo
x_exo = np.hstack((x_exo, cap_lead, lab_lead))

# Within transform the data
Q_T = demeaning_matrix(T-1)
yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

# Estimate model
exo_test = lm.estimate(yw_exo, xw_exo, transform='fe', T=T-1)

# Print results
label_exo = label_x + ['Labor lead', 'Capital lead']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

# Impose restrictions
R = np.array([[0,0,1,0],[0,0,0,1]])
r = np.zeros((2,1))

stat = (R@exo_test['b_hat'] - r).T@la.inv((R@exo_test['cov']@R.T))@(R@exo_test['b_hat'] - r)
crit_val = chi2.ppf(0.95, 2)
p_val = chi2.sf(stat.item(), 2)

print('\n')
print(f'The test statistic is {stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

Exogeneity test
Dependent variable: Output

                Beta      Se    t-values
------------  ------  ------  ----------
Labor         0.5408  0.0234     23.0904
Capital       0.0280  0.0230      1.2153
Labor lead    0.1667  0.0258      6.4706
Capital lead  0.1419  0.0225      6.3134
R² = 0.478
σ² = 0.016


The test statistic is 88.59.
The critical value at a 5% significance level is 5.99.
The p-value is 0.00000000.


### Strict Exogeneity for FD Estimator

In [35]:
F_T = np.eye(T, k=1)[:-1]
print(F_T)

# Lead capital and labor
lab_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))
#print(x[:, 0].reshape(-1, 1).shape)
print(lab_lead.shape)
cap_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))
print(cap_lead.shape)

# Remove the last observed year for every individial
I_T =  np.eye(T, k = 0)[:-1]
#print(I_T)
x_exo = lm.perm(I_T, x)
print(x_exo.shape)
y_exo = lm.perm(I_T, y)

# Add cap_lead and lab_lead to x_exo
x_exo = np.hstack((x_exo, cap_lead, lab_lead))

# Within transform the data
Q_T = demeaning_matrix(T-1)
yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

# Estimate model
exo_test = lm.estimate(yw_exo, xw_exo, transform='fd', T=T-1)

# Print results
label_exo = label_x + ['Capital lead', 'Labor lead']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

# Impose restrictions
R = np.array([[0,0,1,0],[0,0,0,1]])
r = np.zeros((2,1))

stat = (R@exo_test['b_hat'] - r).T@la.inv((R@exo_test['cov']@R.T))@(R@exo_test['b_hat'] - r)
crit_val = chi2.ppf(0.95, 2)
p_val = chi2.sf(stat.item(), 2)

print('\n')
print(f'The test statistic is {stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
(4851, 1)
(4851, 1)
(4851, 2)
Exogeneity test
Dependent variable: Output

                Beta      Se    t-values
------------  ------  ------  ----------
Labor         0.5408  0.0223     24.2184
Capital       0.0280  0.0220      1.2746
Capital lead  0.1667  0.0246      6.7867
Labor lead    0.1419  0.0214      6.6218
R² = 0.478
σ² = 0.015


The test statistic is 97.45.
The critical value at a 5% significance level is 5.99.
The p-value is 0.00000000.
