# AME Project 1

In [67]:
# Import packeages 
import pandas as pd 
import numpy as np
import seaborn as sns
from numpy import linalg as la
from io import StringIO
from tabulate import tabulate
from matplotlib import pyplot as plt
from scipy.stats import chi2

# Import LinearModels.py file 
import LM_ex3 as lm

# Supress Future Warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Set autoreloads 
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [68]:
# Import data
dat = pd.read_csv('firms.csv')

In [69]:
# Defining dimentions 
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=12


In [70]:
# Making the data narrays
# a. y
y = dat.ldsa.values.reshape((N*T,1))

# b. x (constant, labour, capital)
ones = np.ones((N*T,1))
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
# i. stack the arrays
x = np.hstack([l, k])   # Disregarding constant term, as data is demeaned

# c. set labels
label_y = 'sales'
label_x = ['labour','capital']

## Estimators

### Pooled OLS (PLOS)

In [71]:
# Estimate coefficients
ols_result = lm.estimate(y, x, '', T=T)

# Print table
lm.print_table((label_y, label_x), ols_result, title="Pooled OLS", floatfmt='.4f')

Pooled OLS
Dependent variable: sales

           Beta      Se    t-values
-------  ------  ------  ----------
labour   0.6748  0.0102     66.4688
capital  0.3100  0.0091     33.9269
R² = 0.914
σ² = 0.131


### Fixed Effects (FE) 

In [72]:
# Create transformation matrix
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T,(T,T))
    return Q_T

# Print the matrix
Q_T = demeaning_matrix(T)

# Transform the data
y_demean = lm.perm(Q_T, y)
x_demean = lm.perm(Q_T, x)

# Create function to check rank of demeaned matrix, and return its eigenvalues.
def check_rank(x):
    print(f'Rank of demeaned x: {la.matrix_rank(x)}')
    lambdas, V = la.eig(x.T@x)
    np.set_printoptions(suppress=True)  # This is just to print nicely.
    print(f'Eigenvalues of within-transformed x: {lambdas.round(decimals=0)}')

# Check rank of demeaned x
check_rank(x_demean)

# Estimate FE OLS using the demeaned variables.
fe_result = lm.estimate(y_demean, x_demean, transform='fe', T=T)

# Print results
print('\n')
lm.print_table((label_y, label_x), fe_result, title='FE regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of within-transformed x: [ 58. 214.]


FE regression
Dependent variable: sales

           Beta      Se    t-values
-------  ------  ------  ----------
labour   0.6942  0.0147     47.2447
capital  0.1546  0.0130     11.9311
R² = 0.477
σ² = 0.018


### First-difference (FD)

In [73]:
# create transformation matrix
def fd_matrix(T):
    D_T = np.eye(T, k = 0) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T

# use the function to create the matrix
D_T = fd_matrix(T)

# Transform the data.
y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# Check rank of x_diff
check_rank(x_diff)

# Estimate FE OLS using the demeaned variables.
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T-1)

# Print results
print('\n')
lm.print_table((label_y, label_x), fd_result, title='FD regression', floatfmt='.4f')

Rank of demeaned x: 2
Eigenvalues of within-transformed x: [47. 35.]


FD regression
Dependent variable: sales

           Beta      Se    t-values
-------  ------  ------  ----------
labour   0.5487  0.0183     29.9635
capital  0.0630  0.0191      3.3043
R² = 0.165
σ² = 0.014


### Between (BE)

In [74]:
# Transform the data
def mean_matrix(T):
    P_T = np.tile(1/T, (1,T))
    return P_T

P_T = mean_matrix(T)

# 
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# Estimate 
be_result = lm.estimate(y_mean, x_mean, transform = 'be', T=T)
lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')

Between Estimator
Dependent variable: sales

           Beta      Se    t-values
-------  ------  ------  ----------
labour   0.6672  0.0343     19.4794
capital  0.3188  0.0308     10.3348
R² = 0.923
σ² = 0.115


### Random Effects (RE)

In [75]:
# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
sigma2_u = fe_result['sigma2']
sigma2_c = be_result['sigma2'] - sigma2_u/T
_lambda = 1 - np.sqrt((sigma2_u)/(sigma2_u + T*sigma2_c))

# Create the transformation matrix
def quasi_matrix(T,_lambda):
    return np.eye(T) - _lambda*P_T
C_T = quasi_matrix(T,_lambda)

# Transforming data
y_re = lm.perm(C_T,y)
x_re = lm.perm(C_T,x)

# Estimate 
re_result = lm.estimate(y_re, x_re, transform="re", T=T) 
lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f', _lambda = _lambda)

Random Effects
Dependent variable: sales

           Beta      Se    t-values
-------  ------  ------  ----------
labour   0.7197  0.0131     54.8368
capital  0.1989  0.0117     17.0325
R² = 0.642
σ² = 0.018
λ = 0.887


### All Estimators

In [87]:
estimator_params = np.concatenate((ols_result['b_hat'], fe_result['b_hat'], fd_result['b_hat'], re_result['b_hat']), axis = 1).round(decimals=4)

estimator_se = np.concatenate((ols_result['se'], fe_result['se'], fd_result['se'], re_result['se']), axis = 1).round(decimals=4)



for i in 

combined_array = np.concatenate((estimator_params, estimator_se), axis=1)

# Create an array with parentheses
parentheses = np.array([['(', ')']])

# Repeat the parentheses array to match the number of rows in combined_array
parentheses = np.repeat(parentheses, combined_array.shape[0], axis=0)

# Combine the parentheses and combined_array to insert parentheses around columns
result_array = np.hstack((parentheses, combined_array))

model_headers = ["OLS","FE","FD","RE"]

row_names = np.array([["beta_L", "beta_K"]])

est_comp = np.concatenate((row_names.T, result_array), axis = 1)

est_table = tabulate(est_comp, model_headers, floatfmt= '.4f')
print(est_table)


SyntaxError: invalid syntax (3632940328.py, line 7)

## Tests

### Wald test

In [81]:
# Setting up matrices
R = np.array([[1,1]])   # Imposing restrictions
r = 1

def Wald_test(params, cov, headers, R=R, r=1):
    """
    """
    M = params.shape[1]     # Number of models of interest
    out = np.zeros((3, M))
    for i in range(M):
        out[0,i] = (R@params[:,i].reshape(-1,1) - r).T@la.inv((R@cov[i,:,:]@R.T))@(R@params[:,i].reshape(-1,1) - r)
        out[1,i] = chi2.isf(0.05, df = 1)
        out[2,i] = chi2.sf(out[0,i], 1)
    table = tabulate(out, headers, floatfmt=".2f")
    print(table)

model_headers = ["POLS","FE","FD","RE"]
est_cov = np.array((ols_result['cov'], fe_result['cov'], fd_result['cov'], be_result['cov'], re_result['cov']))
Wald_test(est_params, est_cov, model_headers)

NameError: name 'est_params' is not defined

### Hausman

In [80]:
# Unpack
b_fe = fe_result['b_hat']
b_re = re_result['b_hat'][1:5]
cov_fe = fe_result['cov']
cov_re = re_result['cov'][1:5,1:5]

# Calculate the test statistic
b_diff = b_fe - b_re 
cov_diff = cov_fe - cov_re
H = b_diff.T @ la.inv(cov_diff) @ b_diff

# Find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
M = Q_T.shape[0] 
crit_val = chi2.ppf(0.95, M)
p_val = chi2.sf(H.item(), M)

# Print the results
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

The test statistic is 49.36.
The critical value at a 5% significance level is 19.68.
The p-value is 0.00000081.


### Strict Exogeneity for FE Estimator

In [77]:
F_T = np.eye(T, k=1)[:-1]

# Lead capital
cap_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Lead labour
lab_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))

# Remove the last observed year for every individial
I_T =  np.eye(T, k = 0)[:-1]

x_exo = lm.perm(I_T, x)
y_exo = lm.perm(I_T, y)

# Add cap_lead and lab_lead to x_exo
x_exo = np.hstack((x_exo, cap_lead, lab_lead))

# Within transform the data
Q_T = demeaning_matrix(T-1)

yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

# Estimate model
exo_test = lm.estimate(yw_exo, xw_exo, transform='fe', T=T-1)

# Print results
label_exo = label_x + ['Capital lead', 'Labor lead']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

# Skal der mere til testen? som fx nogle test-statistics? 

Exogeneity test
Dependent variable: sales

                Beta      Se    t-values
------------  ------  ------  ----------
labour        0.5408  0.0234     23.0904
capital       0.0280  0.0230      1.2153
Capital lead  0.1667  0.0258      6.4706
Labor lead    0.1419  0.0225      6.3134
R² = 0.478
σ² = 0.016


### Strict Exogeneity for FD Estimator

In [78]:
F_T = np.eye(T, k=1)[:-1]

# Lead capital
cap_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Lead labour
lab_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))

# Remove the last observed year for every individial
I_T =  np.eye(T, k = 0)[:-1]

x_exo = lm.perm(I_T, x)
y_exo = lm.perm(I_T, y)

# Add cap_lead and lab_lead to x_exo
x_exo = np.hstack((x_exo, cap_lead, lab_lead))

# Within transform the data
Q_T = demeaning_matrix(T-1)

yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

# Estimate model
exo_test = lm.estimate(yw_exo, xw_exo, transform='fd', T=T-1)

# Print results
label_exo = label_x + ['Capital lead', 'Labor lead']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

# Skal der mere til testen? som fx nogle test-statistics? 

Exogeneity test
Dependent variable: sales

                Beta      Se    t-values
------------  ------  ------  ----------
labour        0.5408  0.0223     24.2184
capital       0.0280  0.0220      1.2746
Capital lead  0.1667  0.0246      6.7867
Labor lead    0.1419  0.0214      6.6218
R² = 0.478
σ² = 0.015
