# Linear Panel Estimations



In [144]:
import pandas as pd 
import numpy as np
import seaborn as sns
from numpy import linalg as la
from scipy.stats import chi2
from tabulate import tabulate
import LinearModelsProject1 as lm
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [145]:

## Reading data from the csv file
data = pd.read_csv("firms.csv")

N_list = data.firmid.unique()
T_list = data.year.unique()

N = data.firmid.unique().size
T = data.year.unique().size

y = data.ldsa.values.reshape((N*T,1))
l = data.lemp.values.reshape((N*T,1))
k = data.lcap.values.reshape((N*T,1))

constant = np.ones((y.shape[0], 1))
X = np.hstack([constant, l, k])





In [146]:
# Usi the POLS method to estimate parameters

label_x = ["c", "Log Employment", "Log Adjusted Capital"]
label_y = "Log Deflated Sales"


ols_result = lm.estimate(y, X)

lm.print_table(
    (label_y, label_x), ols_result, title="Pooled OLS", floatfmt='.4f'
)


Pooled OLS
Dependent variable: Log Deflated Sales

                        Beta      Se    t-values
--------------------  ------  ------  ----------
c                     0.0000  0.0050      0.0000
Log Employment        0.6748  0.0102     66.4625
Log Adjusted Capital  0.3100  0.0091     33.9237
R² = 0.914
σ² = 0.131


In [147]:
# Estimate using the FE method

def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T, (T, T))
    return Q_T

Q_T = demeaning_matrix(T)


y_demean = lm.perm(Q_T, y)
x_demean = lm.perm(Q_T, X)
x_demean = x_demean[:, 1:]
label_x_fe = label_x[1:]


fe_result = lm.estimate(
    y_demean, x_demean, transform='fe', T=T, robust_se=True
)


lm.print_table(
    (label_y,label_x_fe), 
    fe_result, title='FE regression', floatfmt='.4'
)



FE regression
Dependent variable: Log Deflated Sales

                        Beta       Se    t-values
--------------------  ------  -------  ----------
Log Employment        0.6942  0.04165      16.67
Log Adjusted Capital  0.1546  0.02995       5.163
R² = 0.477
σ² = 0.018


In [148]:
# Estimate using the FD method

def fd_matrix(T):
    D_T = np.eye(T) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T

D_T = fd_matrix(T)

y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, X[:,1:])
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T, robust_se=True)

lm.print_table(
    (label_y, label_x[1:]), 
    fd_result, title='FD regression', floatfmt='.4f'
)


FD regression
Dependent variable: Log Deflated Sales

                        Beta      Se    t-values
--------------------  ------  ------  ----------
Log Employment        0.5487  0.0284     19.3056
Log Adjusted Capital  0.0630  0.0229      2.7460
R² = 0.165
σ² = 0.014


In [149]:
# Estimate using the RE method

def mean_matrix(T):
    return np.tile(1/T, (1, T))
P_T = mean_matrix(T)

y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, X)

be_result = lm.estimate(
    y_mean, x_mean, transform='be', robust_se=True)

sigma_u = fe_result['sigma2']
sigma_c = be_result['sigma2'] - sigma_u/T
_lambda = 1 - np.sqrt(sigma_u/(sigma_u + T*sigma_c))

C_t = np.eye(T) - _lambda*mean_matrix(T)

x_re = lm.perm(C_t, X)
y_re = lm.perm(C_t, y)

re_result = lm.estimate(
    y_re, x_re, transform='re', T=T, robust_se=True
)

lm.print_table(
    labels=(label_y, label_x), results=re_result, _lambda=_lambda,
    title='RE',
    floatfmt=['', '.4f', '.4f', '.2f']
)

RE
Dependent variable: Log Deflated Sales

                        Beta      Se    t-values
--------------------  ------  ------  ----------
c                     0.0000  0.0168        0.00
Log Employment        0.7197  0.0335       21.46
Log Adjusted Capital  0.1989  0.0261        7.62
R² = 0.642
σ² = 0.018
λ = 0.887


In [150]:
# run serial correlation test

def serial_corr(y, X, T, T_list):
    """Test for serial correlation in panel data
    
    Args:
        y: dependent variable (NT x 1)
        X: independent variables (NT x K)
        T: number of time periods
        T_list: array of time periods
    """
    b_hat = lm.est_ols(y, X)
    e = y - X@b_hat
    
    N = len(e) // T
    e_panel = e.reshape(N, T)
    
    e_lag = e_panel[:, :-1]
    e_current = e_panel[:, 1:]
    
    e_lag_flat = e_lag.reshape(-1, 1)
    e_current_flat = e_current.reshape(-1, 1)
    
    return lm.estimate(e_current_flat, e_lag_flat)

N = len(data.firmid.unique())
T = len(data.year.unique())

corr_result_fd = serial_corr(y_diff, x_diff, T-1, T_list)
corr_result_fe = serial_corr(y_demean, x_demean, T, T_list)

label_ye = 'OLS residual, e\u1d62\u209c'
label_e = ['e\u1d62\u209c\u208B\u2081']

lm.print_table(
    (label_ye, label_e), corr_result_fd, 
    title='Serial Correlation for First Differencing', 
    floatfmt='.4f'
)

lm.print_table(
    (label_ye, label_e), corr_result_fe, 
    title='Serial Correlation for Fixed Effects', 
    floatfmt='.4f'
)

Serial Correlation for First Differencing
Dependent variable: OLS residual, eᵢₜ

          Beta      Se    t-values
-----  -------  ------  ----------
eᵢₜ₋₁  -0.1987  0.0148    -13.4493
R² = 0.039
σ² = 0.014
Serial Correlation for Fixed Effects
Dependent variable: OLS residual, eᵢₜ

         Beta      Se    t-values
-----  ------  ------  ----------
eᵢₜ₋₁  0.5316  0.0123     43.2811
R² = 0.279
σ² = 0.011


In [151]:
# test for exogeneity in the panel data

def exogeneity_test(X, y, T, T_list):
    
    N = len(y) // T

    X_panel = X.reshape(N, T, -1)
    y_panel = y.reshape(N, T, -1) 
    
    F_T = np.eye(T, k=1)
    F_T = F_T[:-1]
    
    capital_lead = X_panel[:, 1:, 2].reshape(-1, 1) 
    employment_lead = X_panel[:, 1:, 1].reshape(-1, 1) 

    X_exo = X_panel[:, :-1, :].reshape(-1, X.shape[1])
    y_exo = y_panel[:, :-1, :].reshape(-1, 1)
    
    X_exo = np.hstack((X_exo, capital_lead, employment_lead))
    
    Q_T = demeaning_matrix(T - 1)
    
    yw_exo = lm.perm(Q_T, y_exo)
    xw_exo = lm.perm(Q_T, X_exo)
    xw_exo = xw_exo[:, 1:] 
    
    label_exo = label_x_fe + ['Employment Lead', 'Capital Lead']
    
    exo_test = lm.estimate(
        yw_exo, xw_exo, 
        T=T-1, 
        transform='fe'
    )
    
    lm.print_table(
        (label_y, label_exo), 
        exo_test, 
        title='Exogeneity test', 
        floatfmt='.4f'
    )
    
    return exo_test

exogeneity_test(X, y, T, T_list)

Exogeneity test
Dependent variable: Log Deflated Sales

                        Beta      Se    t-values
--------------------  ------  ------  ----------
Log Employment        0.5408  0.0234     23.0904
Log Adjusted Capital  0.0280  0.0230      1.2153
Employment Lead       0.1667  0.0258      6.4706
Capital Lead          0.1419  0.0225      6.3134
R² = 0.478
σ² = 0.016


{'b_hat': array([[0.54082746],
        [0.02799744],
        [0.16672914],
        [0.14194843]]),
 'se': array([[0.02342216],
        [0.02303819],
        [0.02576738],
        [0.02248373]]),
 'sigma2': np.float64(0.01605746632468828),
 't_values': array([[23.09041339],
        [ 1.21526191],
        [ 6.47055075],
        [ 6.31338507]]),
 'R2': np.float64(0.4782370109674602),
 'cov': array([[ 5.48597791e-04, -2.65465894e-05, -1.08715395e-04,
         -3.81354590e-04],
        [-2.65465894e-05,  5.30758241e-04, -4.83673692e-04,
          2.48611743e-05],
        [-1.08715395e-04, -4.83673692e-04,  6.63957847e-04,
         -4.48777200e-05],
        [-3.81354590e-04,  2.48611743e-05, -4.48777200e-05,
          5.05518013e-04]])}

In [152]:
# conduct the Hausman test
b_re = re_result['b_hat']
b_re = b_re[1:]

cov_re = re_result['cov']
cov_re = cov_re[1:,1:]


hat_diff = fe_result['b_hat'] - b_re 

cov_diff = fe_result['cov'] - cov_re


H = hat_diff.T@la.inv(cov_diff)@hat_diff 

# calculate the p-value for the test

p_val = chi2.sf(H.item(), hat_diff.size)


def print_h_test(fe_result, re_result, hat_diff, p_val):
    table = []
    for i in range(len(hat_diff)):
        row = [
            fe_result['b_hat'][i], re_result['b_hat'][1:][i], hat_diff[i]
        ]
        table.append(row)

    print(tabulate(
        table, headers=['b_fe', 'b_re', 'b_diff'], floatfmt='.4f'
        ))
    print(f'The Hausman test statistic is: {H.item():.2f}, with p-value: {p_val:_.2f}.')
print_h_test(fe_result, re_result, hat_diff, p_val)

  b_fe    b_re    b_diff
------  ------  --------
0.6942  0.7197   -0.0255
0.1546  0.1989   -0.0442
The Hausman test statistic is: 10.19, with p-value: 0.01.
