# Panel Data with Unobserved Entity Effect

$y_{it} = \beta_1x_{it} + \alpha_i + u_{it}$

In [1]:
import pandas as pd
from linearmodels import PanelOLS
# Download the fatality workbook from the course repository.
FATALITY_URL = 'https://github.com/poposoto/Programming-Assignments/blob/master/Introduction%20to%20Econometrics/fatality.xlsx?raw=true'
data = pd.read_excel(FATALITY_URL)

# linearmodels expects a two-level (entity, time) index; keep the flat
# frame in `data` and put the indexed copy in `data2`.
panel_keys = ['state', 'year']
data2 = data.set_index(panel_keys)

# Regress the rescaled fatality rate on the beer tax with entity effects,
# matching the alpha_i term in the equation above.
mod = PanelOLS.from_formula(
    'I(mrall * 10000) ~ 1 + beertax + EntityEffects',
    data=data2,
)
res = mod.fit(cov_type='robust')
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:       I(mrall * 10000)   R-squared:                        0.0407
Estimator:                   PanelOLS   R-squared (Between):             -0.7126
No. Observations:                 336   R-squared (Within):               0.0407
Date:                Sat, Dec 07 2019   R-squared (Overall):             -0.6380
Time:                        15:44:37   Log-likelihood                    107.97
Cov. Estimator:                Robust                                           
                                        F-statistic:                      12.190
Entities:                          48   P-value                           0.0006
Avg Obs:                       7.0000   Distribution:                   F(1,287)
Min Obs:                       7.0000                                           
Max Obs:                       7.0000   F-statistic (robust):             10.410
                            

$y_{it} = \beta_0 + \beta_1x_{it} + \gamma_2D2_i + \gamma_3D3_i + \cdots + \gamma_nDn_i + u_{it}$

In [2]:
import statsmodels.formula.api as smf
# Label the rows 0..n-1 so later cells can address them by position-like labels.
n_rows = data.shape[0]
data.index = pd.RangeIndex(n_rows)

# Least-squares dummy-variable regression: one dummy per state via C(state),
# which corresponds to the gamma terms in the equation above.
mod2 = smf.ols('I(mrall * 10000) ~ 1 + beertax + C(state)', data=data)
fit2 = mod2.fit()
# Swap in the robust covariance (reported as HC1 in the summary).
res2 = fit2.get_robustcov_results()
print(res2.summary())

                            OLS Regression Results                            
Dep. Variable:       I(mrall * 10000)   R-squared:                       0.905
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     126.2
Date:                Sat, 07 Dec 2019   Prob (F-statistic):          3.70e-166
Time:                        15:44:37   Log-Likelihood:                 107.97
No. Observations:                 336   AIC:                            -117.9
Df Residuals:                     287   BIC:                             69.09
Df Model:                          48                                         
Covariance Type:                  HC1                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          3.4776      0.351      9.

$\tilde{y}_{it} = \beta_1\tilde{x}_{it} + \tilde{u}_{it}$

where $\tilde{z}_{it} = z_{it} - \bar{z}_i$

In [3]:
import numpy as np
def demean(s):
    """Add within-state mean and demeaned versions of column ``s`` to ``data``.

    Operates on the notebook-level DataFrame ``data`` and writes two columns:

    * ``s + 'i'``  -- the mean of ``s`` over all rows sharing the row's state;
    * ``s + 'dm'`` -- ``s`` minus that state mean.

    Parameters
    ----------
    s : str
        Name of an existing numeric column in ``data``.

    Returns
    -------
    None
        ``data`` is modified in place.
    """
    # transform('mean') returns a Series aligned to the row index, so every row
    # gets its own state's mean even if a state's rows are not contiguous.
    # Stacking per-state arrays positionally only works for sorted data.
    state_mean = data.groupby('state')[s].transform('mean')
    data[s + 'i'] = state_mean
    data[s + 'dm'] = data[s] - state_mean
# Create the state-mean and demeaned columns for both regression variables.
for variable in ('mrall', 'beertax'):
    demean(variable)

# OLS on the demeaned series with no intercept ("- 1"), i.e. the
# tilde-equation above.
mod3 = smf.ols('I(mralldm * 10000) ~ beertaxdm - 1', data=data)
fit3 = mod3.fit()
# Swap in the robust covariance (reported as HC1 in the summary).
res3 = fit3.get_robustcov_results()
print(res3.summary())

                                 OLS Regression Results                                
Dep. Variable:     I(mralldm * 10000)   R-squared (uncentered):                   0.041
Model:                            OLS   Adj. R-squared (uncentered):              0.038
Method:                 Least Squares   F-statistic:                              12.15
Date:                Sat, 07 Dec 2019   Prob (F-statistic):                    0.000556
Time:                        15:44:38   Log-Likelihood:                          107.97
No. Observations:                 336   AIC:                                     -213.9
Df Residuals:                     335   BIC:                                     -210.1
Df Model:                           1                                                  
Covariance Type:                  HC1                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

$\Delta{y}_{it} = \beta_1\Delta{x}_{it} + \Delta{u}_{it}$

where $\Delta{z}_{it} = z_{it} - z_{i,t-1}$

In [4]:
# First-difference each series *within* a state.
# Differencing inside the state groups leaves the first observation of every
# state as NaN, so no difference ever spans two states. This replaces a
# whole-frame diff() followed by a row-by-row loop that blanked the rows of a
# hard-coded first year.
ordered = data.sort_values(['state', 'year'])
for column in ('mrall', 'beertax'):
    # The grouped result keeps the original row labels, and column assignment
    # aligns on those labels, so `data` keeps its existing row order.
    data[column + 'diff'] = ordered.groupby('state')[column].diff()

# OLS on the differenced series with no intercept ("- 1"); rows whose
# difference is NaN are excluded by the formula interface.
mod4 = smf.ols('I(mralldiff * 10000) ~ beertaxdiff - 1', data=data)
res4 = mod4.fit().get_robustcov_results()
print(res4.summary())

                                  OLS Regression Results                                 
Dep. Variable:     I(mralldiff * 10000)   R-squared (uncentered):                   0.000
Model:                              OLS   Adj. R-squared (uncentered):             -0.003
Method:                   Least Squares   F-statistic:                            0.01067
Date:                  Sat, 07 Dec 2019   Prob (F-statistic):                       0.918
Time:                          15:44:38   Log-Likelihood:                          58.721
No. Observations:                   288   AIC:                                     -115.4
Df Residuals:                       287   BIC:                                     -111.8
Df Model:                             1                                                  
Covariance Type:                    HC1                                                  
                  coef    std err          t      P>|t|      [0.025      0.975]
--------------------

# Panel Data with Unobserved Entity-Specific Time Trend

$y_{it} = \beta_1x_{it} + \alpha_i + \lambda_it + u_{it}$

$y_{it} = \beta_1x_{it} + \alpha_1D1_i + \cdots + \alpha_nDn_i + \lambda_1(D1_i\times t) + \cdots + \lambda_n(Dn_i\times t) + u_{it}$

In [5]:
# For every state, add a 0/1 indicator column D<state> and collect the formula
# term that interacts it with `year` (the state-specific linear trend).
trend_terms = []
for state in data.state.unique():
    dummy_name = 'D{0}'.format(state)
    data[dummy_name] = data.state.eq(state).astype('int64')
    trend_terms.append('I(' + dummy_name + ' * year) + ')
s = ''.join(trend_terms)

# State intercepts via C(state) with the global intercept removed ("- 1"),
# plus the state trends assembled above and the beer tax.
formula5 = 'I(mrall * 10000) ~ C(state) + ' + s + 'beertax - 1'
mod5 = smf.ols(formula5, data=data)
fit5 = mod5.fit()
res5 = fit5.get_robustcov_results()
print(res5.summary())

                            OLS Regression Results                            
Dep. Variable:       I(mrall * 10000)   R-squared:                       0.956
Model:                            OLS   Adj. R-squared:                  0.939
Method:                 Least Squares   F-statistic:                       nan
Date:                Sat, 07 Dec 2019   Prob (F-statistic):                nan
Time:                        15:44:38   Log-Likelihood:                 237.79
No. Observations:                 336   AIC:                            -281.6
Df Residuals:                     239   BIC:                             88.69
Df Model:                          96                                         
Covariance Type:                  HC1                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
C(state)[1]    -166.0843     46.592     -3.565

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


$\ddot{y}_{it} = \beta_1\ddot{x}_{it} + \ddot{u}_{it}$

where $\ddot{z}_{it} = z_{it} - \frac{4T+2}{T-1}\bar{z}_i + \frac{6t}{T-1}\bar{z}_i + \frac6{T-1}z_i^* - \frac{12t}{(T+1)(T-1)}z_i^*$

$z_i^* = \frac1T\sum_{t=1}^Ttz_{it}$

In [6]:
def detrend(s):
    """Remove a state-specific intercept and linear time trend from column ``s``.

    Operates on the notebook-level DataFrame ``data`` (columns ``state`` and
    ``year`` are required) and writes two columns:

    * ``s + 't'``  -- z_i^* = (1/T) * sum_t t * z_it for the row's state;
    * ``s + 'dt'`` -- the detrended value given by the closed-form expression
      in the markdown above.

    The time index ``t`` runs 1..T and is derived from the earliest year in
    ``data``. The state mean is computed here, so the function does not depend
    on any previously created helper column. The closed form assumes a
    balanced panel with consecutive years and at least two periods.

    Parameters
    ----------
    s : str
        Name of an existing numeric column in ``data``.

    Returns
    -------
    None
        ``data`` is modified in place.
    """
    T = data['year'].nunique()
    # t = 1 for the first year in the panel, 2 for the next, and so on.
    t = data['year'] - data['year'].min() + 1

    # Both statistics are built with grouped transforms, which come back
    # aligned to the row index; the result is therefore independent of how
    # the rows are ordered.
    z_bar = data[s].groupby(data['state']).transform('mean')
    z_star = (data[s] * t).groupby(data['state']).transform('sum') / T

    data[s + 't'] = z_star
    data[s + 'dt'] = (
        data[s]
        - (4 * T + 2) / (T - 1) * z_bar
        + 6 / (T - 1) * z_bar * t
        + 6 / (T - 1) * z_star
        - 12 / ((T + 1) * (T - 1)) * z_star * t
    )
# Create the detrended columns for both regression variables.
for variable in ('mrall', 'beertax'):
    detrend(variable)

# OLS on the detrended series with no intercept ("- 1"), i.e. the
# double-dot equation above.
mod6 = smf.ols('I(mralldt * 10000) ~ beertaxdt - 1', data=data)
fit6 = mod6.fit()
# Swap in the robust covariance (reported as HC1 in the summary).
res6 = fit6.get_robustcov_results()
print(res6.summary())

                                 OLS Regression Results                                
Dep. Variable:     I(mralldt * 10000)   R-squared (uncentered):                   0.005
Model:                            OLS   Adj. R-squared (uncentered):              0.002
Method:                 Least Squares   F-statistic:                              2.315
Date:                Sat, 07 Dec 2019   Prob (F-statistic):                       0.129
Time:                        15:44:38   Log-Likelihood:                          237.79
No. Observations:                 336   AIC:                                     -473.6
Df Residuals:                     335   BIC:                                     -469.8
Df Model:                           1                                                  
Covariance Type:                  HC1                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

$\Delta\Delta{y}_{it} = \beta_1\Delta\Delta{x}_{it} + \Delta\Delta{u}_{it}$

where $\Delta\Delta{z}_{it} = \Delta z_{it} - \Delta z_{i,t-1} = z_{it} - 2z_{i,t-1} + z_{i,t-2}$

In [7]:
# Second-difference each series *within* a state.
# Applying the grouped diff twice leaves the first two observations of every
# state as NaN, so no difference ever spans two states. This replaces a
# whole-frame diff().diff() followed by a row-by-row loop that blanked the
# rows of two hard-coded years.
ordered = data.sort_values(['state', 'year'])
for column in ('mrall', 'beertax'):
    first_diff = ordered.groupby('state')[column].diff()
    # Column assignment aligns on row labels, so `data` keeps its row order.
    data[column + 'diff2'] = first_diff.groupby(ordered['state']).diff()

# OLS on the twice-differenced series with no intercept ("- 1"); rows whose
# difference is NaN are excluded by the formula interface.
mod7 = smf.ols('I(mralldiff2 * 10000) ~ beertaxdiff2 - 1', data=data)
res7 = mod7.fit().get_robustcov_results()
print(res7.summary())

                                  OLS Regression Results                                  
Dep. Variable:     I(mralldiff2 * 10000)   R-squared (uncentered):                   0.013
Model:                               OLS   Adj. R-squared (uncentered):              0.008
Method:                    Least Squares   F-statistic:                              4.826
Date:                   Sat, 07 Dec 2019   Prob (F-statistic):                      0.0290
Time:                           15:44:38   Log-Likelihood:                         -56.714
No. Observations:                    240   AIC:                                      115.4
Df Residuals:                        239   BIC:                                      118.9
Df Model:                              1                                                  
Covariance Type:                     HC1                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------