In [50]:
from linearmodels import PanelOLS
from statsmodels.iolib.summary2 import summary_col
import pandas as pd
import numpy as np

In [52]:
# Load the processed data set that every model in this notebook is fit on.
df = pd.read_csv('processed3.csv')
# Assign the cast back: astype returns a new Series, so calling it without
# assignment (as before) left the column unchanged.
df['MSA'] = df['MSA'].astype('str')
# The state label is the second comma-separated field of the MSA name,
# stripped of surrounding whitespace. A name without a comma raises IndexError.
df['state'] = df['MSA'].apply(lambda x: x.split(',')[1].strip())
df['Year'] = df['Year'].astype(int)
# Column of ones; the models that list 'const' use it as their intercept.
df['const'] = 1

In [53]:
# MSA-level panel: rows are keyed by (MSA, Year).
msa_panel_keys = ['MSA', 'Year']
tdf = df.set_index(msa_panel_keys)

In [54]:
def pprint(results):
    """Print the first coefficient and its standard error for each fit.

    Parameters
    ----------
    results : iterable
        Fitted results exposing ``params`` and ``std_errors`` attributes,
        each with a ``.values`` array.

    For every result, one line is printed: the first entry of ``params``
    and the first entry of ``std_errors``, both rounded to 4 decimals.
    """
    for fitted in results:
        first_coef = fitted.params.values[0]
        first_se = fitted.std_errors.values[0]
        print(round(first_coef, 4), round(first_se, 4))

In [55]:
# Time effects only on the (MSA, Year) panel, fit with cov_type='robust'.
mod = PanelOLS(
    tdf['Crude Rate'],
    tdf[['min_dist']],
    entity_effects=False,
    time_effects=True,
)
time = mod.fit(cov_type='robust')
pprint([time])

-0.002 0.0003


In [56]:
# Full summary of the time-effects fit on the (MSA, Year) panel.
print(time)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0086
Estimator:                   PanelOLS   R-squared (Between):             -0.0565
No. Observations:                3926   R-squared (Within):               0.0000
Date:                Tue, Dec 08 2020   R-squared (Overall):             -0.0525
Time:                        16:19:16   Log-likelihood                   -8655.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      33.877
Entities:                         105   P-value                           0.0000
Avg Obs:                       37.390   Distribution:                  F(1,3906)
Min Obs:                       1.0000                                           
Max Obs:                       114.00   F-statistic (robust):             51.629
                            

In [57]:
# State-level panel: rows are keyed by (state, Year).
sdf = df.set_index(['state', 'Year'])
y_state = sdf['Crude Rate']
x_state = sdf[['min_dist']]

# Same regression under three effect settings: (entity_effects, time_effects).
effect_settings = {
    'entity': (True, False),
    'time': (False, True),
    'both': (True, True),
}
fits = {}
for label, (use_entity, use_time) in effect_settings.items():
    mod = PanelOLS(y_state, x_state, entity_effects=use_entity, time_effects=use_time)
    fits[label] = mod.fit(cov_type='robust')

entity, time, both = fits['entity'], fits['time'], fits['both']
pprint([entity, time, both])

0.0043 0.0007
-0.002 0.0003
0.0043 0.0007


In [58]:
# Full summary of the time-effects fit on the (state, Year) panel.
print(time)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0086
Estimator:                   PanelOLS   R-squared (Between):             -0.0601
No. Observations:                3926   R-squared (Within):              -0.0097
Date:                Tue, Dec 08 2020   R-squared (Overall):             -0.0525
Time:                        16:19:25   Log-likelihood                   -8655.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      33.877
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                  F(1,3906)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             51.629
                            

In [59]:
# Full summary of the entity-effects fit on the (state, Year) panel.
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0086
Estimator:                   PanelOLS   R-squared (Between):              0.1192
No. Observations:                3926   R-squared (Within):               0.0086
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.1049
Time:                        16:19:25   Log-likelihood                   -8235.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      33.904
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                  F(1,3887)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             38.502
                            

In [60]:
# Full summary of the fit with both entity and time effects on the (state, Year) panel.
print(both)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0088
Estimator:                   PanelOLS   R-squared (Between):              0.1197
No. Observations:                3926   R-squared (Within):               0.0086
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.1054
Time:                        16:19:26   Log-likelihood                   -8218.8
Cov. Estimator:                Robust                                           
                                        F-statistic:                      34.387
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                  F(1,3869)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             38.941
                            

In [62]:
# Add string-typed copies of the Year and MSA labels as columns of tdf
# (nothing is printed here). year_str is used by a later cell as an explicit
# year regressor; msa_str is not used by any cell visible in this section.
#
# The labels are read from tdf's own index levels, so they line up with tdf's
# rows by construction instead of relying on df and tdf sharing row order.
year_str = tdf.index.get_level_values('Year').astype(str)
msa_str = tdf.index.get_level_values('MSA').astype(str)
tdf['year_str'] = year_str
tdf['msa_str'] = msa_str

In [63]:
# Built-in effects are off; the year enters through the string-typed
# year_str column next to the constant and min_dist. drop_absorbed=True is
# passed so PanelOLS drops absorbed regressors instead of raising.
year_spec_cols = ['const', 'min_dist', 'year_str']
mod = PanelOLS(
    tdf['Crude Rate'],
    tdf[year_spec_cols],
    entity_effects=False,
    time_effects=False,
    drop_absorbed=True,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0168
Estimator:                   PanelOLS   R-squared (Between):             -0.0197
No. Observations:                3926   R-squared (Within):               0.0039
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.0168
Time:                        16:19:51   Log-likelihood                   -8655.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      3.5225
Entities:                         105   P-value                           0.0000
Avg Obs:                       37.390   Distribution:                 F(19,3906)
Min Obs:                       1.0000                                           
Max Obs:                       114.00   F-statistic (robust):             4.2712
                            

In [64]:
# Add string-typed copies of the Year and state labels as columns of sdf
# (nothing is printed here); later cells list them as explicit regressors.
#
# The labels are read from sdf's own index levels, so they line up with sdf's
# rows by construction instead of relying on df and sdf sharing row order.
year_str = sdf.index.get_level_values('Year').astype(str)
state_str = sdf.index.get_level_values('state').astype(str)
sdf['year_str'] = year_str
sdf['state_str'] = state_str

In [65]:
# Time FE entered explicitly through the year_str column
# (PanelOLS's own entity/time effects are switched off).
time_fe_cols = ['const', 'min_dist', 'year_str']
mod = PanelOLS(
    sdf['Crude Rate'],
    sdf[time_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.0168
Estimator:                   PanelOLS   R-squared (Between):              0.0225
No. Observations:                3926   R-squared (Within):              -0.0019
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.0168
Time:                        16:19:59   Log-likelihood                   -8655.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      3.5225
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                 F(19,3906)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             4.2712
                            

In [66]:
# State FE entered explicitly through the state_str column
# (PanelOLS's own entity/time effects are switched off).
state_fe_cols = ['const', 'min_dist', 'state_str']
mod = PanelOLS(
    sdf['Crude Rate'],
    sdf[state_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.2062
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0086
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.2062
Time:                        16:20:15   Log-likelihood                   -8235.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      26.576
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                 F(38,3887)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             34.119
                            

In [67]:
# State FE + time FE, both entered explicitly through the state_str and
# year_str columns (PanelOLS's own entity/time effects are switched off).
state_time_fe_cols = ['const', 'min_dist', 'state_str', 'year_str']
mod = PanelOLS(
    sdf['Crude Rate'],
    sdf[state_time_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.2129
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0170
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.2129
Time:                        16:20:21   Log-likelihood                   -8218.8
Cov. Estimator:                Robust                                           
                                        F-statistic:                      18.690
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                 F(56,3869)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             23.694
                            

In [69]:
# State FE (through state_str) with Sex added as a control
# (PanelOLS's own entity/time effects are switched off).
state_sex_cols = ['const', 'min_dist', 'Sex', 'state_str']
mod = PanelOLS(
    sdf['Crude Rate'],
    sdf[state_sex_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.3603
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.2011
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.3603
Time:                        16:20:50   Log-likelihood                   -7811.8
Cov. Estimator:                Robust                                           
                                        F-statistic:                      56.123
Entities:                          38   P-value                           0.0000
Avg Obs:                       103.32   Distribution:                 F(39,3886)
Min Obs:                       6.0000                                           
Max Obs:                       476.00   F-statistic (robust):             65.256
                            

In [74]:
# Race-level panel: rows are keyed by (Race, Year). set_index already returns
# a new frame, so no separate copy of df is needed.
rdf = df.set_index(['Race', 'Year'])

# String-typed copies of the index labels, stored as ordinary columns so the
# models below can list them as regressors. Reading them from rdf's own index
# keeps them aligned with rdf's rows by construction.
year_str = rdf.index.get_level_values('Year').astype(str)
race_str = rdf.index.get_level_values('Race').astype(str)

rdf['year_str'] = year_str
rdf['race_str'] = race_str

In [76]:
# Distinct race labels present in the panel (np.unique returns them sorted).
np.unique(rdf['race_str'])

array(['Asian or Pacific Islander', 'Black or African American', 'White'],
      dtype=object)

In [77]:
# Race FE entered explicitly through the race_str column
# (PanelOLS's own entity/time effects are switched off).
race_fe_cols = ['const', 'min_dist', 'race_str']
mod = PanelOLS(
    rdf['Crude Rate'],
    rdf[race_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.3799
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0406
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.3799
Time:                        16:22:09   Log-likelihood                   -7750.7
Cov. Estimator:                Robust                                           
                                        F-statistic:                      800.93
Entities:                           3   P-value                           0.0000
Avg Obs:                       1308.7   Distribution:                  F(3,3922)
Min Obs:                       122.00                                           
Max Obs:                       3434.0   F-statistic (robust):             1624.3
                            

In [78]:
# Race FE + time FE, both entered explicitly through the race_str and
# year_str columns (PanelOLS's own entity/time effects are switched off).
race_time_fe_cols = ['const', 'min_dist', 'race_str', 'year_str']
mod = PanelOLS(
    rdf['Crude Rate'],
    rdf[race_time_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.3896
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0557
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.3896
Time:                        16:22:13   Log-likelihood                   -7719.6
Cov. Estimator:                Robust                                           
                                        F-statistic:                      118.68
Entities:                           3   P-value                           0.0000
Avg Obs:                       1308.7   Distribution:                 F(21,3904)
Min Obs:                       122.00                                           
Max Obs:                       3434.0   F-statistic (robust):             240.63
                            

In [79]:
# Race FE (through race_str) with Sex added as a control
# (PanelOLS's own entity/time effects are switched off).
race_sex_cols = ['const', 'Sex', 'min_dist', 'race_str']
mod = PanelOLS(
    rdf['Crude Rate'],
    rdf[race_sex_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.5368
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.2833
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.5368
Time:                        16:22:15   Log-likelihood                   -7178.2
Cov. Estimator:                Robust                                           
                                        F-statistic:                      1135.9
Entities:                           3   P-value                           0.0000
Avg Obs:                       1308.7   Distribution:                  F(4,3921)
Min Obs:                       122.00                                           
Max Obs:                       3434.0   F-statistic (robust):             1604.7
                            

In [95]:
# Plant-level panel: rows are keyed by (plant_name, Year). set_index already
# returns a new frame, so no separate copy of df is needed.
pdf = df.set_index(['plant_name', 'Year'])

# String-typed copies of the index labels, stored as ordinary columns so the
# models below can list them as regressors. Reading them from pdf's own index
# keeps them aligned with pdf's rows by construction.
year_str = pdf.index.get_level_values('Year').astype(str)
plant_str = pdf.index.get_level_values('plant_name').astype(str)
pdf['year_str'] = year_str
pdf['plant_str'] = plant_str

In [96]:
# Plant FE entered explicitly through the plant_str column
# (PanelOLS's own entity/time effects are switched off).
plant_fe_cols = ['const', 'min_dist', 'plant_str']
mod = PanelOLS(
    pdf['Crude Rate'],
    pdf[plant_fe_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.2833
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0015
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.2833
Time:                        16:27:01   Log-likelihood                   -8034.9
Cov. Estimator:                Robust                                           
                                        F-statistic:                      34.086
Entities:                          45   P-value                           0.0000
Avg Obs:                       87.244   Distribution:                 F(45,3880)
Min Obs:                       6.0000                                           
Max Obs:                       438.00   F-statistic (robust):             37.019
                            

In [98]:
# Plant FE (through plant_str) with Sex added as a control
# (PanelOLS's own entity/time effects are switched off).
plant_sex_cols = ['const', 'min_dist', 'Sex', 'plant_str']
mod = PanelOLS(
    pdf['Crude Rate'],
    pdf[plant_sex_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.4337
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.2110
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.4337
Time:                        16:27:32   Log-likelihood                   -7572.6
Cov. Estimator:                Robust                                           
                                        F-statistic:                      64.579
Entities:                          45   P-value                           0.0000
Avg Obs:                       87.244   Distribution:                 F(46,3879)
Min Obs:                       6.0000                                           
Max Obs:                       438.00   F-statistic (robust):             69.983
                            

In [102]:
# Plant FE + Year FE (through plant_str and year_str) with Sex as a control
# (PanelOLS's own entity/time effects are switched off).
plant_year_sex_cols = ['const', 'min_dist', 'Sex', 'plant_str', 'year_str']
mod = PanelOLS(
    pdf['Crude Rate'],
    pdf[plant_year_sex_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.4398
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.2195
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.4398
Time:                        16:28:44   Log-likelihood                   -7551.3
Cov. Estimator:                Robust                                           
                                        F-statistic:                      47.364
Entities:                          45   P-value                           0.0000
Avg Obs:                       87.244   Distribution:                 F(64,3861)
Min Obs:                       6.0000                                           
Max Obs:                       438.00   F-statistic (robust):             51.573
                            

In [101]:
# Plant FE only. Sex is NOT among the regressors here, so this repeats the
# plain plant-FE specification (constant, min_dist, plant_str) rather than
# the Sex-controlled one that the previous comment claimed.
plant_only_cols = ['const', 'min_dist', 'plant_str']
mod = PanelOLS(
    pdf['Crude Rate'],
    pdf[plant_only_cols],
    entity_effects=False,
    time_effects=False,
)
entity = mod.fit(cov_type='robust')
print(entity)

                          PanelOLS Estimation Summary                           
Dep. Variable:             Crude Rate   R-squared:                        0.2833
Estimator:                   PanelOLS   R-squared (Between):              1.0000
No. Observations:                3926   R-squared (Within):               0.0015
Date:                Tue, Dec 08 2020   R-squared (Overall):              0.2833
Time:                        16:28:21   Log-likelihood                   -8034.9
Cov. Estimator:                Robust                                           
                                        F-statistic:                      34.086
Entities:                          45   P-value                           0.0000
Avg Obs:                       87.244   Distribution:                 F(45,3880)
Min Obs:                       6.0000                                           
Max Obs:                       438.00   F-statistic (robust):             37.019
                            