In [50]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

from numpy.polynomial.polynomial import polyfit

import pyarrow as pa
import pyarrow.parquet as pq

import statsmodels.api as sm
import statsmodels
#import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
from linearmodels.panel import PanelOLS

import warnings
warnings.filterwarnings("ignore")

### First Steps

Here we are going to combine the trade and autos data set...

In [51]:
# Directory path for figures (presumably where plots get saved — no cell in this
# section reads it; confirm against the rest of the notebook).
# NOTE(review): this is an absolute, machine-specific Windows path, so it will
# not resolve on another checkout or operating system.
fig_path = "C:\\github\\expenditure_tradeshocks\\figures"

In [52]:
# Load the county-level trade/employment panel from the `data` folder that sits
# under the current working directory.
file_path = os.getcwd()

# Build the path with os.path.join so the separator matches the host OS; the
# previous hard-coded "\\data\\..." string only resolved on Windows.
trade_county = pq.read_table(
    os.path.join(file_path, "data", "trade_employment_blssingle19.parquet")
).to_pandas()

In [53]:
# Convert the `time` column to pandas timestamps, then key the panel by
# (area_fips, time) so the transformations below operate on an indexed frame.
trade_county["time"] = pd.to_datetime(trade_county["time"])

trade_county.set_index(keys=["area_fips", "time"], inplace=True)

In [74]:
#trade_county.head(40)

In [55]:
# Log transforms used by the regressions.
# `tariff` is scaled by 0.01 before the log, i.e. log(1 + tariff / 100).
trade_county["log_tariff"] = np.log(1 + trade_county["tariff"] * .01)

# log(0) evaluates to -inf; those entries are turned into missing values so they
# drop out of later differences instead of propagating infinities.
trade_county["log_exp_total"] = np.log(trade_county["total_exp_pc"]).replace(
    to_replace=-np.inf, value=np.nan
)

trade_county["log_exp_china"] = np.log(trade_county["china_exp_pc"]).replace(
    to_replace=-np.inf, value=np.nan
)

trade_county["log_employment"] = np.log(trade_county["emp_gds"]).replace(
    to_replace=-np.inf, value=np.nan
)

# Explicit intercept column for the regression design matrices.
trade_county["const"] = 1

In [56]:
# Move (area_fips, time) back into ordinary columns so the county identifier can
# be renamed and recast.
trade_county.reset_index(inplace=True)

trade_county.rename(columns={"area_fips": "GEOFIPS"}, inplace=True)

# State code = first two characters of the five-digit county FIPS. The code is
# zero-padded to five characters first: without the padding, an identifier that
# lost its leading zero (e.g. 1001 instead of "01001") would yield "10" rather
# than "01". Codes that are already five characters long are unaffected.
trade_county["state_fips"] = trade_county["GEOFIPS"].astype(str).str.zfill(5).str[0:2]

# Integer county identifier; must run after `state_fips` is derived, since the
# integer form cannot carry a leading zero.
trade_county["GEOFIPS"] = trade_county["GEOFIPS"].astype(int)

In [57]:
# Disabled transform kept from the original notebook:
#trade_county["log_value"] = np.log(trade_county["value"]).replace(-np.inf,np.nan)

# Re-index the panel by (GEOFIPS, time); the groupby/diff and IndexSlice
# selections in later cells address these two index levels by name.
trade_county.set_index(["GEOFIPS", "time"], inplace = True)

In [58]:
# Preview the first five rows of the re-indexed panel as a sanity check.
trade_county.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,fips,total_employment,emp_rtl,emp_all,emp_gds,emp_ngds,rural_share,2010_population,2017_income,2017_population,log_tariff,log_exp_total,log_exp_china,log_employment,const,state_fips
GEOFIPS,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10001,2016-01-01,453.257185,47.280196,1.069532,2843.0,10001,29514.0,9269.0,0.0,0.0,38494.0,0.269694,162310.0,57647.0,173145.0,0.010639,6.11646,3.856092,,1,10
10001,2016-02-01,471.930726,47.211522,1.069499,2843.0,10001,29514.0,9236.0,0.0,0.0,38646.0,0.269694,162310.0,57647.0,173145.0,0.010638,6.156832,3.854638,,1,10
10001,2016-03-01,485.37676,35.078484,1.0695,2843.0,10001,29514.0,9342.0,0.0,0.0,38917.0,0.269694,162310.0,57647.0,173145.0,0.010638,6.184925,3.557588,,1,10
10001,2016-04-01,460.259354,27.991526,1.0695,2843.0,10001,29514.0,9376.0,0.0,0.0,39719.0,0.269694,162310.0,57647.0,173145.0,0.010638,6.13179,3.331902,,1,10
10001,2016-05-01,473.572638,28.235163,1.069499,2843.0,10001,29514.0,9265.0,0.0,0.0,40164.0,0.269694,162310.0,57647.0,173145.0,0.010638,6.160305,3.340568,,1,10


In [59]:
# Year-over-year changes: 12-row differences within each county.
#
# `diff(12)` is positional -- it subtracts the value twelve ROWS earlier inside
# each GEOFIPS group -- so the frame has to be in (GEOFIPS, time) order BEFORE
# any difference is taken. The sort is therefore done once, up front; the
# results are assigned back by index label, so no re-sorting is needed after
# each new column.
# NOTE(review): a 12-row lag equals a 12-month lag only if every county has one
# row per consecutive month with no gaps -- TODO confirm against the source data.
trade_county.sort_values(["GEOFIPS", "time"], inplace = True)

trade_county["tariff_change"] = trade_county.groupby(["GEOFIPS"])["tariff"].diff(12)

trade_county["log_tariff_change"] = trade_county.groupby(["GEOFIPS"])["log_tariff"].diff(12)

trade_county["total_trade_growth"] = trade_county.groupby(["GEOFIPS"])["log_exp_total"].diff(12)

trade_county["china_trade_growth"] = trade_county.groupby(["GEOFIPS"])["log_exp_china"].diff(12)

### Regression Analysis


In [60]:
# List the available columns, including the change/growth variables added above.
trade_county.columns

Index(['total_exp_pc', 'china_exp_pc', 'tariff', 'emplvl_2017', 'fips',
       'total_employment', 'emp_rtl', 'emp_all', 'emp_gds', 'emp_ngds',
       'rural_share', '2010_population', '2017_income', '2017_population',
       'log_tariff', 'log_exp_total', 'log_exp_china', 'log_employment',
       'const', 'state_fips', 'tariff_change', 'log_tariff_change',
       'total_trade_growth', 'china_trade_growth'],
      dtype='object')

#### Total Trade, Weighted

In [61]:
# First month of the estimation window; the regression cells slice the time
# level of the index from this date forward.
start = "2018-01-01"

# Column the weighted regression cells pass to PanelOLS as observation weights.
weight_var = '2010_population'

In [62]:
# Total trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, constant only (no fixed effects), covariance
# clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Per the original author's note: some counties show a zero in the weighting
# column because of their size and how the QCEW reports them, and PanelOLS does
# not accept a zero weight, so zeros are bumped to a negligible positive value.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(dataset["total_trade_growth"], dataset[exog_vars], weights=weights)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.1076
Estimator:                   PanelOLS   R-squared (Between):             -0.0191
No. Observations:               51552   R-squared (Within):               0.1964
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.1076
Time:                        12:27:03   Log-likelihood                 4.972e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   1.337e+04
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,51550)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             466.79
                    

In [63]:
# Total trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, with time effects, covariance clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Per the original author's note: some counties show a zero in the weighting
# column because of their size and how the QCEW reports them, and PanelOLS does
# not accept a zero weight, so zeros are bumped to a negligible positive value.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(
    dataset["total_trade_growth"],
    dataset[exog_vars],
    weights=weights,
    time_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0185
Estimator:                   PanelOLS   R-squared (Between):              0.0213
No. Observations:               51552   R-squared (Within):               0.1027
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0692
Time:                        12:27:03   Log-likelihood                 5.699e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   1.046e+04
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,51533)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             135.20
                    

In [64]:
# Total trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, with entity and time effects, covariance clustered
# by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Per the original author's note: some counties show a zero in the weighting
# column because of their size and how the QCEW reports them, and PanelOLS does
# not accept a zero weight, so zeros are bumped to a negligible positive value.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(
    dataset["total_trade_growth"],
    dataset[exog_vars],
    weights=weights,
    entity_effects=True,
    time_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0144
Estimator:                   PanelOLS   R-squared (Between):              0.0210
No. Observations:               51552   R-squared (Within):               0.0958
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0650
Time:                        12:27:04   Log-likelihood                 8.055e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   1.037e+04
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,48670)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             146.47
                    

#### Total Trade, Unweighted

In [65]:
# Total trade growth on the 12-month log tariff change.
# Unweighted (no `weights` argument), constant only, covariance clustered by
# entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

mod = PanelOLS(dataset["total_trade_growth"], dataset[exog_vars])
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0473
Estimator:                   PanelOLS   R-squared (Between):             -0.0275
No. Observations:               53226   R-squared (Within):               0.1007
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0473
Time:                        12:27:04   Log-likelihood                 1.784e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      2644.2
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53224)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             184.81
                            

In [66]:
# Total trade growth on the 12-month log tariff change.
# Unweighted (no `weights` argument), with time effects, covariance clustered
# by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

mod = PanelOLS(dataset["total_trade_growth"], dataset[exog_vars], time_effects=True)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0029
Estimator:                   PanelOLS   R-squared (Between):              0.0042
No. Observations:               53226   R-squared (Within):               0.0314
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0200
Time:                        12:27:04   Log-likelihood                 2.337e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      153.35
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53207)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             26.584
                            

In [67]:
# Total trade growth on the 12-month log tariff change.
# Unweighted (no `weights` argument), with entity and time effects, covariance
# clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["total_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

mod = PanelOLS(
    dataset["total_trade_growth"],
    dataset[exog_vars],
    time_effects=True,
    entity_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0015
Estimator:                   PanelOLS   R-squared (Between):              0.0039
No. Observations:               53226   R-squared (Within):               0.0257
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0166
Time:                        12:27:04   Log-likelihood                  4.38e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      73.492
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,50251)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             11.474
                            

#### China Trade, Weighted

In [68]:
# China trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, constant only (no fixed effects), covariance
# clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Zero entries in the weighting column are replaced with a negligible positive
# value before being handed to PanelOLS.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(dataset["china_trade_growth"], dataset[exog_vars], weights=weights)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.1692
Estimator:                   PanelOLS   R-squared (Between):              0.1691
No. Observations:               51552   R-squared (Within):               0.1693
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.1692
Time:                        12:27:05   Log-likelihood                -1.942e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      7520.1
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,51550)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             721.90
                    

In [69]:
# China trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, with time effects, covariance clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Zero entries in the weighting column are replaced with a negligible positive
# value before being handed to PanelOLS.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(
    dataset["china_trade_growth"],
    dataset[exog_vars],
    weights=weights,
    time_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0648
Estimator:                   PanelOLS   R-squared (Between):              0.1893
No. Observations:               51552   R-squared (Within):               0.1343
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.1464
Time:                        12:27:05   Log-likelihood                -1.544e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      91.414
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,51533)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             275.49
                    

In [70]:
# China trade growth on the 12-month log tariff change.
# Weighted by `weight_var`, with entity and time effects, covariance clustered
# by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population", "2010_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# Zero entries in the weighting column are replaced with a negligible positive
# value before being handed to PanelOLS.
weights = dataset[weight_var].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(
    dataset["china_trade_growth"],
    dataset[exog_vars],
    weights=weights,
    time_effects=True,
    entity_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

18
3252
                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0169
Estimator:                   PanelOLS   R-squared (Between):              0.1636
No. Observations:               51552   R-squared (Within):               0.1050
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.1179
Time:                        12:27:06   Log-likelihood                   -7928.4
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6111.4
Entities:                        2864   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,48670)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             119.48
                    

#### China Trade, Unweighted

In [71]:
# China trade growth on the 12-month log tariff change.
# Unweighted, constant only (no fixed effects), covariance clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# NOTE(review): `weights` is built from 2017_population here but is NOT passed
# to PanelOLS below, so this fit is unweighted. The assignment is retained only
# so the notebook-level `weights` variable keeps the value it had before.
weights = dataset["2017_population"].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(dataset["china_trade_growth"], dataset[exog_vars])
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0974
Estimator:                   PanelOLS   R-squared (Between):              0.1165
No. Observations:               53226   R-squared (Within):               0.0933
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0974
Time:                        12:27:06   Log-likelihood                -5.196e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5745.8
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53224)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             354.15
                            

In [72]:
# China trade growth on the 12-month log tariff change.
# Unweighted, with time effects, covariance clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# NOTE(review): `weights` is built from 2017_population here but is NOT passed
# to PanelOLS below, so this fit is unweighted. The assignment is retained only
# so the notebook-level `weights` variable keeps the value it had before.
weights = dataset["2017_population"].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(dataset["china_trade_growth"], dataset[exog_vars], time_effects=True)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0418
Estimator:                   PanelOLS   R-squared (Between):              0.1398
No. Observations:               53226   R-squared (Within):               0.0717
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0838
Time:                        12:27:06   Log-likelihood                -4.559e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      2323.8
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53207)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             279.09
                            

In [73]:
# China trade growth on the 12-month log tariff change.
# Unweighted, with entity and time effects, covariance clustered by entity.
exog_vars = ["const", "log_tariff_change"]
all_vars = exog_vars + ["china_trade_growth", "2017_population"]

idx = pd.IndexSlice

# Estimation sample: all entities, time level from `start` through 2019-06-01.
dataset = trade_county.loc[idx[:, start:"2019-06-01"], all_vars]

# NOTE(review): `weights` is built from 2017_population here but is NOT passed
# to PanelOLS below, so this fit is unweighted. The assignment is retained only
# so the notebook-level `weights` variable keeps the value it had before.
weights = dataset["2017_population"].to_frame()
weights.replace(0, 0.001, inplace=True)

mod = PanelOLS(
    dataset["china_trade_growth"],
    dataset[exog_vars],
    time_effects=True,
    entity_effects=True,
)
fe_res = mod.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0121
Estimator:                   PanelOLS   R-squared (Between):              0.1282
No. Observations:               53226   R-squared (Within):               0.0597
Date:                Thu, Dec 05 2019   R-squared (Overall):              0.0719
Time:                        12:27:06   Log-likelihood                -3.915e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      614.09
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,50251)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             148.83
                            