In [159]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

from numpy.polynomial.polynomial import polyfit

#import weightedcalcs as wc
#import numpy as np

import pyarrow as pa
import pyarrow.parquet as pq

import statsmodels.api as sm
import statsmodels
#import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
from linearmodels.panel import PanelOLS

import warnings
warnings.filterwarnings("ignore")

### First Steps

Here we are going to combine the trade and autos data set...

In [160]:
fig_path = "C:\\github\\expenditure_tradeshocks\\figures"

In [161]:
file_path = os.getcwd()
#file_path = "F:\\POLK AUTO DATA\\"

trade_county = pq.read_table(file_path + "\\data\\trade_employment_all.parquet").to_pandas()

In [162]:
trade_county["time"] = pd.to_datetime(trade_county.time)

trade_county.set_index(["area_fips", "time"],inplace = True)

In [163]:
trade_county.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,fips,total_employment,emply_month
area_fips,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10001,2017-01-01,512.529185,53.382853,1.172229,2843.0,10001,29514.0,0.0
10001,2017-02-01,492.604071,43.72371,1.172278,2843.0,10001,29514.0,0.0
10001,2017-03-01,560.679984,37.347986,1.172365,2843.0,10001,29514.0,0.0
10001,2017-04-01,499.471573,29.463467,1.172366,2843.0,10001,29514.0,0.0
10001,2017-05-01,508.584288,30.009914,1.172366,2843.0,10001,29514.0,0.0
10001,2017-06-01,499.350776,26.428918,1.172366,2843.0,10001,29514.0,0.0
10001,2017-07-01,482.861116,28.199912,1.172365,2843.0,10001,29514.0,0.0
10001,2017-08-01,500.243692,30.920545,1.172365,2843.0,10001,29514.0,0.0
10001,2017-09-01,485.835551,40.123629,1.172365,2843.0,10001,29514.0,0.0
10001,2017-10-01,559.828124,69.302723,1.172365,2843.0,10001,29514.0,0.0


In [164]:
trade_county["log_tariff"] = np.log(1+.01*trade_county["tariff"])

trade_county["log_exp_total"] = np.log(trade_county["total_exp_pc"]).replace(-np.inf, np.nan)

trade_county["log_exp_china"] = np.log(trade_county["china_exp_pc"]).replace(-np.inf, np.nan)

trade_county["log_employment"] = np.log(trade_county["emply_month"]).replace(-np.inf, np.nan)

trade_county["const"] = 1

In [165]:
trade_county.reset_index(inplace = True)

trade_county.rename({"area_fips": "GEOFIPS"},axis = 1, inplace = True)

trade_county["state_fips"] = trade_county["GEOFIPS"].astype(str).str[0:2]

trade_county["GEOFIPS"] = trade_county["GEOFIPS"].astype(int)

In [166]:
#trade_county["log_value"] = np.log(trade_county["value"]).replace(-np.inf,np.nan)

trade_county.set_index(["GEOFIPS", "time"], inplace = True)

In [167]:
trade_county.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,fips,total_employment,emply_month,log_tariff,log_exp_total,log_exp_china,log_employment,const,state_fips
GEOFIPS,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10001,2017-01-01,512.529185,53.382853,1.172229,2843.0,10001,29514.0,0.0,0.011654,6.239358,3.97749,,1,10
10001,2017-02-01,492.604071,43.72371,1.172278,2843.0,10001,29514.0,0.0,0.011655,6.199706,3.777891,,1,10
10001,2017-03-01,560.679984,37.347986,1.172365,2843.0,10001,29514.0,0.0,0.011655,6.32915,3.620279,,1,10
10001,2017-04-01,499.471573,29.463467,1.172366,2843.0,10001,29514.0,0.0,0.011655,6.213551,3.383151,,1,10
10001,2017-05-01,508.584288,30.009914,1.172366,2843.0,10001,29514.0,0.0,0.011655,6.231631,3.401528,,1,10


### Summary Statistics

### Simple Cuts of the Auto Data by Tariff Exposure

Here is the strategy. First, compute the quantiles of tariff exposure for each county. Then compute year over year sales growth for each county. Note that since this is in growth rates, this takes out any county level "fixed effect" and removes any seasonal component. 

Then we will simply plot year over year sales growth, for the (mean or median) county in a particular part of the distribution of tariff exposure. For example, compare the sales growth for the counties which are the most exposed and then compare then to the counties which are the least exposed.

In [168]:
# First take growth rates

# note , I'm a bit confused about why the sorting,
# here is that because of some missing values, the resulting dateframe from the 
# first operation may be out of place, so we need to resort things to make sure that
# the time difference is correct.

trade_county["tariff_change"] = trade_county.groupby(["GEOFIPS"]).tariff.diff(12)

trade_county.sort_values(["GEOFIPS", "time"], inplace = True)

trade_county["log_tariff_change"] = trade_county.groupby(["GEOFIPS"]).log_tariff.diff(12)

trade_county.sort_values(["GEOFIPS", "time"], inplace = True)

trade_county["total_trade_growth"] = trade_county.groupby(["GEOFIPS"]).log_exp_total.diff(12)

trade_county.sort_values(["GEOFIPS", "time"], inplace = True)

trade_county["china_trade_growth"] = trade_county.groupby(["GEOFIPS"]).log_exp_china.diff(12)

trade_county.sort_values(["GEOFIPS", "time"], inplace = True)

### Regression Analysis


In [169]:
trade_county.columns

Index(['total_exp_pc', 'china_exp_pc', 'tariff', 'emplvl_2017', 'fips',
       'total_employment', 'emply_month', 'log_tariff', 'log_exp_total',
       'log_exp_china', 'log_employment', 'const', 'state_fips',
       'tariff_change', 'log_tariff_change', 'total_trade_growth',
       'china_trade_growth'],
      dtype='object')

#### Total Trade, Weighted

In [170]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)
# Some countys because of there size and how the QCEW deals with stuff, there are is zero.
# PanelOLS does not like 0 weight, so this the the work around. It does not matter. 


mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars], weights = weights)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.1112
Estimator:                   PanelOLS   R-squared (Between):             -0.0219
No. Observations:               53226   R-squared (Within):               0.2144
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.1112
Time:                        07:09:40   Log-likelihood                 5.855e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   2.154e+04
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53224)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             1413.1
                            

In [171]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)
# Some countys because of there size and how the QCEW deals with stuff, there are is zero.
# PanelOLS does not like 0 weight, so this the the work around. It does not matter. 


mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars], weights = weights,
               time_effects = True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0178
Estimator:                   PanelOLS   R-squared (Between):              0.0202
No. Observations:               53226   R-squared (Within):               0.1054
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0682
Time:                        07:09:42   Log-likelihood                 6.776e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   2.199e+04
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53207)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             557.67
                            

In [172]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)
# Some countys because of there size and how the QCEW deals with stuff, there are is zero.
# PanelOLS does not like 0 weight, so this the the work around. It does not matter. 


mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars], weights = weights, entity_effects=True,
               time_effects = True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0132
Estimator:                   PanelOLS   R-squared (Between):              0.0193
No. Observations:               53226   R-squared (Within):               0.0902
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0592
Time:                        07:09:44   Log-likelihood                 9.811e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                   3.953e+04
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,50251)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             198.82
                            

#### Total Trade, unweighted

In [173]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth']

dataset = trade_county[all_vars].dropna()

exog_vars = ["const","log_tariff_change"]

mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars])

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0428
Estimator:                   PanelOLS   R-squared (Between):             -0.0273
No. Observations:               56183   R-squared (Within):               0.0920
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0428
Time:                        07:09:45   Log-likelihood                 1.748e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      2514.2
Entities:                        3252   P-value                           0.0000
Avg Obs:                       17.276   Distribution:                 F(1,56181)
Min Obs:                       0.0000                                           
Max Obs:                       19.000   F-statistic (robust):             1723.4
                            

In [174]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth']

dataset = trade_county[all_vars].dropna()

exog_vars = ["const","log_tariff_change"]

mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars], time_effects = True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0028
Estimator:                   PanelOLS   R-squared (Between):              0.0031
No. Observations:               56183   R-squared (Within):               0.0295
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0186
Time:                        07:09:46   Log-likelihood                 2.356e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      157.45
Entities:                        3252   P-value                           0.0000
Avg Obs:                       17.276   Distribution:                 F(1,56163)
Min Obs:                       0.0000                                           
Max Obs:                       19.000   F-statistic (robust):             190.11
                            

In [175]:
all_vars = ["const", "log_tariff_change", 'total_trade_growth']

dataset = trade_county[all_vars].dropna()

exog_vars = ["const","log_tariff_change"]

mod = PanelOLS(dataset.total_trade_growth, dataset[exog_vars],time_effects = True,entity_effects=True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     total_trade_growth   R-squared:                        0.0025
Estimator:                   PanelOLS   R-squared (Between):              0.0031
No. Observations:               56183   R-squared (Within):               0.0308
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0194
Time:                        07:09:48   Log-likelihood                 4.492e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      132.35
Entities:                        3252   P-value                           0.0000
Avg Obs:                       17.276   Distribution:                 F(1,53207)
Min Obs:                       0.0000                                           
Max Obs:                       19.000   F-statistic (robust):             145.14
                            

#### Chine Trade, Weighted

In [176]:
all_vars = ["const", "log_tariff_change", 'china_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)

mod = PanelOLS(dataset.china_trade_growth, dataset[exog_vars], weights = weights)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.1580
Estimator:                   PanelOLS   R-squared (Between):              0.1179
No. Observations:               53226   R-squared (Within):               0.1683
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.1580
Time:                        07:09:50   Log-likelihood                -1.491e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      8151.7
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53224)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             1290.9
                            

In [177]:
all_vars = ["const", "log_tariff_change", 'china_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)

mod = PanelOLS(dataset.china_trade_growth, dataset[exog_vars], weights = weights, time_effects = True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0536
Estimator:                   PanelOLS   R-squared (Between):              0.1590
No. Observations:               53226   R-squared (Within):               0.1259
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.1326
Time:                        07:09:51   Log-likelihood                -1.073e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      866.15
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53207)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             384.57
                            

In [178]:
all_vars = ["const", "log_tariff_change", 'china_trade_growth','total_employment']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

weights = dataset['total_employment'].to_frame()

weights.replace(to_replace = 0, value = 0.001,inplace = True)

mod = PanelOLS(dataset.china_trade_growth, dataset[exog_vars], weights = weights,
               time_effects = True, entity_effects=True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0158
Estimator:                   PanelOLS   R-squared (Between):              0.1483
No. Observations:               53226   R-squared (Within):               0.1067
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.1152
Time:                        07:09:54   Log-likelihood                   -3436.7
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      4491.7
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,50251)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             209.23
                            

#### China Trade Unweighted

In [179]:
all_vars = ["const", "log_tariff_change", 'china_trade_growth','emplvl_2017']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

mod = PanelOLS(dataset.china_trade_growth, dataset[exog_vars])

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0938
Estimator:                   PanelOLS   R-squared (Between):              0.0977
No. Observations:               53226   R-squared (Within):               0.0929
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0938
Time:                        07:09:55   Log-likelihood                -5.268e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5506.5
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,53224)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             1569.2
                            

In [180]:
all_vars = ["const", "log_tariff_change", 'china_trade_growth','emplvl_2017']

idx = pd.IndexSlice

#weights = trade_county["emplvl_2017"].loc[idx[:,"2018-01-01":"2019-02-01"]]

dataset = trade_county[all_vars].loc[idx[:,"2018-01-01":"2019-06-01"],:]

exog_vars = ["const","log_tariff_change"]

mod = PanelOLS(dataset.china_trade_growth, dataset[exog_vars],
               time_effects = True, entity_effects=True)

fe_res = mod.fit(cov_type='clustered')

print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:     china_trade_growth   R-squared:                        0.0136
Estimator:                   PanelOLS   R-squared (Between):              0.1200
No. Observations:               53226   R-squared (Within):               0.0634
Date:                Tue, Oct 01 2019   R-squared (Overall):              0.0738
Time:                        07:09:56   Log-likelihood                -3.972e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      695.11
Entities:                        2957   P-value                           0.0000
Avg Obs:                       18.000   Distribution:                 F(1,50251)
Min Obs:                       18.000                                           
Max Obs:                       18.000   F-statistic (robust):             545.97
                            

In [181]:
dataset.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,const,log_tariff_change,china_trade_growth,emplvl_2017
GEOFIPS,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1001,2018-01-01,1,1.503706e-07,-0.209747,891.0
1001,2018-02-01,1,-2.558031e-09,0.182746,891.0
1001,2018-03-01,1,-2.558031e-09,0.774352,891.0
1001,2018-04-01,1,-1.325442e-08,-0.100634,891.0
1001,2018-05-01,1,-1.325442e-08,0.036884,891.0
1001,2018-06-01,1,-1.325442e-08,0.110591,891.0
1001,2018-07-01,1,0.0007228446,-0.258416,891.0
1001,2018-08-01,1,0.0007228446,-0.273793,891.0
1001,2018-09-01,1,0.001111347,0.440235,891.0
1001,2018-10-01,1,0.004756285,0.301928,891.0
