In [67]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

import pyarrow as pa
import pyarrow.parquet as pq

import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
from linearmodels.panel import PanelOLS

In [68]:
# Directory the kernel was started in; the data paths below are built from it.
cwd = os.getcwd()

# Build the path with os.path.join instead of hard-coded "\\" separators so the
# cell also works on platforms whose path separator is not a backslash.
trade_data_path = os.path.join(cwd, "data", "total_trade_data.parquet")

trade_data = pq.read_table(trade_data_path).to_pandas()

# Parse the time column to datetimes before it becomes an index level.
trade_data["time"] = pd.to_datetime(trade_data["time"])

# Panel index: (area_fips, time).
trade_data = trade_data.set_index(["area_fips", "time"])

In [69]:
# Date of the cross-section used to rank areas by tariff exposure.
EXPOSURE_DATE = '2018-12-1'
N_EXPOSURE_BINS = 4                       # quartiles
TOP_EXPOSURE_BIN = N_EXPOSURE_BINS - 1    # qcut labels run 0 .. 3 with labels=False

# Tariff of every area at the snapshot date (xs drops the time level, so the
# result is indexed by area_fips only).
tariff_snapshot = trade_data.xs(EXPOSURE_DATE, level=1).tariff

# Integer quartile of each area's tariff: 0 = lowest quartile, 3 = highest.
exposure = pd.qcut(tariff_snapshot, N_EXPOSURE_BINS, labels=False)

# area_fips codes that fall in the highest tariff quartile.
most_exposed = exposure[exposure == TOP_EXPOSURE_BIN].index.tolist()

# Mean tariff of the most exposed areas at the snapshot date (cell output).
trade_data.loc[most_exposed].xs(EXPOSURE_DATE, level=1).tariff.mean()

6.515098948465189

In [51]:
# Lookup from the monthly employment column labels ("January Employment", ...)
# to the first day of the corresponding month in 2016.
empl_time_dict_16 = {
    month_name + " Employment": dt.datetime(2016, month_number, 1)
    for month_number, month_name in enumerate(
        ("January", "February", "March", "April", "May", "June",
         "July", "August", "September", "October", "November", "December"),
        start=1,
    )
}

In [70]:
_MONTH_NAMES = ("January", "February", "March", "April", "May", "June",
                "July", "August", "September", "October", "November", "December")


def _employment_month_lookup(year, n_months=12):
    """Map "<Month> Employment" labels to the first day of that month in `year`.

    Only the first `n_months` months of the year are included.
    """
    return {
        month_name + " Employment": dt.datetime(year, month_number, 1)
        for month_number, month_name in enumerate(_MONTH_NAMES[:n_months], start=1)
    }


empl_time_dict_17 = _employment_month_lookup(2017)

empl_time_dict_18 = _employment_month_lookup(2018)

# 2019 only covers January through March.
empl_time_dict_19 = _employment_month_lookup(2019, n_months=3)

# Column labels of interest in a first-quarter sheet.
clistQ1 = (
    ['Area\nCode', 'NAICS', 'Qtr']
    + ['January Employment', 'February Employment', 'March Employment']
    + ['Total Quarterly Wages', 'Average Weekly Wage']
    + ['Own', "Area Type"]
)

In [53]:
# Download the BLS "all county, high level" archive for each year and unpack it
# into <cwd>/bls_files.  Each archive is a single large file per year.
# The 2016 download is disabled; add 2016 to the tuple below to fetch it as well.
BLS_URL_TEMPLATE = "https://data.bls.gov/cew/data/files/{year}/xls/{year}_all_county_high_level.zip"

# os.path.join keeps the extraction directory valid on every platform.
bls_dir = os.path.join(cwd, "bls_files")

bls_archives = {}

for year in (2017, 2018):

    url = BLS_URL_TEMPLATE.format(year=year)

    # A timeout stops the cell from hanging forever on a stalled connection.
    r = requests.get(url, timeout=120)

    # Fail here on an HTTP error instead of handing an error page to ZipFile,
    # which would otherwise surface as a confusing BadZipFile.
    r.raise_for_status()

    # convert bytes to zip file
    archive = zf.ZipFile(io.BytesIO(r.content))
    archive.extractall(bls_dir)

    bls_archives[year] = archive

bls_q2017 = bls_archives[2017]
bls_q2018 = bls_archives[2018]

In [54]:
url = "https://data.bls.gov/cew/data/files/2019/xls/2019_all_county_high_level.zip"

# A timeout stops the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=120)

# Fail here on an HTTP error instead of handing an error page to ZipFile,
# which would otherwise surface as a confusing BadZipFile.
r.raise_for_status()

# convert bytes to zip file and unpack it into <cwd>/bls_files
bls_q2019 = zf.ZipFile(io.BytesIO(r.content))
bls_q2019.extractall(os.path.join(cwd, "bls_files"))

In [55]:
# List the members of the 2019 archive to see which workbooks were shipped.
bls_q2019.infolist()

[<ZipInfo filename='allhlcn191.xlsx' compress_type=deflate external_attr=0x20 file_size=6839716 compress_size=6715278>]

In [71]:
def clean_bls_quarter(excell_sheet, time_dict, county_ids=None):
    """Turn one quarterly BLS workbook into a long monthly employment panel.

    Reads the "US_St_Cn_MSA" sheet, keeps the private-ownership (Own == 5),
    goods-producing (NAICS == 101) county rows, aligns them to a fixed list of
    county codes and melts the monthly employment columns into one row per
    county and month.

    Parameters
    ----------
    excell_sheet : str or file-like
        Workbook to read; passed straight to ``pandas.read_excel``.
    time_dict : dict
        Maps monthly column labels (e.g. "January Employment") to the datetime
        of that month.  The sheet columns whose label is a key of this dict are
        the ones kept, so a full-year dict can be used for any quarter.
    county_ids : iterable of int-like, optional
        County codes to keep, in output order.  When omitted, the unique
        first-level values of the global ``trade_data`` index are used.

    Returns
    -------
    pandas.DataFrame
        Indexed by (area_fips, time) with a single column ``emply_month``.
        Codes in ``county_ids`` that are missing from the sheet get NaN.

    Raises
    ------
    ValueError
        If no column of the sheet is named in ``time_dict``.
    """
    if county_ids is None:
        county_ids = trade_data.index.get_level_values(0).unique().astype(int).tolist()
    else:
        county_ids = [int(code) for code in county_ids]

    raw = pd.read_excel(excell_sheet, sheet_name = "US_St_Cn_MSA")

    # Select the month columns by label rather than by position so the result
    # does not depend on the exact column layout of the workbook.
    month_cols = [col for col in raw.columns if col in time_dict]

    if not month_cols:
        raise ValueError("no column of the sheet matches a key of time_dict")

    keep = (
        (raw["Own"] == 5)                     # Take only private
        & (raw["NAICS"] == 101)               # Take goods producing
        & (raw["Area Type"] == "County")      # Take only counties
    )
    # Use NAICS == 10 instead of 101 to take all employment in all sectors.

    # Every step below returns a new frame, so nothing is assigned into a
    # filtered slice (which is what triggers SettingWithCopyWarning).
    counties = raw.loc[keep].rename(columns = {"Area\nCode": "GEOFIPS"})

    counties = counties.assign(GEOFIPS = counties["GEOFIPS"].astype(int)).set_index("GEOFIPS")

    monthly = counties[month_cols].reindex(county_ids).reset_index()

    long_form = monthly.melt("GEOFIPS")

    # NOTE(review): codes go through int before str, so any leading zero is
    # dropped ("09009" -> "9009") -- confirm this matches the area_fips format
    # of the frame this is later merged with.
    tidy = pd.DataFrame({
        "area_fips": long_form["GEOFIPS"].astype(str),
        "time": long_form["variable"].map(time_dict),
        "emply_month": long_form["value"],
    })

    return tidy.set_index(["area_fips", "time"])

In [72]:
# Clean every downloaded quarterly workbook and stack the results into one long
# monthly employment frame.  Workbooks are named allhlcn<YY><Q>.xlsx.
bls_dir = os.path.join(cwd, "bls_files")

# (two-digit year in the file name, quarters available, label -> date lookup)
quarterly_sources = [
    # ("16", ["1", "2", "3", "4"], empl_time_dict_16),   # 2016 is currently disabled
    ("17", ["1", "2", "3", "4"], empl_time_dict_17),
    ("18", ["1", "2", "3", "4"], empl_time_dict_18),
    ("19", ["1"], empl_time_dict_19),
]

quarter_frames = []

for year_suffix, quarters, year_time_dict in quarterly_sources:

    for item in quarters:

        file_name = os.path.join(bls_dir, "allhlcn" + year_suffix + item + ".xlsx")

        quarter_frames.append(clean_bls_quarter(file_name, year_time_dict))

# DataFrame.append was removed in pandas 2.0; collecting the pieces and
# concatenating once also avoids re-copying the frame on every iteration.
df = pd.concat(quarter_frames)

In [58]:
# Show the last five rows of the stacked employment frame.
df.iloc[-5:]

Unnamed: 0_level_0,Unnamed: 1_level_0,emply_month
area_fips,time,Unnamed: 2_level_1
9009,2019-03-01,44134.0
9011,2019-03-01,21704.0
9013,2019-03-01,5038.0
9015,2019-03-01,7180.0
9999,2019-03-01,2338.0


In [59]:
# Order the rows by area code, then by month (both are index levels of df).
df = df.sort_values(by = ["area_fips", "time"])

In [65]:
# Join trade data onto employment by the shared (area_fips, time) index.
# how="right" keeps every employment row, so rows with no trade data carry NaN.
trade_employ = pd.merge(
    trade_data,
    df,
    how = "right",
    left_index = True,
    right_index = True,
)

In [66]:
# Inspect the first 50 rows of the merged frame.
trade_employ.iloc[:50]

Unnamed: 0_level_0,Unnamed: 1_level_0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,fips,total_employment,emply_month
area_fips,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10001,2016-01-01,,,,,,,0.0
10001,2016-02-01,,,,,,,0.0
10001,2016-03-01,,,,,,,0.0
10001,2016-04-01,,,,,,,0.0
10001,2016-05-01,,,,,,,0.0
10001,2016-06-01,,,,,,,0.0
10001,2016-07-01,,,,,,,0.0
10001,2016-08-01,,,,,,,0.0
10001,2016-09-01,,,,,,,0.0
10001,2016-10-01,,,,,,,0.0


In [62]:
# Write the merged frame to parquet.  The index is reset first so that
# area_fips and time are stored as ordinary columns.
# os.path.join replaces the hard-coded "\\" separators so the path is valid on
# every platform.
file_path = os.path.join(os.getcwd(), "data", "trade_employment_goods.parquet")

pq.write_table(pa.Table.from_pandas(trade_employ.reset_index()), file_path)

# Alternative target for the all-sector variant of the data:
#file_path = os.path.join(os.getcwd(), "data", "trade_employment_all.parquet")

#pq.write_table(pa.Table.from_pandas(trade_employ.reset_index()), file_path)

In [63]:
# Pairwise correlations of the numeric columns.  Non-numeric columns (such as
# fips) are dropped explicitly: pandas >= 2.0 no longer discards them silently
# in corr() and raises instead.
trade_employ.select_dtypes(include = "number").corr()

Unnamed: 0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,total_employment,emply_month
total_exp_pc,1.0,0.705856,0.288051,0.04176,-0.027994,0.015532
china_exp_pc,0.705856,1.0,0.168035,-0.008813,-0.050778,-0.031848
tariff,0.288051,0.168035,1.0,-0.016043,-0.07393,-0.04089
emplvl_2017,0.04176,-0.008813,-0.016043,1.0,0.896478,0.974721
total_employment,-0.027994,-0.050778,-0.07393,0.896478,1.0,0.927028
emply_month,0.015532,-0.031848,-0.04089,0.974721,0.927028,1.0


In [64]:
# Inspect the first 25 rows of the merged frame.
trade_employ.iloc[:25]

Unnamed: 0_level_0,Unnamed: 1_level_0,total_exp_pc,china_exp_pc,tariff,emplvl_2017,fips,total_employment,emply_month
area_fips,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10001,2017-01-01,512.529185,53.382853,1.172229,2843.0,10001,29514.0,0.0
10001,2017-02-01,492.604071,43.72371,1.172278,2843.0,10001,29514.0,0.0
10001,2017-03-01,560.679984,37.347986,1.172365,2843.0,10001,29514.0,0.0
10001,2017-04-01,499.471573,29.463467,1.172366,2843.0,10001,29514.0,0.0
10001,2017-05-01,508.584288,30.009914,1.172366,2843.0,10001,29514.0,0.0
10001,2017-06-01,499.350776,26.428918,1.172366,2843.0,10001,29514.0,0.0
10001,2017-07-01,482.861116,28.199912,1.172365,2843.0,10001,29514.0,0.0
10001,2017-08-01,500.243692,30.920545,1.172365,2843.0,10001,29514.0,0.0
10001,2017-09-01,485.835551,40.123629,1.172365,2843.0,10001,29514.0,0.0
10001,2017-10-01,559.828124,69.302723,1.172365,2843.0,10001,29514.0,0.0
