In [1]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np
from census import Census # This is new...

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

#import weightedcalcs as wc
#import numpy as np

import pyarrow as pa
import pyarrow.parquet as pq

This file creates the trade file behind the [Phase One Tracker](https://www.tradewartracker.com/). It proceeds in several steps.

1. Grabs the trade data

2. Constructs the relevant Phase One product categories and the associated goals/targets.

3. Maps the data to the county level.

In [2]:
# Which Census trade flow this notebook works with.
trade_type = "exports"

# Census API key, stored as a ready-to-concatenate query-string fragment ("&key=...").
# The fallback below is the notebook author's own key, posted on purpose as a
# convenience. If you will be doing more with this, please get your own key and
# export it as the CENSUS_API_KEY environment variable: a non-empty value there
# takes precedence over the posted key, so no credential has to be edited into the file.
my_key = "&key=" + (os.environ.get("CENSUS_API_KEY") or "34e40301bda77077e24c859c6c6c0b721ad73fc7")

---
#### Grab the trade data using the Census API

In [3]:
def census_trade(url, trade_type, country, product_level):
    """Download one Census trade API query and tidy it into a DataFrame.

    Parameters
    ----------
    url : str
        Complete request URL (endpoint, requested fields, key and predicates).
    trade_type : str
        "imports" or "exports"; selects which raw API columns are converted.
        For any other value the frame is returned with only ``time`` converted.
    country : str
        Prefix used for the value column names, e.g. "china" -> "china_exports".
    product_level : str
        Name given to the commodity-code column, e.g. "hs6".

    Returns
    -------
    pandas.DataFrame
        One row per API record with ``time`` parsed from "%Y-%m" strings.
        For imports: ``<country>_imports`` (float) and ``<product_level>`` (str)
        replace CON_VAL_MO / I_COMMODITY, and COMM_LVL is dropped.
        For exports: ``<country>_exports`` (float), ``<product_level>`` (str),
        ``<country>_exports_quant`` (float) and ``quant_type`` (str) replace
        ALL_VAL_MO / E_COMMODITY / QTY_1_MO / UNIT_QY1, and COMM_LVL is dropped.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    r = requests.get(url)

    # Echo the response object so the HTTP status is visible in the cell output.
    print(r)

    # Fail with a clear HTTP error instead of a confusing JSON/column error below.
    r.raise_for_status()

    # Parse the body once. The first entry holds the column labels, the rest
    # are the records; passing the labels to the constructor also keeps an
    # empty result (labels only) from failing on a column-count mismatch.
    payload = r.json()
    df = pd.DataFrame(payload[1:], columns=payload[0])

    # Turn the "YYYY-MM" strings into timestamps so the frame can be sliced by date.
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m")

    trade_label = country + "_" + trade_type

    if trade_type == "imports":
        df[trade_label] = df["CON_VAL_MO"].astype(float)
        df[product_level] = df["I_COMMODITY"].astype(str)

        df = df.drop(columns=["CON_VAL_MO", "I_COMMODITY", "COMM_LVL"])

    elif trade_type == "exports":
        df[trade_label] = df["ALL_VAL_MO"].astype(float)
        df[product_level] = df["E_COMMODITY"].astype(str)

        # Follow the same <country>_ prefix as the value column; for
        # country="china" this is the familiar "china_exports_quant".
        df[country + "_exports_quant"] = df["QTY_1_MO"].astype(float)
        df["quant_type"] = df["UNIT_QY1"].astype(str)

        df = df.drop(columns=["ALL_VAL_MO", "E_COMMODITY", "COMM_LVL", "QTY_1_MO", "UNIT_QY1"])

    return df

In [4]:
# Fields requested from the HS export endpoint: commodity code, monthly value,
# and the first quantity together with its unit.
end_use = "hs?get=E_COMMODITY,ALL_VAL_MO,UNIT_QY1,QTY_1_MO"

# Assemble the request in one place: endpoint + fields + key + predicates
# (monthly data from 2013-01 on, HS6 detail, country code 5700).
url = "".join([
    "https://api.census.gov/data/timeseries/intltrade/exports/",
    end_use,
    my_key,
    "&time==from+2013-01",
    "&COMM_LVL=HS6",
    "&CTY_CODE=5700",
])

df = census_trade(url, trade_type, "china", "hs6")

# The HS4 heading is the first four characters of the HS6 code.
df["hs4"] = df["hs6"].str.slice(0, 4)

# Latest month present in the download.
df["time"].max()

<Response [200]>


Timestamp('2020-10-01 00:00:00')

In [5]:
# Preview the first rows of the tidied HS6 export frame (now including the derived hs4 column).
df.head()

Unnamed: 0,time,CTY_CODE,china_exports,hs6,china_exports_quant,quant_type,hs4
0,2013-01-01,5700,4806.0,841090,0.0,-,8410
1,2013-01-01,5700,287000.0,481031,0.0,-,4810
2,2013-01-01,5700,311650.0,481160,0.0,-,4811
3,2013-01-01,5700,63701.0,481620,0.0,-,4816
4,2013-01-01,5700,40265.0,490290,0.0,-,4902


---

Now this is the **new** stuff from the Census. The plan is to append it to the regular data set...

In [6]:
# Workbook with the preliminary November 2020 figures. The June 2020 release
# followed the same URL pattern:
# "https://www.census.gov/foreign-trade/Press-Release/2020pr/06/Prelim_US_C_ETA/June%202020.xlsx"
census_yrl = "https://www.census.gov/foreign-trade/Press-Release/2020pr/11/Prelim_US_C_ETA/November%202020.xlsx"

# Skip the first two spreadsheet rows and give the three columns working names.
dfC = pd.read_excel(census_yrl, skiprows=[0, 1])
dfC.columns = ["hs4_o", "C_description", "china_exports"]

# Every row of this workbook gets the same month stamp, converted to a timestamp
# so it lines up with the ``time`` column of the API data.
dfC["time"] = "2020-11-01"
dfC["time"] = pd.to_datetime(dfC["time"], format="%Y-%m-%d")

# Drop rows that have any missing value.
dfC = dfC.dropna()

dfC.tail()

Unnamed: 0,hs4_o,C_description,china_exports,time
549,9018,"Instruments And Appliances Used In Medical, Su...",272644262.0,2020-11-01
550,9019,"Mechano-Therapy, Massage, Psychological Aptitu...",19836484.0,2020-11-01
551,9020,Breathing Appliances Nesoi And Gas Masks Havin...,2584086.0,2020-11-01
552,9021,Orthopedic Appliances; Splints Etc.; Artificia...,63211772.0,2020-11-01
553,9022,"X-Ray Etc. Apparatus, Including Radiography Or...",55482408.0,2020-11-01


In [7]:
# Product list from data/annex-6-1.csv. The path is built with os.path.join so the
# notebook also runs where "\\" is not a path separator (on Windows this resolves to
# the same ".\data\annex-6-1.csv"). Both code columns are read as strings so
# they stay text rather than being parsed as numbers.
dfproducts = pd.read_csv(os.path.join(".", "data", "annex-6-1.csv"), dtype={"hs4": str, "hs4_o": str})

In [8]:
# Keep just the code, value and month from the preliminary release, then attach the
# product-list attributes by original code; how="left" keeps codes with no match.
dfC = pd.merge(dfC[["hs4_o", "china_exports", "time"]], dfproducts, on="hs4_o", how="left")

In [9]:
# Preview the preliminary rows after the product-list attributes were merged on.
dfC.head()

Unnamed: 0,hs4_o,china_exports,time,description,low_catagory,high_catagory,hs4
0,101,50819.0,2020-11-01,"Hors,es ass,es mules and hinnies, live",Other agricultural commodities,2. Agriculture,101
1,102,0.0,2020-11-01,"Bovine anima,ls live",Other agricultural commodities,2. Agriculture,102
2,103,1440508.0,2020-11-01,"Swine, live",Other agricultural commodities,2. Agriculture,103
3,104,0.0,2020-11-01,"Sheep and goa,ts live",Other agricultural commodities,2. Agriculture,104
4,105,0.0,2020-11-01,"Poultry, live; chickens, ducks, geese, turkeys...",Other agricultural commodities,2. Agriculture,105


---
#### Bring in the Phase One Product list

In [10]:
# Boolean mask: True for every product-list row outside the "3. Energy" category.
not_energy = dfproducts["high_catagory"].ne("3. Energy")

# hs4 headings of the energy rows (a heading repeats when several energy
# products share it).
energy_list = dfproducts.loc[~not_energy, "hs4"].tolist()

print(energy_list)

['2701', '2709', '2710', '2711', '2711', '2711', '2711', '2713', '2713', '2905']


Next, we remove the energy products; these will be added back below from the output of the ``phase-one-energy`` notebook.

In [11]:
# Keep only rows whose hs4 heading is not one of the energy headings.
df = df[~df["hs4"].isin(energy_list)]

Now merge the non-energy products with the product list.

In [12]:
# Left-join the product-list attributes onto the trade rows by hs4 heading;
# indicator=True adds a ``_merge`` column recording whether each row found a match.
df_phaseone = pd.merge(df, dfproducts, on="hs4", how="left", indicator=True)

Grab the column names so the energy data can be put in the same column order; otherwise pandas complains.

In [13]:
# Column names of the merged frame, in order.
cnames = df_phaseone.columns.tolist()
print(cnames)

['time', 'CTY_CODE', 'china_exports', 'hs6', 'china_exports_quant', 'quant_type', 'hs4', 'hs4_o', 'description', 'low_catagory', 'high_catagory', '_merge']


Then read in the energy data set

In [14]:
# Energy data set stored as parquet in the data folder. os.path.join keeps the path
# valid on systems where "\\" is not a separator (on Windows it resolves to the
# same ".\data\phaseone-energy.parquet").
file = os.path.join(".", "data", "phaseone-energy.parquet")

df_energy = pq.read_table(file).to_pandas()

# Spot check: show the liquefied natural gas rows.
df_energy[df_energy.low_catagory == "Liquefied natural gas"]

Unnamed: 0,time,CTY_CODE,hs4_o,hs6,china_exports,china_exports_quant,quant_type,description,low_catagory,high_catagory,hs4,_merge
561,2013-04-01,5700,271111,271111,119031.0,996.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
562,2013-05-01,5700,271111,271111,0.0,0.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
563,2013-06-01,5700,271111,271111,0.0,0.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
564,2013-07-01,5700,271111,271111,0.0,0.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
565,2013-08-01,5700,271111,271111,0.0,0.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
...,...,...,...,...,...,...,...,...,...,...,...,...
626,2020-06-01,5700,271111,271111,40687326.0,332510.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
627,2020-07-01,5700,271111,271111,44254302.0,328083.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
628,2020-08-01,5700,271111,271111,52565450.0,318840.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both
629,2020-09-01,5700,271111,271111,47949278.0,340983.0,M3,Liquefied natural gas,Liquefied natural gas,3. Energy,2711,both


And then append it to the main dataframe

In [15]:
# Stack the energy rows under the non-energy rows. pd.concat is used because
# DataFrame.append was removed in pandas 2.0; like append, it keeps each
# frame's own index and matches columns by name.
df_phaseone = pd.concat([df_phaseone, df_energy])

In [16]:
# Stack the preliminary-release rows under the rest. pd.concat is used because
# DataFrame.append was removed in pandas 2.0; columns missing from dfC
# are filled with NaN, exactly as append did.
df_phaseone = pd.concat([df_phaseone, dfC])

In [17]:
# The last rows are the preliminary-release rows added last; columns they do not carry show as NaN.
df_phaseone.tail()

Unnamed: 0,time,CTY_CODE,china_exports,hs6,china_exports_quant,quant_type,hs4,hs4_o,description,low_catagory,high_catagory,_merge
549,2020-11-01,,272644262.0,,,,9018,9018,"Instruments and appliances used in medic,al su...",Optical and medical instruments,1. Manufactured Goods,
550,2020-11-01,,19836484.0,,,,9019,9019,Mechano-therapy appliances; massage apparatus;...,Optical and medical instruments,1. Manufactured Goods,
551,2020-11-01,,2584086.0,,,,9020,9020,"Other breathing appliances and gas masks, excl...",Optical and medical instruments,1. Manufactured Goods,
552,2020-11-01,,63211772.0,,,,9021,9021,"Orthopedic appliances, including crutch,es su...",Optical and medical instruments,1. Manufactured Goods,
553,2020-11-01,,55482408.0,,,,9022,9022,Apparatus based on the use of X-rays or of alp...,Optical and medical instruments,1. Manufactured Goods,


In [18]:
# Spot check: select rows by their original product code (hs4_o); here "271112",
# which the saved output shows as the liquefied propane energy rows.
df_phaseone[df_phaseone.hs4_o == "271112"]

Unnamed: 0,time,CTY_CODE,china_exports,hs6,china_exports_quant,quant_type,hs4,hs4_o,description,low_catagory,high_catagory,_merge
631,2013-10-01,5700,49461021.0,271112,1105106.0,BBL,2711,271112,Liquefied propane,Refined products,3. Energy,both
632,2013-11-01,5700,20872845.0,271112,388600.0,BBL,2711,271112,Liquefied propane,Refined products,3. Energy,both
633,2013-12-01,5700,70036260.0,271112,1154000.0,BBL,2711,271112,Liquefied propane,Refined products,3. Energy,both
634,2014-02-01,5700,39743760.0,271112,577000.0,BBL,2711,271112,Liquefied propane,Refined products,3. Energy,both
635,2014-03-01,5700,29323140.0,271112,577000.0,BBL,2711,271112,Liquefied propane,Refined products,3. Energy,both
...,...,...,...,...,...,...,...,...,...,...,...,...
717,2020-07-01,5700,58943415.0,271112,392862.0,M3,2711,271112,Liquefied propane,Refined products,3. Energy,both
718,2020-08-01,5700,5520900.0,271112,43722.0,M3,2711,271112,Liquefied propane,Refined products,3. Energy,both
719,2020-09-01,5700,7178500.0,271112,46583.0,M3,2711,271112,Liquefied propane,Refined products,3. Energy,both
720,2020-10-01,5700,0.0,271112,0.0,M3,2711,271112,Liquefied propane,Refined products,3. Energy,both


In [19]:
# Rows that matched no product-list entry have no category; label them explicitly
# so they form their own group in the aggregations.
df_phaseone["high_catagory"] = df_phaseone["high_catagory"].fillna("not in aggreement")

The output file ``phaseone-tradedata.parquet`` is the main file used in the ``phase-one-plots.ipynb`` notebook.

In [20]:
# Final look at the combined frame before it is written to disk.
df_phaseone.tail()

Unnamed: 0,time,CTY_CODE,china_exports,hs6,china_exports_quant,quant_type,hs4,hs4_o,description,low_catagory,high_catagory,_merge
549,2020-11-01,,272644262.0,,,,9018,9018,"Instruments and appliances used in medic,al su...",Optical and medical instruments,1. Manufactured Goods,
550,2020-11-01,,19836484.0,,,,9019,9019,Mechano-therapy appliances; massage apparatus;...,Optical and medical instruments,1. Manufactured Goods,
551,2020-11-01,,2584086.0,,,,9020,9020,"Other breathing appliances and gas masks, excl...",Optical and medical instruments,1. Manufactured Goods,
552,2020-11-01,,63211772.0,,,,9021,9021,"Orthopedic appliances, including crutch,es su...",Optical and medical instruments,1. Manufactured Goods,
553,2020-11-01,,55482408.0,,,,9022,9022,Apparatus based on the use of X-rays or of alp...,Optical and medical instruments,1. Manufactured Goods,


In [21]:
# Destination for the combined trade data. os.path.join keeps the path valid on
# systems where "\\" is not a separator (on Windows it resolves to the same
# ".\data\phaseone-tradedata-C-november.parquet").
out_file = os.path.join(".", "data", "phaseone-tradedata-C-november.parquet")

# Convert the frame to an Arrow table and write it as parquet.
pq.write_table(pa.Table.from_pandas(df_phaseone), out_file)

#### This then constructs the benchmark and goal measures

In [22]:
# Index by month so whole years can be selected with .loc["YYYY"].
df_phaseone = df_phaseone.set_index("time")

In [23]:
# Total 2017 exports per top-level category, stored as the benchmark column.
grp = df_phaseone.loc["2017"].groupby("high_catagory")

benchmarks = grp[["china_exports"]].sum().rename(columns={"china_exports": "2017 Values"})

In [24]:
# Total 2020 exports per top-level category (all 2020 months present in the frame).
grp = df_phaseone.loc["2020"].groupby("high_catagory")

current = grp[["china_exports"]].sum().rename(columns={"china_exports": "2020 Values"})

In [25]:
# Put the 2017 and 2020 totals side by side, matched on the category index
# (inner join: only categories present in both years are kept).
benchmarks = pd.merge(benchmarks, current, how="inner", left_index=True, right_index=True)

In [26]:
# Show the 2017 vs 2020 totals by category.
benchmarks

Unnamed: 0_level_0,2017 Values,2020 Values
high_catagory,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Manufactured Goods,50151480000.0,42257200000.0
2. Agriculture,20969790000.0,17559240000.0
3. Energy,7649222000.0,7171394000.0
not in aggreement,50264670000.0,24824160000.0


In [26]:
# NOTE(review): this repeats the display cell directly above, and the two saved
# outputs show different 2020 values, so one of them is presumably left over from an
# earlier run. Re-run the notebook top to bottom and keep a single display.
benchmarks

Unnamed: 0_level_0,2017 Values,2020 Values
high_catagory,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Manufactured Goods,50151480000.0,47353290000.0
2. Agriculture,20969790000.0,22528440000.0
3. Energy,7649222000.0,8112074000.0
not in aggreement,50264670000.0,28391070000.0


In [27]:
# These are the goals from the AGREEMENT, keyed by category label.
# They are assigned by label rather than by row position, so each goal stays
# with its category even if the row order or the set of categories changes.
phase_one_goals = {
    "1. Manufactured Goods": 32900000000,
    "2. Agriculture": 12500000000,
    "3. Energy": 18500000000,
}

# Categories without a goal (e.g. "not in aggreement") get NaN; the column is float
# either way, matching what the positional assignment used to produce.
benchmarks["Goals"] = benchmarks.index.map(phase_one_goals).astype("float64")

Then the ``phaseone-goals.parquet`` file is the data file used to create the bar graph.

In [28]:
# Destination for the benchmark/goal table. os.path.join keeps the path valid on
# systems where "\\" is not a separator (on Windows it resolves to the same
# ".\data\phaseone-goals-C-november.parquet").
out_file = os.path.join(".", "data", "phaseone-goals-C-november.parquet")

# Convert the table to Arrow and write it as parquet.
pq.write_table(pa.Table.from_pandas(benchmarks), out_file)

In [29]:
# Final table: 2017 values, 2020 values and the goal for each category.
benchmarks

Unnamed: 0_level_0,2017 Values,2020 Values,Goals
high_catagory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1. Manufactured Goods,50151480000.0,47353290000.0,32900000000.0
2. Agriculture,20969790000.0,22528440000.0,12500000000.0
3. Energy,7649222000.0,8112074000.0,18500000000.0
not in aggreement,50264670000.0,28391070000.0,
