In [9]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np
from datetime import datetime

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools
import os
import geopandas as gpd # this is the main geopandas 

import pyarrow as pa
import pyarrow.parquet as pq
 
from bokeh.palettes import brewer, Spectral6
from bokeh.io import show, output_file, curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Panel, Tabs, GeoJSONDataSource, LinearColorMapper
from bokeh.models import ColorBar
from bokeh.layouts import column, gridplot, row
from bokeh.transform import factor_cmap
from bokeh.models import NumeralTickFormatter, Title, Label, Paragraph, Div, CustomJSHover, BoxAnnotation

This sets up the color palettes for the figures and the background. The background settings help the figure "float".

In [10]:
# First and last month of the window that the monthly trade series are reindexed onto.
start_date, end_date = "2013-01-01", "2020-08-01"

In [11]:
# Fill color shared by figure backgrounds, borders, legends and text boxes.
background = "#ffffff"

# Bar colors, paired in order with the rows of the goals table.
crl = [
    "darkblue",
    "slategray",
    "crimson",
]

In [13]:
# Load the Phase One goals table. The path is assembled with os.path.join so the
# notebook is not tied to Windows path separators.
file = os.path.join(".", "data", "phaseone-goals.parquet")

goaldf = pq.read_table(file).to_pandas()

In [14]:
# Keep only the three columns the charts use. The explicit .copy() makes goaldf an
# independent frame, so later column assignments on it do not trigger
# SettingWithCopyWarning.
goaldf = goaldf[["2017 Values", "Goals", "2020 Values"]].copy()

In [15]:
# Turn "Goals" into a level by adding the 2017 value to it (the raw column
# presumably holds the committed increase over 2017 -- confirm against the data),
# then transpose so each row is one bar (2017 Values / Goals / 2020 Values) and
# each column is a goods category.
# assign() builds a new frame instead of writing into the column-subset frame in place.
goaldf = goaldf.assign(Goals=goaldf["2017 Values"] + goaldf["Goals"]).T

In [16]:
# Overall total: the three covered categories added together
# (the "not in aggreement" column is left out of the sum).
goaldf["0. Total"] = (
    goaldf["1. Manufactured Goods"]
    .add(goaldf["2. Agriculture"])
    .add(goaldf["3. Energy"])
)

In [17]:
# Preview the goals table: one row per bar, one column per goods category.
goaldf.head()

high_catagory,1. Manufactured Goods,2. Agriculture,3. Energy,not in aggreement,0. Total
2017 Values,50151480000.0,20969790000.0,7649222000.0,50264670000.0,78770490000.0
Goals,83051480000.0,33469790000.0,26149220000.0,,142670500000.0
2020 Values,32865300000.0,9689355000.0,4779958000.0,21818610000.0,47334610000.0


In [18]:
# Same preview as the preceding cell; the table is unchanged in between.
goaldf.head()

high_catagory,1. Manufactured Goods,2. Agriculture,3. Energy,not in aggreement,0. Total
2017 Values,50151480000.0,20969790000.0,7649222000.0,50264670000.0,78770490000.0
Goals,83051480000.0,33469790000.0,26149220000.0,,142670500000.0
2020 Values,32865300000.0,9689355000.0,4779958000.0,21818610000.0,47334610000.0


In [19]:
# Row labels of the goals table, in display order.
goals_cat = goaldf.index.tolist()

# Bar color for each row label (paired positionally with crl).
color_map = {label: shade for label, shade in zip(goals_cat, crl)}

# Legend / tooltip caption for each row label.
name_map = {
    label: caption
    for label, caption in zip(goals_cat, ["2017 Exports ", "2020 Commitments ", "2020 Exports "])
}

# Attach both lookups as columns so they travel with the data source.
goaldf["colors"] = goaldf.index.map(color_map)

goaldf["name"] = goaldf.index.map(name_map)

In [20]:
def make_source(df, goods_type):
    """Build the bar-chart data source for one goods category.

    Parameters
    ----------
    df : pandas.DataFrame
        Goals table with one row per bar. It must contain rows labelled
        "Goals" and "2020 Values" and a column named ``goods_type``.
    goods_type : str
        Column of ``df`` to chart, e.g. "0. Total".

    Returns
    -------
    source : ColumnDataSource
        Built from a copy of ``df`` with two extra columns: "position"
        (0, 1, 2, ... in row order) and "hover_label" (the ``goods_type``
        value divided by 1e9, formatted with one decimal).
    goal_met : str
        The "2020 Values" entry as a percentage of the "Goals" entry,
        rounded to one decimal place.
    """
    # Work on a copy: the same goals table is passed in for every category, and
    # building a chart should not add columns to the caller's frame.
    bars = df.copy()

    bars["position"] = bars.reset_index().index.values

    share = bars.loc["2020 Values", goods_type] / bars.loc["Goals", goods_type]
    goal_met = str(round(100 * share, 1))

    bars["hover_label"] = (bars[goods_type] / 1000000000).map('{:,.1f}'.format)

    return ColumnDataSource(bars), goal_met

In [21]:
def make_bar_chart(df, goods_type):
    """Draw the three-bar progress chart for one goods category.

    ``df`` is handed to ``make_source`` together with ``goods_type``; the
    returned percentage string is printed on the chart as "progress towards
    meeting commitments". Returns the configured Bokeh figure.
    """
    width = 575
    height = 450

    source, goal_met = make_source(df, goods_type)

    # Strip the leading "N. " from the category name; the total gets " Trade" appended.
    heading = "" + goods_type[3:]
    if goods_type == "0. Total":
        heading = heading + " Trade"

    p = figure(plot_height=height, plot_width=width, title=heading,
               toolbar_location='below',
               tools="reset")

    p.vbar(x="position", top=goods_type, width=0.6, color="colors", alpha=0.75,
           hatch_pattern=" ", hatch_alpha=0.10,
           source=source, legend_field="name")

    # Hover box: bar caption on the first line, value in billions on the second.
    TIMETOOLTIPS = """
    <div style="background-color:#F5F5F5; opacity: 0.95; border: 0px 0px 0px 0px">
        <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: bold">@name:</span>
        </div>
        <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: bold">$@hover_label Billion</span>
        </div>
    </div>
    """
    p.add_tools(HoverTool(tooltips=TIMETOOLTIPS))

    # Title and x axis: the bars are identified by the legend, so ticks and
    # tick labels on the x axis are hidden.
    p.title.text_font_size = '13pt'
    p.xgrid.grid_line_color = None
    p.xaxis.major_tick_line_color = None
    p.xaxis.minor_tick_line_color = None
    p.xaxis.major_label_text_font_size = '0pt'

    # y axis: dollar formatting and a fixed 0 .. 180e9 range.
    p.yaxis.formatter = NumeralTickFormatter(format="($0. a)")
    p.yaxis.minor_tick_line_color = None
    p.y_range.start = 0
    p.y_range.end = 180000000000

    # Three stacked text labels placed in screen coordinates:
    # two caption lines and the large percentage below them.
    annotations = (
        (235, '''China's progress towards''', "1.2em"),
        (215, 'meeting commitments:', "1.2em"),
        (165, goal_met + '%', "3em"),
    )
    for y_px, caption, font_size in annotations:
        mytext = Label(x=340, y=y_px, text=caption, text_font_size=font_size,
                       text_font_style="bold",
                       x_units='screen', y_units='screen')
        p.add_layout(mytext)

    # Legend styling.
    p.legend.orientation = "horizontal"
    p.legend.location = "top_left"
    p.legend.background_fill_color = background
    p.legend.background_fill_alpha = 0.10
    p.legend.label_text_font_size = "1em"

    # Figure background / border.
    p.border_fill_color = background
    p.background_fill_color = background
    p.background_fill_alpha = 0.75
    p.outline_line_color = None

    p.toolbar.autohide = True

    # Responsive sizing, bounded between 25% and 100% of the nominal size.
    p.sizing_mode = "scale_both"
    p.max_height = height
    p.max_width = width
    p.min_height = int(0.25 * height)
    p.min_width = int(0.25 * width)

    return p

In [22]:
# Load the product-level monthly trade data. The path is assembled with
# os.path.join so the notebook is not tied to Windows path separators.
file = os.path.join(".", "data", "phaseone-tradedata.parquet")

df = pq.read_table(file).to_pandas()

df.head()

Unnamed: 0,time,CTY_CODE,china_exports,hs6,china_exports_quant,quant_type,hs4,hs4_o,description,low_catagory,high_catagory,_merge,naics,naics3
0,2013-01-01,5700,4806.0,841090,0.0,-,8410,8410.0,"Hydraulic turbines, water wheels and regulator...",Industrial mac.hin ery,1. Manufactured Goods,both,333611,333
1,2013-01-01,5700,287000.0,481031,0.0,-,4810,,,,not in aggreement,left_only,322130,322
2,2013-01-01,5700,311650.0,481160,0.0,-,4811,,,,not in aggreement,left_only,322220,322
3,2013-01-01,5700,63701.0,481620,0.0,-,4816,,,,not in aggreement,left_only,339940,339
4,2013-01-01,5700,40265.0,490290,0.0,-,4902,,,,not in aggreement,left_only,323111,323


In [23]:
def relative_exports(df, base_year=2017):
    """Monthly export values plus their % change versus the same month of a base year.

    Parameters
    ----------
    df : pandas.DataFrame
        Product-level rows with columns "time", "china_exports" and
        "low_catagory". "time" must be compatible with the monthly
        DatetimeIndex built from ``start_date`` / ``end_date``.
    base_year : int, default 2017
        Year whose months are used as the denominator.

    Returns
    -------
    pandas.DataFrame
        Indexed by "time" (every month start from the module-level
        ``start_date`` to ``end_date``), with columns:
        "china_exports" (sum over all rows of that month, NaN for months
        without data), "low_catagory" (first value seen that month) and
        "rel_exports" = 100 * (exports / exports in the same calendar month
        of ``base_year`` - 1). A zero base-month value yields inf or NaN;
        callers are expected to deal with that.

    Side effect: prints the "low_catagory" value of the first row.
    """
    # Many HS codes share one month, so collapse to one row per month first.
    monthly = df.groupby(["time"]).agg({"china_exports": "sum", "low_catagory": "first"})

    # Put every group on the same complete monthly calendar.
    full_range = pd.date_range(start_date, end_date, freq='MS')
    out_df = monthly.reindex(full_range, fill_value=np.nan)
    out_df.index.name = 'time'

    # Look up, for every row, the base-year value of the same calendar month.
    # This replaces one hand-written, chained-indexing assignment per year
    # (which is a silent no-op under pandas Copy-on-Write).
    month_of_row = pd.Series(out_df.index.month, index=out_df.index)
    baseline = out_df.loc[out_df.index.year == base_year, "china_exports"]
    baseline_by_month = pd.Series(baseline.to_numpy(), index=baseline.index.month)

    out_df["rel_exports"] = 100 * (out_df["china_exports"] / month_of_row.map(baseline_by_month) - 1)

    print(out_df["low_catagory"].iloc[0])

    return out_df

In [24]:
def relative_exports_quant(df, base_year=2017):
    """Monthly export quantities plus their % change versus the same month of a base year.

    Parameters
    ----------
    df : pandas.DataFrame
        Product-level rows with columns "time", "china_exports_quant",
        "low_catagory" and "quant_type". "time" must be compatible with the
        monthly DatetimeIndex built from ``start_date`` / ``end_date``.
    base_year : int, default 2017
        Year whose months are used as the denominator.

    Returns
    -------
    pandas.DataFrame
        Indexed by "time" (every month start from the module-level
        ``start_date`` to ``end_date``), with columns:
        "china_exports_quant" (sum over all rows of that month, NaN for
        months without data), "low_catagory" and "quant_type" (first value
        seen that month, in index order) and "rel_exports_quant" =
        100 * (quantity / quantity in the same calendar month of
        ``base_year`` - 1). A zero base-month value yields inf or NaN.

    Side effect: prints the "low_catagory" value of the first row.
    """
    # Sort a copy so "first" is taken in index order without reordering the
    # caller's frame.
    ordered = df.sort_index()

    # Many HS codes share one month, so collapse to one row per month first.
    monthly = ordered.groupby(["time"]).agg(
        {"china_exports_quant": "sum", "low_catagory": "first", "quant_type": "first"}
    )

    # Put every group on the same complete monthly calendar.
    full_range = pd.date_range(start_date, end_date, freq='MS')
    out_df = monthly.reindex(full_range, fill_value=np.nan)
    out_df.index.name = 'time'

    # Look up, for every row, the base-year value of the same calendar month.
    # This replaces one hand-written, chained-indexing assignment per year
    # (which is a silent no-op under pandas Copy-on-Write).
    month_of_row = pd.Series(out_df.index.month, index=out_df.index)
    baseline = out_df.loc[out_df.index.year == base_year, "china_exports_quant"]
    baseline_by_month = pd.Series(baseline.to_numpy(), index=baseline.index.month)

    out_df["rel_exports_quant"] = 100 * (
        out_df["china_exports_quant"] / month_of_row.map(baseline_by_month) - 1
    )

    print(out_df["low_catagory"].iloc[0])

    return out_df

In [25]:
def make_trade_time(df, catagory):
    """Build the wide monthly table of export values and relative changes.

    For "0. Total" every row except "not in aggreement" is kept and one
    series is produced per "high_catagory"; for any other ``catagory`` only
    that high-level category is kept and one series is produced per
    "low_catagory". Each group is run through ``relative_exports``.

    Returns a DataFrame indexed by "time" whose columns are a two-level
    index: (value column from ``relative_exports``, group name). Infinite
    values are replaced by NaN.
    """
    if catagory == "0. Total":
        # everything covered by the agreement, split by headline category
        keep = df["high_catagory"] != "not in aggreement"
        split_on = "high_catagory"
    else:
        # a single headline category, split by its sub-categories
        keep = df["high_catagory"] == catagory
        split_on = "low_catagory"

    selected = df[keep].copy()

    per_group = selected.groupby([split_on]).apply(relative_exports)

    # the category label column is no longer needed once the groups are formed
    per_group = per_group.drop(labels=["low_catagory"], axis=1)

    # long (group, time) rows -> one column per group
    wide = per_group.reset_index().pivot(index="time", columns=split_on)

    # a base month with zero exports gives inf; treat it as missing
    return wide.replace(np.inf, np.nan)

In [26]:
def make_trade_time_quant(df, catagory):
    """Build the wide monthly table of export quantities for energy products.

    Only rows whose "low_catagory" is one of "Liquefied natural gas",
    "Crude oil" or "Coal" are used, whatever ``catagory`` is; a warning is
    printed (and processing continues) when ``catagory`` is not "3. Energy".
    Each product is run through ``relative_exports_quant``.

    Returns a DataFrame indexed by "time" whose columns are a two-level
    index: (value column from ``relative_exports_quant``, product name).
    Infinite values are replaced by NaN.
    """
    if not catagory == "3. Energy":
        print("don't do this, only works for energy")

    energy_list = ["Liquefied natural gas", "Crude oil", "Coal"]

    is_energy_product = df["low_catagory"].isin(energy_list)
    selected = df[is_energy_product].copy()

    per_product = selected.groupby(["low_catagory"]).apply(relative_exports_quant)

    # the category label column is no longer needed once the groups are formed
    per_product = per_product.drop(labels=["low_catagory"], axis=1)

    # long (product, time) rows -> one column per product
    wide = per_product.reset_index().pivot(index="time", columns="low_catagory")

    # a base month with zero quantity gives inf; treat it as missing
    return wide.replace(np.inf, np.nan)

In [27]:
def make_time_by_product(df, goods_type, level):
    """Draw the multi-line time-series chart for one goods category.

    Parameters
    ----------
    df : pandas.DataFrame
        Product-level trade data, passed through to ``make_trade_time`` or
        ``make_trade_time_quant``.
    goods_type : str
        Category to plot, e.g. "0. Total" or "3. Energy".
    level : str
        Which measure to plot:
        "china_exports" (dollar values), "rel_exports" (% change in value
        versus the same month of 2017) or "rel_exports_quant" (% change in
        volume versus the same month of 2017).

    Returns
    -------
    A Bokeh ``column`` layout holding the figure and an explanatory text box.

    Raises
    ------
    ValueError
        If ``level`` is not one of the three supported measures.
    """
    height = 533
    width = 675

    if level == "china_exports":
        title = "US Exports to China"
        foobar = make_trade_time(df, goods_type)
    elif level == "rel_exports":
        title = "% Change in US Exports to China Relative to Same Month in 2017"
        foobar = make_trade_time(df, goods_type)
    elif level == "rel_exports_quant":
        title = "% Change in Volume to China Relative to Same Month in 2017"
        foobar = make_trade_time_quant(df, goods_type)
    else:
        # Fail early with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "level must be 'china_exports', 'rel_exports' or 'rel_exports_quant', got %r" % (level,)
        )

    p = figure(plot_height=height, plot_width=width, x_axis_type="datetime", toolbar_location='below',
               tools="box_zoom, reset, pan, xwheel_zoom", title=title,
               x_range=(dt.datetime(2017, 7, 1), dt.datetime(2020, 11, 1)))

    # One line per second-level column under `level`; all lines share the same x values.
    numlines = len(foobar[level].columns)

    line_data = {
        'xs': [foobar.index.values] * numlines,
        'ys': [foobar[level, name].values for name in foobar[level]],
        'label': [name for name in foobar[level].columns],
    }
    if level == "rel_exports_quant":
        # Unit(s) of measure found for each product, shown in the tooltip.
        line_data['q_type'] = [foobar["quant_type", name].dropna().unique() for name in foobar["quant_type"]]

    multi_line_source = ColumnDataSource(line_data)

    p.multi_line(xs="xs",
                 ys="ys",
                 line_width=2.5, line_alpha=0.5, line_color="slategray",
                 hover_line_alpha=0.75, hover_line_width=5,
                 hover_line_color="crimson", source=multi_line_source)

    # Tooltip: a shared header with the line's label, then a level-specific value line.
    TIMETOOLTIPS = """
            <div style="background-color:#F5F5F5; opacity: 0.95; border: 5px 5px 5px 5px;">
            <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: bold"> @label
             </span>
             </div>
             <div style = "text-align:left;">"""

    if level == "china_exports":
        TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: bold"> $data_x{%b %Y}:  $data_y{$0.0a}</span>   
            </div>
            </div>
            """
    elif level == "rel_exports":
        TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: bold"> $data_x{%b %Y}:  $data_y{0}%</span>   
            </div>
            </div>
            """
    else:
        TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: bold"> $data_x{%b %Y}:  $data_y{0}%</span>
            </div>
            <span style="font-size: 13px; font-weight: bold"> Volume Measure: @q_type
             </span>
            </div>
            """

    p.add_tools(HoverTool(tooltips=TIMETOOLTIPS, line_policy='nearest', formatters={'$data_x': 'datetime'}))
    p.title.text_font_size = '13pt'
    p.background_fill_color = background
    p.background_fill_alpha = 0.75
    p.border_fill_color = background

    # Shaded periods (explained in the text box below the chart): red, then blue.
    tradewar_box = BoxAnnotation(left=dt.datetime(2018, 7, 1), right=dt.datetime(2019, 10, 11),
                                 fill_color='red', fill_alpha=0.1)
    p.add_layout(tradewar_box)

    agreement_box = BoxAnnotation(left=dt.datetime(2020, 1, 1), right=dt.datetime(2020, 12, 31),
                                  fill_color='blue', fill_alpha=0.1)
    p.add_layout(agreement_box)

    p.yaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = "13px"

    p.yaxis.minor_tick_line_color = None

    # Fixed upper y limits for the energy value chart and for the volume chart.
    if level == "rel_exports":
        if goods_type == "3. Energy":
            p.y_range.end = 800

    if level == "rel_exports_quant":
        p.y_range.end = 900

    div0 = Div(text = """Each line is a major product catagory in Annex 6-1 of The Agreement. 
    Red marks the period of Section 301 tariffs and retaliation. Blue is period of agreement.
    """, width=555, background = background )

    if level == "china_exports":
        p.yaxis.formatter = NumeralTickFormatter(format="($0. a)")

    p.outline_line_color = None
    p.sizing_mode = "scale_both"
    div0.sizing_mode = "scale_both"

    # No drag tool is active by default.
    p.toolbar.active_drag = None

    # Stack the figure over the text box, bounded between 25% and 100% of the nominal size.
    p = column(p, div0, sizing_mode="scale_both", max_height=height, max_width=width,
               min_height=int(0.25 * height), min_width=int(0.25 * width))

    return p

In [28]:
# Standalone preview of the energy volume chart.
p = make_time_by_product(df, goods_type="3. Energy", level="rel_exports_quant")

show(p)

Coal
Crude oil
nan


In [29]:
def make_panel_fig(goods_type):
    """Assemble the full dashboard panel for one goods category.

    The panel is a row with, on the left, the progress bar chart followed by
    the relevant text of the agreement and, on the right, tabbed time-series
    charts (value change, dollar value and, for "3. Energy" only, volume
    change). Reads the module-level ``goaldf``, ``df`` and ``background``.

    Parameters
    ----------
    goods_type : str
        One of "0. Total", "1. Manufactured Goods", "2. Agriculture",
        "3. Energy".

    Returns
    -------
    A Bokeh ``row`` layout.

    Raises
    ------
    ValueError
        If ``goods_type`` is not one of the four supported categories.
    """
    # Agreement text shown under the bar chart, keyed by category.
    commitment_text = {
        '1. Manufactured Goods': """(a) For the category of manufactured goods identified in Annex 6.1, <b>no less than $32.9
            billion above the corresponding 2017 baseline amount is purchased and imported
            into China from the United States in calendar year 2020</b>, and no less than $44.8
            billion above the corresponding 2017 baseline amount is purchased and imported
            into China from the United States in calendar year 2021;""",
        '2. Agriculture': """(b) For the category of agricultural goods identified in Annex 6.1, <b>no less than $12.5
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2020</b>, and no less than $19.5
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2021;""",
        '3. Energy': """(c) For the category of energy products identified in Annex 6.1, <b>no less than $18.5
        billion above the corresponding 2017 baseline amount is purchased and imported into China 
        from the United States in calendar year 2020</b>, and no less than $33.9
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2021;""",
        '0. Total': """During the two-year period from January 1, 2020 through December 31, 2021, China shall
        ensure that purchases and imports into China from the United States of the manufactured goods,
        agricultural goods, energy products, ... exceed the corresponding
        2017 baseline amount by no less than $200 billion (<b>$64 billion in calendar year 2020</b>);""",
    }

    if goods_type not in commitment_text:
        # Fail early instead of hitting an undefined variable further down.
        raise ValueError("unknown goods_type %r; expected one of %s"
                         % (goods_type, sorted(commitment_text)))

    pbar = make_bar_chart(goaldf, goods_type)

    # The bold tags are closed with </b>; the original markup reopened <b> instead.
    div0 = Div(text="""Official Text from the <b>ECONOMIC AND TRADE AGREEMENT</b>""",
               max_width=555, background=background)
    div0.sizing_mode = "scale_both"

    div1 = Div(text="""<b>Chapter 6 (page 6-1), Article 6.2: Trade Opportunities.</b>""",
               max_width=555, background=background)
    div1.sizing_mode = "scale_both"

    div2 = Div(text=commitment_text[goods_type], max_width=555, background=background)
    div2.sizing_mode = "scale_both"

    plevel = make_time_by_product(df, goods_type, "china_exports")

    pchange = make_time_by_product(df, goods_type, "rel_exports")

    if goods_type == '3. Energy':
        # Energy additionally gets a volume tab. The local is deliberately not
        # called `pq`, which is the notebook's alias for pyarrow.parquet.
        pvolume = make_time_by_product(df, goods_type, "rel_exports_quant")

        tab1 = Panel(child=plevel, title="Exports in $")

        tab2 = Panel(child=pchange, title="Exports as % of 2017 Values")

        tab3 = Panel(child=pvolume, title="Volume as % of 2017 Values")

        tab_list = [tab2, tab1, tab3]
    else:
        tab1 = Panel(child=plevel, title="Exports in $")

        tab2 = Panel(child=pchange, title="Exports as % of 2017 values")

        tab_list = [tab2, tab1]

    # The relative-change tab is listed first, so it is the one shown initially.
    outfig = row(column(pbar, div0, div1, div2, sizing_mode="scale_both"),
                 Tabs(tabs=tab_list, tabs_location="above"))

    return outfig

In [30]:
# Render the whole dashboard to a standalone HTML file under docs/.
# os.path.join keeps the path valid on non-Windows systems too.
output_file(os.path.join(".", "docs", "phase_one_tracker-august-data-C.html"))

# One top-level tab per goods category, built in display order.
tab0, tab1, tab2, tab3 = [
    Panel(child=make_panel_fig(goods_type), title=tab_title)
    for goods_type, tab_title in [
        ('0. Total', "Total Trade"),
        ('1. Manufactured Goods', "Manufactured Goods"),
        ('2. Agriculture', "Agriculture"),
        ('3. Energy', "Energy"),
    ]
]

tabs = Tabs(tabs=[tab0, tab1, tab2, tab3])

show(tabs)

Industrial mac.hin ery
Cereals
Coal
Industrial mac.hin ery
Cereals
Coal
Electrical equipment and mac.hioery
Industrial mac.hin ery
Iron and steel
Optical and medical instruments
Other manufactured goods
Pharmaceutical products
Vehicles
Electrical equipment and mac.hioery
Industrial mac.hin ery
Iron and steel
Optical and medical instruments
Other manufactured goods
Pharmaceutical products
Vehicles
Cereals
Cotton
Meat
Oilseeds
Other agricultural commodities
Seafood
Cereals
Cotton
Meat
Oilseeds
Other agricultural commodities
Seafood
Coal
Crude oil
nan
Refined products
Coal
Crude oil
nan
Refined products
Coal
Crude oil
nan


In [31]:
# Export both parquet datasets as CSV next to the originals.
# Paths are built with os.path.join so the cell also runs outside Windows.
# The goals file is processed first, so `file` and `df` end up referring to
# the trade data, as before.
for dataset in ("phaseone-goals", "phaseone-tradedata"):
    file = os.path.join(".", "data", dataset + ".parquet")

    df = pq.read_table(file).to_pandas()

    df.to_csv(os.path.join(".", "data", dataset + ".csv"))

In [32]:
# Bundle the two CSV exports into one zip archive. The with-block closes the
# archive even if adding a file fails; os.path.join keeps the paths portable.
with zf.ZipFile(os.path.join(".", "data", "phaseone-trade-tracker-data.zip"), 'w') as zipObj:
    # Add multiple files to the zip
    zipObj.write(os.path.join(".", "data", "phaseone-goals.csv"))
    zipObj.write(os.path.join(".", "data", "phaseone-tradedata.csv"))