In [19]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np
from datetime import datetime

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools
import os
import geopandas as gpd # this is the main geopandas 

import pyarrow as pa
import pyarrow.parquet as pq
 
from bokeh.palettes import brewer, Spectral6
from bokeh.io import show, output_file, curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Panel, Tabs, GeoJSONDataSource, LinearColorMapper
from bokeh.models import ColorBar
from bokeh.layouts import column, gridplot, row
from bokeh.transform import factor_cmap
from bokeh.models import NumeralTickFormatter, Title, Label, Paragraph, Div, CustomJSHover, BoxAnnotation

In [20]:
# Bar colours; they are paired positionally with the rows of the goals table
# when the colour map is built from zip(goals_cat, crl).
crl = ["darkblue","slategray","crimson"]

# Fill colour shared by figure backgrounds, borders, legends and text Divs.
background = "#ffffff"

In [21]:
# Build the path with os.path.join so the notebook also runs on macOS/Linux,
# where a backslash is an ordinary filename character, not a separator.
file = os.path.join("data", "phaseone-goals.parquet")

# Read the goals table from parquet into a pandas DataFrame.
goaldf = pq.read_table(file).to_pandas()

In [22]:
# Keep only the three value columns. The explicit .copy() yields an independent
# frame, so the column assignment in the following cell does not raise pandas'
# SettingWithCopyWarning.
goaldf = goaldf[["2017 Values", "Goals", "2020 Values"]].copy()

In [23]:
# Replace "Goals" by the sum of the 2017 value and the stored goal figure, then
# transpose so that "2017 Values", "Goals" and "2020 Values" become the rows.
goaldf = goaldf.assign(Goals=lambda frame: frame["2017 Values"] + frame["Goals"]).T

In [24]:
# Preview the transposed goals table (rows: 2017 Values / Goals / 2020 Values).
goaldf.head()

high_catagory,1. Manufactured Goods,2. Agriculture,3. Energy,not in aggreement
2017 Values,50235350000.0,21256110000.0,15926170000.0,50266580000.0
Goals,83135350000.0,33756110000.0,34426170000.0,
2020 Values,10886960000.0,3177704000.0,889186700.0,7558639000.0


In [25]:
# Row labels of the goals table, in their current order.
goals_cat = goaldf.index.tolist()

# Positional pairing: first row -> first colour, second row -> second colour, ...
color_map = {row_label: shade for row_label, shade in zip(goals_cat, crl)}

# Same pairing for the legend text shown next to each bar.
name_map = {
    row_label: legend_text
    for row_label, legend_text in zip(
        goals_cat, ["2017 Exports ", "2020 Commitments ","2020 Exports "]
    )
}

# Store both lookups as columns so the bar glyphs can read them by field name.
goaldf["colors"] = goaldf.index.map(color_map)
goaldf["name"] = goaldf.index.map(name_map)

In [26]:
def make_source(df, goods_type):
    """Build the bar-chart data source for one goods category.

    Parameters
    ----------
    df : pandas.DataFrame
        Goals table. Its index must contain the labels "2020 Values" and
        "Goals", and it must have a numeric column named ``goods_type``.
        The frame is NOT modified; all derived columns are added to a copy.
    goods_type : str
        Name of the column to chart.

    Returns
    -------
    source : ColumnDataSource
        Wraps a copy of ``df`` with two extra columns: "position" (0..n-1,
        the bar's x coordinate) and "hover_label" (the value in billions,
        formatted with one decimal).
    goal_met : str
        100 * (2020 value / goal), rounded to one decimal, as a string.
    """
    # Work on a copy: callers pass the same shared table for every category,
    # and writing helper columns into it would leak state between calls.
    frame = df.copy()

    frame["position"] = np.arange(len(frame))

    values = frame[goods_type]

    goal_met = values.loc["2020 Values"] / values.loc["Goals"]

    goal_met = str(round(100*goal_met,1))

    # Dollars -> billions of dollars for the hover tooltip.
    frame["hover_label"] = (values/1000000000).map('{:,.1f}'.format)

    source = ColumnDataSource(frame)

    return source, goal_met

In [27]:
def make_bar_chart(df, goods_type):
    """Draw the three-bar progress chart for one goods category.

    ``df`` and ``goods_type`` are handed to ``make_source``; the resulting
    source supplies the bar positions, heights, colours and legend names.
    The figure title is ``goods_type`` with its first three characters
    removed. Returns the configured Bokeh figure.
    """
    fig_width, fig_height = 575, 450

    source, goal_met = make_source(df, goods_type)

    chart = figure(
        plot_height=fig_height,
        plot_width=fig_width,
        title="" + goods_type[3:],
        toolbar_location='below',
        tools="box_zoom, reset",
    )

    chart.vbar(
        x="position",
        top=goods_type,
        width=0.6,
        color="colors",
        alpha=0.75,
        hatch_pattern=" ",
        hatch_alpha=0.10,
        source=source,
        legend_field="name",
    )

    # Headline annotation: two caption lines plus the large percentage figure.
    headline = (
        (1.35, 90000000000, '''China's progress towards''', "12pt"),
        (1.35, 84000000000, 'meeting commitments:', "12pt"),
        (1.45, 60000000000, goal_met + '%', "45pt"),
    )
    for x_pos, y_pos, caption, font_size in headline:
        chart.add_layout(
            Label(x=x_pos, y=y_pos, text=caption,
                  text_font_size=font_size, text_font_style="bold")
        )

    # Hover box: legend name on the first line, value in billions on the second.
    hover_html = """
    <div style="background-color:#F5F5F5; opacity: 0.95; border: 0px 0px 0px 0px">
        <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: normal">@name:</span>
        </div>
        <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: normal">$@hover_label Billion</span>
        </div>
    </div>
    """
    chart.add_tools(HoverTool(tooltips=hover_html))

    chart.title.text_font_size = '13pt'

    # x axis: no vertical grid lines, no ticks, and zero-size tick labels.
    chart.xgrid.grid_line_color = None
    x_axis = chart.xaxis
    x_axis.major_tick_line_color = None
    x_axis.minor_tick_line_color = None
    x_axis.major_label_text_font_size = '0pt'

    # y axis: abbreviated dollar labels on a fixed 0 .. 120e9 range.
    y_axis = chart.yaxis
    y_axis.formatter = NumeralTickFormatter(format="($0. a)")
    y_axis.minor_tick_line_color = None
    chart.y_range.start = 0
    chart.y_range.end = 120000000000

    # Legend and background styling.
    chart.border_fill_color = background
    legend = chart.legend
    legend.orientation = "horizontal"
    legend.background_fill_color = background
    legend.location = "top_left"

    chart.background_fill_color = background
    chart.background_fill_alpha = 0.75

    chart.toolbar.autohide = True

    return chart

In [28]:
# Build the path with os.path.join so the notebook also runs on macOS/Linux,
# where a backslash is an ordinary filename character, not a separator.
file = os.path.join("data", "phaseone-tradedata.parquet")

# Product-level trade data; read as the global `df` by make_panel_fig.
df = pq.read_table(file).to_pandas()

df.head()

Unnamed: 0,time,CTY_CODE,china_exports,hs6,hs4,description,low_catagory,high_catagory,_merge,naics,naics3
0,2017-01-01,5700,2740.0,10391,103,"Swine, live",Other agricultural commodities,2. Agriculture,both,112210,112
1,2017-01-01,5700,155314.0,10619,106,"Anima,ls liv, e nesoi",Other agricultural commodities,2. Agriculture,both,112990,112
2,2017-01-01,5700,15232.0,20220,202,"Meat of bovine animals, frozen",Meat,2. Agriculture,both,311611,311
3,2017-01-01,5700,5969198.0,20322,203,"Meat of swin, e fresh, chilled, or frozen",Meat,2. Agriculture,both,311611,311
4,2017-01-01,5700,9800714.0,20329,203,"Meat of swin, e fresh, chilled, or frozen",Meat,2. Agriculture,both,311611,311


In [29]:
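# Disabled exploratory snippet (kept for reference): within "2. Agriculture",
# list the descriptions of the three largest "Oilseeds" products by
# January 2017 exports.
#foo_df = df[df["high_catagory"] == "2. Agriculture"].copy()
    # Grab the category

#grp = foo_df.groupby(["low_catagory"])

#oilseeds = grp.get_group("Oilseeds")

#oilseeds[oilseeds["time"] == "2017-01-01"].sort_values(by = "china_exports", ascending = False).iloc[0:3].description.to_list()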

In [30]:
def relative_exports(df, base_year=2017):
    """Total exports per period and their % change versus the base year.

    Rows of ``df`` are summed per ``time`` value (a category contains many
    HS codes). Every period is then compared with the period of
    ``base_year`` that falls in the same calendar month:

        rel_exports = 100 * (exports / base_year_exports_same_month - 1)

    The baseline is looked up by calendar month rather than by a fixed row
    offset, so any year is handled and a missing month cannot shift the
    comparison onto the wrong baseline. The same formula is applied to every
    year; base-year rows therefore come out as 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "time", "china_exports" and "low_catagory".
        "time" must be datetime-like; the data is assumed to be monthly,
        i.e. at most one distinct "time" per calendar month of the base
        year (otherwise the baseline lookup raises ValueError).
    base_year : int, default 2017
        Year whose months serve as the baseline.

    Returns
    -------
    pandas.DataFrame
        Indexed by "time" with the columns "china_exports" (sum),
        "low_catagory" (first value in the group) and "rel_exports".
        "rel_exports" is NaN where no base-year value exists for the month
        and inf where the base-year value is 0.
    """
    # Collapse the many HS-code rows into a single total per period.
    monthly = df.groupby("time").agg({"china_exports": "sum", "low_catagory": "first"})

    when = pd.DatetimeIndex(monthly.index)
    in_base_year = when.year == base_year

    # Base-year totals keyed by calendar month (1..12).
    baseline = pd.Series(
        monthly.loc[in_base_year, "china_exports"].to_numpy(),
        index=when.month[in_base_year],
    )

    # For every row, the base-year total of the same calendar month.
    same_month_baseline = pd.Series(
        baseline.reindex(when.month).to_numpy(), index=monthly.index
    )

    # A single column assignment; no chained indexing, which pandas may
    # silently apply to a temporary copy.
    monthly["rel_exports"] = 100 * (monthly["china_exports"] / same_month_baseline - 1)

    # Progress trace: name of the sub-category that was just processed.
    print(monthly["low_catagory"].iloc[0])

    return monthly

In [31]:
def make_trade_time(df, catagory):
    """Build a wide time-by-sub-category table for one high-level category.

    Parameters
    ----------
    df : pandas.DataFrame
        Trade data with the columns "high_catagory" and "low_catagory" plus
        whatever ``relative_exports`` needs.
    catagory : str
        Value of "high_catagory" to keep.

    Returns
    -------
    pandas.DataFrame
        Indexed by "time". Columns are a two-level index: the measure
        returned by ``relative_exports`` (e.g. "china_exports",
        "rel_exports") on top, the sub-category below. Infinite values are
        replaced by NaN.

    Raises
    ------
    ValueError
        If no row of ``df`` belongs to ``catagory``.
    """
    foo_df = df[df["high_catagory"] == catagory].copy()
    if foo_df.empty:
        raise ValueError("no rows with high_catagory == {!r}".format(catagory))

    # Run relative_exports once per sub-category. Iterating over the groups
    # (instead of groupby.apply) always hands the helper the complete group,
    # including the "low_catagory" column it aggregates; apply's handling of
    # grouping columns is deprecated in recent pandas.
    pieces = {
        name: relative_exports(group)
        for name, group in foo_df.groupby("low_catagory")
    }

    # Stack the per-sub-category results under a (low_catagory, time) index.
    foobar = pd.concat(pieces, names=["low_catagory"])

    # The sub-category now lives in the index; drop the redundant column so
    # reset_index does not collide with it.
    foobar = foobar.drop(columns=["low_catagory"])

    # Long -> wide: one row per period, one column per (measure, sub-category).
    foobar = foobar.reset_index().pivot(index = "time", columns = "low_catagory")

    # A zero baseline produces +/-inf in the relative series; plot it as a gap.
    foobar = foobar.replace([np.inf, -np.inf], np.nan)

    return foobar

In [32]:
def make_time_by_product(df, goods_type, level):
    """Line chart with one line per sub-category of ``goods_type``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trade data, passed on to ``make_trade_time``.
    goods_type : str
        High-level category to plot.
    level : {"china_exports", "rel_exports"}
        Measure to plot: export values, or the relative series computed by
        ``relative_exports``.

    Returns
    -------
    A Bokeh ``column`` layout holding the figure and an explanatory Div.

    Raises
    ------
    ValueError
        If ``level`` is not one of the two supported measures.
    """
    titles = {
        "china_exports": "US Exports to China",
        "rel_exports": "% Change in US Exports to China Relative to Same Month in 2017",
    }
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if level not in titles:
        raise ValueError(
            "level must be 'china_exports' or 'rel_exports', got {!r}".format(level)
        )

    height = 533
    width = 675

    foobar = make_trade_time(df,goods_type)

    p = figure(plot_height=height, plot_width = width, x_axis_type="datetime",toolbar_location = 'below',
           tools = "box_zoom, reset", title = titles[level])

    # One line per sub-category; labels come from the same measure as the data.
    series = foobar[level]
    names = list(series.columns)

    multi_line_source = ColumnDataSource({
        'xs': [foobar.index.values]*len(names),
        'ys': [series[name].values for name in names],
        'label': names})

    p.multi_line(xs= "xs",
                ys= "ys",
                line_width=3, line_alpha=0.5, line_color = "slategray",
                 hover_line_alpha=0.75, hover_line_width = 5,
                hover_line_color= "crimson", source = multi_line_source)

    # Tooltip: sub-category name, then "<month year>: <value>" for the point.
    TIMETOOLTIPS = """
            <div style="background-color:#F5F5F5; opacity: 0.95; border: 5px 5px 5px 5px;">
            <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: normal"> @label
             </span>
             </div>
             <div style = "text-align:left;">"""

    if level == "china_exports":
        TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: normal"> $data_x{%b %Y}:  $data_y{$0.0a}</span>   
            </div>
            </div>
            """
    else:
        TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: normal"> $data_x{%b %Y}:  $data_y{0}% increase</span>   
            </div>
            </div>
            """

    p.add_tools(HoverTool(tooltips = TIMETOOLTIPS,  line_policy='nearest', formatters={'$data_x': 'datetime'}))
    p.title.text_font_size = '13pt'
    p.background_fill_color = background
    p.background_fill_alpha = 0.75
    p.border_fill_color = background

    # Red band: 2018-07-01 .. 2019-10-11 (tariff period, per the caption below).
    tariff_box = BoxAnnotation(left=dt.datetime(2018,7,1), right=dt.datetime(2019,10,11), fill_color='red', fill_alpha=0.1)
    p.add_layout(tariff_box)

    # Blue band: calendar year 2020 (agreement period, per the caption below).
    agreement_box = BoxAnnotation(left=dt.datetime(2020,1,1), right=dt.datetime(2020,12,31), fill_color='blue', fill_alpha=0.1)
    p.add_layout(agreement_box)

    p.yaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = "13px"

    p.yaxis.minor_tick_line_color = None

    # Cap the relative Energy chart at 500 on the y axis.
    if level == "rel_exports" and goods_type == "3. Energy":
        p.y_range.end = 500

    # Start the visible x range in mid-2017.
    p.x_range.start = dt.datetime(2017,7,1)

    div0 = Div(text = """Each line is a major product category in Annex 6-1 of The Agreement. 
    Red marks the period of Section 301 tariffs and retaliation. Blue is period of agreement.
    """, width=555, background = background )

    if level == "china_exports":
        p.yaxis.formatter = NumeralTickFormatter(format="($0. a)")

    p = column(p,div0)

    return p

In [33]:
def make_panel_fig(goods_type):
    """Assemble the full dashboard panel for one goods category.

    The left column holds the progress bar chart and the quoted agreement
    text; the right side holds two tabs with the time-series charts
    (relative series first, export values second). Reads the module-level
    ``goaldf`` and ``df`` tables.

    Parameters
    ----------
    goods_type : str
        One of '1. Manufactured Goods', '2. Agriculture', '3. Energy'.

    Returns
    -------
    A Bokeh ``row`` layout.

    Raises
    ------
    ValueError
        If ``goods_type`` is not one of the three supported categories.
    """
    # Quoted commitment paragraph for each category.
    commitment_text = {
        '1. Manufactured Goods': """(a) For the category of manufactured goods identified in Annex 6.1, <b>no less than $32.9
            billion above the corresponding 2017 baseline amount is purchased and imported
            into China from the United States in calendar year 2020</b>, and no less than $44.8
            billion above the corresponding 2017 baseline amount is purchased and imported
            into China from the United States in calendar year 2021;""",
        '2. Agriculture': """(b) For the category of agricultural goods identified in Annex 6.1, <b>no less than $12.5
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2020</b>, and no less than $19.5
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2021;""",
        '3. Energy': """(c) For the category of energy products identified in Annex 6.1, <b>no less than $18.5
        billion above the corresponding 2017 baseline amount is purchased and imported into China 
        from the United States in calendar year 2020</b>, and no less than $33.9
        billion above the corresponding 2017 baseline amount is purchased and imported
        into China from the United States in calendar year 2021;""",
    }
    # Fail early with a clear message instead of an UnboundLocalError on div2.
    if goods_type not in commitment_text:
        raise ValueError(
            "goods_type must be one of {}, got {!r}".format(sorted(commitment_text), goods_type)
        )

    pbar = make_bar_chart(goaldf,goods_type)

    # The bold spans are closed with </b>.
    div0 = Div(text = """Official Text from the <b>ECONOMIC AND TRADE AGREEMENT</b>""", width=555, background = background )

    div1 = Div(text="""<b>Chapter 6 (page 6-1), Article 6.2: Trade Opportunities.</b>""", width=555, background = background )

    div2 = Div(text=commitment_text[goods_type], width=555, background = background )

    plevel = make_time_by_product(df,goods_type, "china_exports")

    pchange = make_time_by_product(df,goods_type, "rel_exports")

    tab1 = Panel(child= plevel, title="Exports in $")

    tab2 = Panel(child= pchange, title="Exports as % of 2017 values")

    # The relative-series tab is listed first, so it is the one shown initially.
    outfig = row(column(pbar,div0,div1,div2), Tabs(tabs=[tab2,  tab1], tabs_location = "above"))

    return outfig

In [34]:
# Write the dashboard to docs/phase_one_tracker.html. os.path.join keeps the
# path valid on macOS/Linux as well as Windows.
output_file(os.path.join("docs", "phase_one_tracker.html"))

# One top-level tab per goods category: (column key, tab title).
tabs = Tabs(tabs=[
    Panel(child=make_panel_fig(goods_type), title=tab_title)
    for goods_type, tab_title in (
        ('1. Manufactured Goods', "Manufactured Goods"),
        ('2. Agriculture', "Agriculture"),
        ('3. Energy', "Energy"),
    )
])

show(tabs)

Electrical equipment and mac.hioery
Industrial mac.hin ery
Iron and steel
Optical and medical instruments
Other manufactured goods
Pharmaceutical products
Vehicles
Electrical equipment and mac.hioery
Industrial mac.hin ery
Iron and steel
Optical and medical instruments
Other manufactured goods
Pharmaceutical products
Vehicles
Cereals
Cotton
Meat
Oilseeds
Other agricultural commodities
Seafood
Cereals
Cotton
Meat
Oilseeds
Other agricultural commodities
Seafood
Coal
Crude oil
Liquefied natural gas
Refined products
Coal
Crude oil
Liquefied natural gas
Refined products


In [35]:
# Export both parquet data sets as CSV next to the originals.
# - os.path.join keeps the paths valid on macOS/Linux as well as Windows.
# - The frames are written straight out without rebinding the global `df`,
#   which make_panel_fig reads; re-running cells out of order therefore
#   cannot leave `df` pointing at the goals table.
for dataset in ("phaseone-goals", "phaseone-tradedata"):
    parquet_path = os.path.join("data", dataset + ".parquet")
    csv_path = os.path.join("data", dataset + ".csv")
    pq.read_table(parquet_path).to_pandas().to_csv(csv_path)

In [36]:
# Bundle the two CSV exports into one zip archive.
# The with-block closes the archive even if adding a file fails, so a
# half-written, unclosed zip is never left behind.
with zf.ZipFile(os.path.join("data", "phaseone-trade-tracker-data.zip"), 'w') as zipObj:
    # Add multiple files to the zip
    for csv_name in ("phaseone-goals.csv", "phaseone-tradedata.csv"):
        zipObj.write(os.path.join("data", csv_name))