In [6]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np
from census import Census # This is new...

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

#import weightedcalcs as wc
#import numpy as np

import pyarrow as pa
import pyarrow.parquet as pq
 
from bokeh.palettes import brewer, Spectral6
from bokeh.io import show, output_file, curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Panel, Tabs, GeoJSONDataSource, LinearColorMapper
from bokeh.models import ColorBar
from bokeh.layouts import column, gridplot, row
from bokeh.transform import factor_cmap
from bokeh.models import NumeralTickFormatter, Title, Label, Paragraph, Div, CustomJSHover, BoxAnnotation

Ok here is teh strategy, grab the monthly data from here:

https://www.census.gov/foreign-trade/Press-Release/ft900_index.html

it looks like everything is systematically organized so that this can be accomplished

In [7]:
years = range(2003,2020)

df = pd.DataFrame([])

##########################################################################################

for xxx in years:

    year = str(xxx)
    
    url = "https://www.census.gov/foreign-trade/Press-Release/" 
    url = url + year + "pr/final_revisions/exh11.xls"
    
    #for some reason 2002 and back have final_revisions capitalized,
    # its ok, this is all I need for this

    foo = pd.read_excel(url,skiprows = 36, nrows = 12, header = None, usecols = [0,5,8])

    foo.columns = ["time", "exports", "imports"]

    foo["time"] = foo["time"] + ", " + year
    
    foo.time = pd.to_datetime(foo.time)
    
    df = df.append(foo)

##########################################################################################
# Then let's add in 2020 which is organized a differently

url = "https://www.census.gov/foreign-trade/Press-Release/2020pr/08/exh12.xls"

foo = pd.read_excel(url, skiprows = 38, nrows = 8, header = None, usecols = [0,5,8])

foo.columns = ["time", "exports", "imports"]

foo.time = foo.time.str.split("(").str[0]

foo["time"] = foo["time"] + ", " + "2020"

foo.time = pd.to_datetime(foo.time)

df = df.append(foo)

out_file = ".\\data"+ "\\aggregate-tradedata.parquet"

pq.write_table(pa.Table.from_pandas(df), out_file)

df.set_index("time", inplace = True)

In [8]:
df.tail()

Unnamed: 0_level_0,exports,imports
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-01,95799,165452
2020-05-01,90549,163931
2020-06-01,105056,179709
2020-07-01,112827,200839
2020-08-01,118069,203518


In [9]:
def growth_trade(foo):
    # what this function does is take a dataframe and create a relative 
    
    foo["import_growth"] = 100*((df.imports/foo.imports.shift(12)) - 1)
    
    foo["export_growth"] = 100*((df.exports/foo.exports.shift(12)) - 1)
    
    return foo

In [10]:
df = growth_trade(df)

In [11]:
def make_covid_GR_df(df,trade_type):

    covid_df = df.copy()

    covid_df["flag"] = np.nan

    covid_df.loc[dt.datetime(2020,3,1),"flag"] = "covid-shock"

    covid_df.flag.ffill(inplace = True)

    covid_df.flag.bfill(inplace = True, limit = 12)

    covid_df.reset_index(inplace = True)

    covid_df = covid_df[covid_df["flag"] == "covid-shock"]

    covid_df.index = list(range(-12, 5))

    covid_dates = covid_df[["time"]]
    
    foo = trade_type
    
    covid_df = covid_df[[foo + "_growth"]]
    
    if foo == "import":
        covid_df.columns = ["Imports, Covid Shock"]
        
    if foo == "export":
        covid_df.columns = ["Exports, Covid Shock"]
        
#############################################################

    GR_df = df.copy()

    GR_df["flag"] = np.nan

    GR_df.loc[dt.datetime(2008,9,1),"flag"] = "GR-shock"

    GR_df.flag.ffill(inplace = True, limit = 12)

    GR_df.flag.bfill(inplace = True, limit = 12)

    GR_df.reset_index(inplace = True)

    GR_df = GR_df[GR_df["flag"] == "GR-shock"]

    GR_df.index = list(range(-12, 13))

    GR_dates = GR_df[["time"]]

    GR_df = GR_df[[foo + "_growth"]]
    
    if foo == "import":
        covid_df.columns = ["Imports, Covid Shock"]
        GR_df.columns = ["Imports, Financial Crisis"]
        
    if foo == "export":
        covid_df.columns = ["Exports, Covid Shock"]
        GR_df.columns = ["Exports, Financial Crisis"]
    

    outdf = GR_df.merge(covid_df, left_index = True, right_index = True, how = "left")
    
    dates = GR_dates.merge(covid_dates, left_index = True, right_index = True, how = "left")
    
    return outdf, covid_dates, GR_dates

In [12]:
crl = ["darkblue","slategray","crimson"]

background = "#ffffff"

In [13]:
def make_trade_time(df, trade_type):
    
    height = 533
    width = 600

    foobar, covid_dates, GR_dates = make_covid_GR_df(df,trade_type)
    
    if trade_type == "import":
        title = "The Growth of US Imports (GOODS) (%YoY) Around Shock"
    
    if trade_type == "export":
        title = "The Growth of US Exports (GOODS) (%YoY) Around Shock"
    
    p = figure(plot_height=height, plot_width = width, toolbar_location = 'below',
           tools = "box_zoom, reset", title = title ) 

    numlines=len(foobar.columns)

    multi_line_source = ColumnDataSource({
        'xs': [foobar.index.values]*numlines,
        'ys': [foobar[name].values for name in foobar.columns],
        'label': [name for name in foobar.columns],
        'color': ["crimson", "darkblue", "slategrey"],
        "legend": ["Financial Crisis", "Covid-19 Pandemic"]})
    
    p.multi_line(xs= "xs",
                ys= "ys",
                line_width=4, line_alpha=0.75, line_color = "color",
                 hover_line_alpha=0.75, hover_line_width = 5,
                hover_line_color= "color", 
                legend_field=  "legend",
                source = multi_line_source)
    
######################################################################################
    singlesourceGR = ColumnDataSource({
        'xs': foobar.index.values,
        'ys': foobar.iloc[:,0].values,
        "dates": np.array(GR_dates.time),
        "label": [foobar.columns[0]]*len(foobar.index.values),
    })
    
    cGR = p.circle(x="xs", y="ys", size=10,
                    source = singlesourceGR, color = "crimson",alpha=0)
    
    singlesourceCovid = ColumnDataSource({
        'xs': foobar.index.values,
        'ys': foobar.iloc[:,1].values,
        "dates": np.array(covid_dates.time),
        "label": [foobar.columns[1]]*len(foobar.index.values),
    })
    
    ccovid = p.circle(x="xs", y="ys", size=10,
                    source = singlesourceCovid, color = "darkblue",alpha=0)
        
            
    TIMETOOLTIPS = """
            <div style="background-color:#F5F5F5; opacity: 0.95; border: 5px 5px 5px 5px;">
            <div style = "text-align:left;">
            <span style="font-size: 13px; font-weight: bold"> @label
             </span>
             </div>
             <div style = "text-align:left;">"""
    
    TIMETOOLTIPS = TIMETOOLTIPS + """
            <span style="font-size: 13px; font-weight: bold"> @dates{%b %Y} $data_y{0.0}%</span>   
            </div>
            </div>"""
    
    #p.hover.renderers = [ctest]
    p.add_tools(HoverTool(tooltips = TIMETOOLTIPS,  line_policy='nearest', formatters={'@dates': 'datetime'}, renderers = [cGR,ccovid]))
    
    p.title.text_font_size = '13pt'
    p.background_fill_color = background 
    p.background_fill_alpha = 0.75
    p.border_fill_color = background
    
    p.vbar(x = 0, 
           color='grey', top = 30, bottom = -50, width = 0.05, alpha = 0.50)

    p.yaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = "13px"

    p.yaxis.minor_tick_line_color = None
    
    p.x_range.start = -12
    p.y_range.start = -40
    p.y_range.end = 30
    
    p.xaxis.axis_label = 'Months Around Shock'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.xaxis.axis_label_text_font_size = "13px"
    
    p.legend.orientation = "vertical"
    p.legend.background_fill_color = background
    p.legend.background_fill_alpha = 0.10
    p.legend.label_text_font_size = "1em"
    
    p.outline_line_color = None
    p.sizing_mode= "scale_both"
    p.max_height = height
    p.max_width = width
    
    p.toolbar.active_drag = None
    p.toolbar.autohide = True
    p.min_border_left = 0
    p.min_border_bottom = 0

    return p

In [14]:
pimports = make_trade_time(df, "import")

pexports = make_trade_time(df, "export")

tab3 = Panel(child= pimports, title="Goods Imports % YoY")

tab4 = Panel(child= pexports, title="Goods Exports % YoY")

output_file('.\\docs\\' + "us_agg_trade.html")

div0 = Div(text = """Month 0 is September 2008 for the financial crisis series, Month 0 is March 2020 for the Covid-19 Pandemic series.
All values are percent changes year over year. Hover tool reports the change and the calander date.""", max_width=555, background = background )
div0.sizing_mode= "scale_both"
        
outfig = column(Tabs(tabs=[tab3,  tab4], tabs_location = "above"), div0, sizing_mode="scale_both")


show(outfig)

ValueError: Length mismatch: Expected axis has 18 elements, new values have 17 elements