In [1]:
import pandas as pd, matplotlib.pyplot as plt, ipywidgets as widgets, numpy as np, geopandas as gpd, plotly, plotly.graph_objects as go
import requests, ipyleaflet, plotly.express as px, branca, urllib, os, datetime, sys, qgrid, functools
from ipywidgets import interact, interactive, fixed, interact_manual

plotly.io.templates.default = "plotly_white"
qgrid.enable()


def pickled(max_age=24):
    try:
        import cpickle as pickle
    except:
        import pickle

    def actual_decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            compressed = ''
            if len(args) > 0:
                compressed = '_' + '_'.join([str(arg)[:10] for arg in args])
            if len(kwargs) > 0:
                compressed += '_' + '_'.join([(str(k)+str(v))[:10] for k,v in kwargs])

            filename = '%s%s.pickle' % (func.__name__, compressed)
            hours = (datetime.datetime.today() - datetime.datetime.fromtimestamp(os.path.getmtime(filename))).total_seconds() / 3600 if os.path.exists(filename) else 100000000
            if hours > max_age:
                result = func(*args, **kwargs)
                pickled = open(filename, 'wb')
                pickle.dump(result, pickled)
                pickled.close()
            else:
                pickled = open(filename, 'rb')
                result = pickle.load(pickled)
                pickled.close()

            return result
        
        return wrapper
    
    return actual_decorator

# Geometries for countries, states and counties
@pickled(max_age=1)
def get_geometries():
    countries_geo = (gpd
        .read_file('data/countries.geo.json')
        .rename(columns={'id': 'iso3'})
        .merge(pd.read_csv('data/UID_ISO_FIPS_LookUp_Table.csv', dtype={"FIPS": str}).rename(columns={'Country_Region': 'CountryRegion'}).query("Province_State.isnull()", engine='python')[['iso3', 'CountryRegion']].drop_duplicates())
        .drop(columns=['iso3', 'name'])
        .assign(FIPS=lambda x: x.CountryRegion, Level='Country')
    )
    states_geo = (gpd
     .read_file('data/us-states.json')
     .rename(columns={'id': 'FIPS', 'name': 'CountryRegion'})
     .assign(FIPS=lambda x: x.CountryRegion, Level='State')
    )
    counties = gpd.read_file('data/geojson-counties-fips.json', dtype={"id": str}).rename(columns={'id': 'FIPS'})[['FIPS', 'STATE', 'geometry', 'NAME']].rename(columns={'NAME': 'County'})
    counties.loc[counties.FIPS == '36061', 'geometry']  = counties[counties.FIPS.isin(['36061', '36005', '36085', '36047', '36081'])].dissolve(by='STATE').geometry.values
    counties_geo = counties.rename(columns={'County': 'CountryRegion'}).drop(columns='STATE').assign(Level='County')
    geo = countries_geo.append(states_geo).append(counties_geo).drop(columns='CountryRegion')
    
    return geo

@pickled(max_age=1)
def get_deaths():
    # Covid data for US counties
    covid = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv', dtype={"fips": str})
    covid.loc[covid.county == 'New York City', 'fips'] = '36061'
    deaths_counties = (pd
     .read_excel('data/PopulationEstimates.xls', header=2, dtype={"FIPS": str})[['FIPS', 'POP_ESTIMATE_2018', 'Area_Name', 'State']]
     .assign(County = lambda x: x.apply(lambda x: '%s, %s' % (x.Area_Name, x.State), axis=1), Level='County')
     .merge(covid, left_on='FIPS', right_on='fips')
     .rename(columns={'POP_ESTIMATE_2018': 'Population', 'date':'Date', 'deaths': 'Cumulative Deaths', 'County': 'CountryRegion'})
     .drop(columns=['county', 'Area_Name', 'State', 'state', 'fips', 'cases'])
     .astype({'Date': 'datetime64[ns]'})
    )

    # Covid data for US states
    deaths_states = (covid
     .groupby(['date', 'state'])
     .sum()
     .reset_index()
     .merge(pd.read_excel('data/PopulationEstimates.xls', header=2, dtype={"FIPS": str})[['POP_ESTIMATE_2018', 'Area_Name', 'FIPS']], left_on='state', right_on='Area_Name')
     .drop(columns=['Area_Name', 'cases'])
     .rename(columns={'POP_ESTIMATE_2018': 'Population', 'date':'Date', 'state': 'CountryRegion', 'deaths': 'Cumulative Deaths'})
     .astype({'Date': 'datetime64[ns]'})
     .assign(Level='State', FIPS=lambda x: x.CountryRegion)
    )

    # Covid data for countries
    deaths_world = (pd
        .read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
        .drop(columns=['Province/State', 'Lat', 'Long'])
        .rename(columns={'Country/Region': 'CountryRegion'})
        .groupby('CountryRegion').sum()
        .reset_index()
        .melt(id_vars=['CountryRegion'], var_name='Date', value_name='Cumulative Deaths')
        .astype({'Date': 'datetime64[ns]'})
        .merge(
            pd
            .read_csv('data/UID_ISO_FIPS_LookUp_Table.csv', dtype={"FIPS": str})
            .rename(columns={'Country_Region': 'CountryRegion'})
            .query("Province_State.isnull()", engine='python')
            [['iso3', 'CountryRegion', 'Population']], 
            on='CountryRegion')
        .drop(columns=['iso3'])
        .assign(Level='Country', FIPS=lambda x: x.CountryRegion)
    )

    # Merge and calculate metrics
    deaths = deaths_world.append(deaths_counties).append(deaths_states)
    deaths['Cumulative Deaths/1M'] = 1000000 * deaths['Cumulative Deaths'] / deaths['Population']
    deaths['Daily Deaths'] = deaths['Cumulative Deaths'] - deaths.groupby('CountryRegion')['Cumulative Deaths'].shift(1).fillna(0)
    deaths['Daily Deaths/1M'] = 1000000 * deaths['Daily Deaths'] / deaths['Population']
    deaths['Daily Increase (%)'] = 100 * deaths['Daily Deaths'] / (deaths['Cumulative Deaths'] - deaths['Daily Deaths']).map(lambda x: max(100, x))
    deaths_since_10 = (deaths
      .reset_index(drop=True)
      .reset_index()
      .merge(deaths.query('`Cumulative Deaths`>=10.0')[['CountryRegion', 'Date']].groupby(['CountryRegion']).min().rename(columns={'Date': '10 dead'}).reset_index())
      .assign(temp=lambda x: (x['Date'] - x['10 dead']))
      .rename(columns={'temp': 'Days Since 10 Deaths'})
    )
    deaths_since_10['Days Since 10 Deaths'] = deaths_since_10['Days Since 10 Deaths'].map(lambda x: x.days)
    deaths = deaths.merge(deaths_since_10[['CountryRegion', 'Date', 'Days Since 10 Deaths']], on=['CountryRegion', 'Date'], how='left').fillna(value={'Days Since 10 Deaths': -100})
    
    return deaths


geo = get_geometries()
deaths = get_deaths()


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [2]:
def table(level, x_axis, y_axis, window_size):
    level_query = str(level.split('+')).replace("'", '"')
    d = deaths.query(f'Level=={level_query} and `Days Since 10 Deaths`>=0')
    
    # Only look at regions with at least 50k residents
    d = d[d.Population > 50000] 
    
    # Sort the countries/regions by the max y value and only include those with at least 10 deaths
    regions = d.groupby('CountryRegion').sum().sort_values(y_axis, ascending=False).query('`Cumulative Deaths`>10').index.values
    fig = go.FigureWidget(layout={'width': 690, 'height': 490, 'margin': {'b': 10, 'l': 10, 'r': 10, 't': 30}, 'autosize': False})

    for region in regions:
        v = d[d['CountryRegion']==region]

        if x_axis == 'Date':
            x = pd.to_datetime(v[x_axis].values)
        elif x_axis == 'Days Since 10 Deaths':
            x = v[x_axis].values
        else:
            x = v[x_axis].map(lambda x: int(x))
            x = x.rolling(window_size).mean().fillna(0).map(lambda x: int(x)).values
        y = v[y_axis].map(lambda x: int(x))
        y = y.rolling(window_size).mean().fillna(0).map(lambda x: int(x)).values
        
        # Only look at 
        if x_axis == 'Days Since 10 Deaths':
            y = y[x>=0]
            x = x[x>=0]
        
        title = f'{region}'
        text = v[-len(x):].assign(x=x, y=y).apply(lambda x: f'''
{region}<br>
{x.Date.date()}<br>
x={int(x['x'])}, y={int(x['y'])}<br><br>
<b>Non-averaged numbers:</b><br>
Daily Deaths: {int(x['Daily Deaths'])}<br>
Cumulative Deaths: {int(x['Cumulative Deaths'])}<br>
Daily Deaths/1M: {int(x['Daily Deaths/1M'])}<br>
Cumulative Deaths/1M: {int(x['Cumulative Deaths/1M'])}
''', axis=1).values
        show_trace = True if region in regions[:10] else 'legendonly'
        fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', text=text, name=title, hoverinfo='text+name', textposition='bottom right', visible=show_trace))

    fig.update_annotations(dict(xref="x", yref="y", showarrow=True, arrowhead=7, ax=100, ay=0))
    fig.update_layout(
        title=None,
        xaxis_title=x_axis,
        yaxis_title=y_axis)
    
    display(fig)
#table('Country+State', 'Days Since 10 Deaths', 'Cumulative Deaths/1M', 1)    

In [3]:
def map(level, y_axis, window_size):
    level_query = str(level.split('+')).replace("'", '"')
    d = (deaths
        .query(f'Level=={level_query}')
        .query("CountryRegion!='US'" if '+' in level else "`Cumulative Deaths`>=0")
        .groupby(['CountryRegion', 'FIPS'])
        [['Cumulative Deaths', 'Cumulative Deaths/1M', 'Daily Deaths', 'Daily Deaths/1M', 'Daily Increase (%)']]
        .rolling(window_size)
        .mean()
        .fillna(0)
        .reset_index(level=['CountryRegion', 'FIPS'])
        .merge(deaths[['Date', 'Population', 'Level', 'Days Since 10 Deaths']], left_index=True, right_index=True)
    )

    # Add geometries
    deaths_geo = geo.query(f'Level=={level_query}').merge(d[d.Date == d.Date.max()], on=['FIPS', 'Level'], how='left').fillna(-1)
    
    # Center and scale map according to level
    if level=='County':
        center = (35, -80)
        zoom = 5
    elif level=='State':
        center = (35, -90)
        zoom = 4
    else:
        center = (50.6252978589571, 0.34580993652344)
        zoom = 2    

    m = ipyleaflet.Map(basemap=ipyleaflet.basemaps.CartoDB.Positron, center=center, zoom=zoom)

    # Create color scale
    step = int(np.power(10, np.floor(np.log10(deaths_geo[y_axis].max()))))
    stop = int(np.ceil(deaths_geo[y_axis].max() / step)*step)
    if stop/step < 5:
        step = int(step/2)
    colorscale = branca.colormap.linear.Greens_09.to_step(index=range(0, stop+1, step))
    colorscale.caption = y_axis

    # Add regions to map
    geo_json = ipyleaflet.GeoJSON(
        data=deaths_geo.__geo_interface__,
        style_callback=lambda x: {'fillColor': colorscale(x['properties'][y_axis])},
        style={'opacity': 1, 'weight': 0, 'fillOpacity': 0.7},
        hover_style={'weight': 1, 'dashArray': '9', 'fillOpacity': 0.5}
    )
    m.add_layer(geo_json)

    # Create legend
    def col_to_bkg(c):
        return 'black' if (int(c[1:3], 16) + int(c[3:5], 16) + int(c[5:7], 16))/3 > 100 else 'white'

    colors = ''.join([
        f'<div style="color: {col_to_bkg(colorscale(i))}; text-align: center; background-color:{colorscale(i)}; width: 40px; float: left;">{i}</div><br>' 
        for i in range(step, stop+1, step)
    ])
    m.add_control(ipyleaflet.WidgetControl(widget=widgets.HTML(colors, layout={'margin': '0px 0px 0px 0px'}), position='topright'))

    # Hover
    html = widgets.HTML('Hover over a CountryRegion', layout={'margin': '0px 10px 10px 20px;'})
    hover = ipyleaflet.WidgetControl(widget=html, position='bottomright')
    def update_html(properties, **kwargs):
        html.value = f"""
        <div style="margin: 0px 0px 10px 0px;">
            <div style="height: 16px"><b>{properties['CountryRegion']}</b></div>
            <div style="height: 16px">&nbsp;</div>
            <div style="height: 16px"><b>Averaged numbers ({window_size}d):</b></div>
            <div style="height: 16px">Daily Deaths: {int(properties['Daily Deaths'])}</div>
            <div style="height: 16px">Daily Deaths/1M: {int(properties['Daily Deaths/1M'])}</div>
            <div style="height: 16px">Cumulative Deaths: {int(properties['Cumulative Deaths'])}</div>
            <div style="height: 16px">Cumulative Deaths/1M: {int(properties['Cumulative Deaths/1M'])}</div>
            <div style="height: 16px">Daily Increase: {int(properties['Daily Increase (%)'])}%</div>
            <div style="height: 16px">Population: {int(properties['Population']):,}</div>
        </div>
        """               
    geo_json.on_hover(update_html)
    geo_json.on_mouseover(lambda **x: m.add_control(hover))
    geo_json.on_mouseout(lambda **x: m.remove_control(hover))

    m.layout=widgets.Layout(border='1px black solid', height='500px', padding='0', margin='0')
    display(m)

#map('Country+State', 'Cumulative Deaths/1M', 7)    

In [4]:
def dropdown(options, description, value=None):
    if value:
        v = widgets.Dropdown(options=options, description='', layout=widgets.Layout(padding_left='10px', margin_left='10px', width='150px'), value=value)
    else:
        v = widgets.Dropdown(options=options, description='', layout=widgets.Layout(padding_left='10px', margin_left='10px', width='150px'))
    return v, widgets.VBox([
        widgets.Label(description), 
        v
    ])

level, level_widget = dropdown(options=['Country', 'Country+State', 'State', 'County'], description='Level:')
x_axis, x_axis_widget = dropdown(options=['Days Since 10 Deaths', 'Cumulative Deaths/1M', 'Cumulative Deaths', 'Date'], description='X-axis:')
y_axis, y_axis_widget = dropdown(options=['Cumulative Deaths/1M', 'Cumulative Deaths', 'Daily Deaths/1M', 'Daily Deaths', 'Daily Increase (%)'], description='Y-axis:')
window_size, window_size_widget = dropdown(options=range(1, 15), value=7, description='Moving average (days):') 
f = widgets.Label('', layout=widgets.Layout(width='5px'))
selectors_widget = widgets.HBox([level_widget, f, x_axis_widget, f, y_axis_widget, f, window_size_widget], layout=widgets.Layout(justify_content='flex-end', width='100%'))
title = widgets.HTML(f'<H1><NOBR>COVID-19 - {y_axis.value}</H1>', layout=widgets.Layout(width='800px'))
def update_title(*args):
    title.value = f'<H1><NOBR>COVID-19 - {y_axis.value}</H1>'
y_axis.observe(update_title, 'value')
header_widgets = widgets.HBox([title, selectors_widget])

table_widget = widgets.HBox(
    [widgets.interactive_output(table, {'level': level, 'x_axis': x_axis, 'y_axis': y_axis, 'window_size': window_size})], 
    layout=widgets.Layout(border='1px black solid', width='700px', height='500px', margin='4px 0px 0px 0px')
)
map_widget = widgets.interactive_output(map, {'level': level, 'y_axis': y_axis, 'window_size': window_size})

widgets.AppLayout(
    header=header_widgets,
    left_sidebar=map_widget,
    center=widgets.Label(''),
    right_sidebar=table_widget,
    footer=None,
    pane_widths=[4, '10px', '700px'],
    pane_heights=['80px', 20, 0]
)    

AppLayout(children=(HBox(children=(HTML(value='<H1><NOBR>COVID-19 - Cumulative Deaths/1M</H1>', layout=Layout(…