In [7]:
import matplotlib
from matplotlib import cm
import numpy as np

magma_cmap = matplotlib.cm.get_cmap('magma')
viridis_cmap = matplotlib.cm.get_cmap('Wistia')
#parula_cmap = par.parula_map

viridis_rgb = []
magma_rgb = []
parula_rgb = []
norm = matplotlib.colors.Normalize(vmin=0, vmax=255)

for i in range(0, 255):
       k = matplotlib.colors.colorConverter.to_rgb(magma_cmap(norm(i)))
       magma_rgb.append(k)

for i in range(0, 255):
       k = matplotlib.colors.colorConverter.to_rgb(viridis_cmap(norm(i)))
       viridis_rgb.append(k)

# for i in range(0, 255):
#        k = matplotlib.colors.colorConverter.to_rgb(parula_cmap(norm(i)))
#        parula_rgb.append(k)

def matplotlib_to_plotly(cmap, pl_entries):
    h = 1.0/(pl_entries-1)
    pl_colorscale = []

    for k in range(pl_entries):
        C = map(np.uint8, np.array(cmap(k*h)[:3])*255)
        C = list(C)
        pl_colorscale.append([k*h, 'rgb'+str((C[0], C[1], C[2]))])

    return pl_colorscale

magma = matplotlib_to_plotly(magma_cmap, 255)
viridis = matplotlib_to_plotly(viridis_cmap, 255)


In [8]:
viridis

[[0.0, 'rgb(228, 255, 122)'],
 [0.003937007874015748, 'rgb(228, 254, 120)'],
 [0.007874015748031496, 'rgb(228, 254, 118)'],
 [0.011811023622047244, 'rgb(229, 253, 117)'],
 [0.015748031496062992, 'rgb(229, 253, 115)'],
 [0.01968503937007874, 'rgb(230, 253, 114)'],
 [0.023622047244094488, 'rgb(230, 252, 112)'],
 [0.027559055118110236, 'rgb(230, 252, 111)'],
 [0.031496062992125984, 'rgb(231, 252, 109)'],
 [0.03543307086614173, 'rgb(231, 251, 108)'],
 [0.03937007874015748, 'rgb(232, 251, 106)'],
 [0.04330708661417323, 'rgb(232, 251, 105)'],
 [0.047244094488188976, 'rgb(233, 250, 103)'],
 [0.051181102362204724, 'rgb(233, 250, 102)'],
 [0.05511811023622047, 'rgb(233, 249, 100)'],
 [0.05905511811023622, 'rgb(234, 249, 99)'],
 [0.06299212598425197, 'rgb(234, 249, 97)'],
 [0.06692913385826771, 'rgb(235, 248, 96)'],
 [0.07086614173228346, 'rgb(235, 248, 94)'],
 [0.07480314960629922, 'rgb(236, 248, 93)'],
 [0.07874015748031496, 'rgb(236, 247, 91)'],
 [0.0826771653543307, 'rgb(236, 247, 90)'],
 [0

In [16]:
# %load make_data.py
import warnings
import pandas as pd
import glob
import os
import logging
import boto3
from botocore.exceptions import ClientError
import COVID19Py
import pandas
import datetime


# Cancel copy warnings of pandas
warnings.filterwarnings(
    "ignore", category=pd.core.common.SettingWithCopyWarning)


covid19_csbs = COVID19Py.COVID19(data_source="csbs").getAll(timelines=True)
covid19_jhu = COVID19Py.COVID19(data_source="jhu").getAll(timelines=True)


def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(
            file_name, bucket, object_name,  ExtraArgs={'ACL': 'public-read'})
    except ClientError as e:
        logging.error(e)
        return False
    return True


def parse_api_json(json, source):
    for_pandas = []
    confirmed = []
    deaths = []
    recovered = []
    for location in json['locations']:
        timeline_entry = {}
        entry = {
            'id': location['id'],
            'lat': location['coordinates']['latitude'],
            'lon': location['coordinates']['longitude'],
            'Date': pandas.to_datetime(location['last_updated']),
            'province': location['province'],
            'country_code': location['country_code'],
            'country': location['country'],
            'confirmed': location['latest']['confirmed'],
            'deaths': location['latest']['deaths'],
            'recovered': location['latest']['recovered'],
            'county': ''}
        if 'county' in location.keys():
            entry['county'] = location['county']
        if 'state' in location.keys():
            entry['state'] = location['state']
        entry['source'] = source
        for_pandas.append(entry)

        if 'timelines' in location.keys():
            for status in location['timelines']:
                for date in location['timelines'][status]['timeline']:
                    sub_entry = {'id': location['id'],
                                 'Date': pandas.to_datetime(date),
                                 status: location['timelines'][status]['timeline'][date]}
                    if status == 'confirmed':
                        confirmed.append(sub_entry)
                    elif status == 'deaths':
                        deaths.append(sub_entry)
                    else:
                        recovered.append(sub_entry)
    if confirmed:
        # return confirmed,deaths,recovered
        timeline_df = pandas.DataFrame(confirmed)
        if deaths:
            timeline_df = timeline_df.merge(
                pandas.DataFrame(deaths), on=['id', 'Date'])
        else:
            timeline_df['deaths'] = 0
        if recovered:
            timeline_df = timeline_df.merge(
                pandas.DataFrame(recovered), on=['id', 'Date'])
        else:
            timeline_df['recovered'] = 0.0

        main_df = pandas.DataFrame(for_pandas)
        timeline_df = timeline_df.merge(
            main_df[['id', 'lat', 'lon', 'province', 'country_code', 'country', 'source']], on=['id'])

        return main_df, timeline_df
    else:
        return pandas.DataFrame(for_pandas)


def per_x_cases(grouper, df, date_mapper):
    new_cases_by_country = []
    dates = date_mapper['Date']
    sub_group = df[df[grouper] != ""]
    groupers = sub_group[grouper].unique()

    for group in groupers:
        sub_country = sub_group[sub_group[grouper] == group]
        new_cases_by_country.append(
            {grouper: group, 'Date': dates[0],
             'New Cases': sub_country.loc[sub_country['Date'] == dates[0], 'confirmed'].sum(),
             'New Deaths': 0,
             'New Recovery': 0})
        for date_index in range(1, len(dates)):
            current_date = dates[date_index]
            day_before = dates[date_index-1]
            # print(current_date,day_before)
            t_c, t_d, t_r = sub_country.loc[sub_country['Date']
                                            == current_date, :].sum()[['confirmed', 'deaths', 'recovered']]

            y_c, y_d, y_r = sub_country.loc[sub_country['Date']
                                            == day_before, :].sum()[['confirmed', 'deaths', 'recovered']]

            new_cases = t_c - y_c
            new_deaths = t_d - y_d
            new_recovery = t_r - y_r
            new_cases_by_country.append(
                {grouper: group, 'Date': current_date, 'New Cases': new_cases,
                 'New Deaths': new_deaths, 'New Recovery': new_recovery})
    return pd.DataFrame(new_cases_by_country)


jhu_df, jhu_df_time = parse_api_json(covid19_jhu, 'JHU')
csbs_df = parse_api_json(covid19_csbs, 'CSBS')
csbs_df_old = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/csbs_df.csv.gz', index_col=0)

csbs_df =pandas.concat([csbs_df, csbs_df_old])
dt = datetime.date.today()
datetime_string = dt.strftime('%m_%d_%Y')
# This will probably be a bug someday
# jhu_df_time = jhu_df_time[jhu_df_time['province'].str.split(
#     ', ').str.len() == 1]

date_mapper = pd.DataFrame(
    jhu_df_time['Date'].unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')

provence_df_per_day = per_x_cases('province', jhu_df_time, date_mapper)
country_df_per_day = per_x_cases('country', jhu_df_time, date_mapper)
print('Generated Data')

jhu_df.to_csv('Data/jhu_df.csv.gz', compression='gzip')
jhu_df_time.to_csv('Data/jhu_df_time.csv.gz', compression='gzip')
csbs_df.to_csv('Data/csbs_df.csv.gz', compression='gzip')
csbs_df_old.to_csv('Data/csbs_df_Archive_{}.csv.gz'.format(datetime_string), compression='gzip')

provence_df_per_day.to_csv(
    'Data/provence_df_per_day.csv.gz', compression='gzip')
country_df_per_day.to_csv('Data/country_df_per_day.csv.gz', compression='gzip')


print('Syncing Data')
ea = ExtraArgs = {'ACL': 'public-read'}
gs = glob.glob('Data/*.csv.gz')
for file in gs:
    upload_file(file, 'jordansdatabucket', os.path.join(
        'covid19data', os.path.basename(file)))
    print("Uploaded " + os.path.basename(file))


Generated Data
Syncing Data
Uploaded provence_df_per_day.csv.gz
Uploaded jhu_df_time.csv.gz
Uploaded country_df_per_day.csv.gz
Uploaded Merged_df.csv.gz
Uploaded csbs_df_Archive_03_25_2020.csv.gz
Uploaded per_day_stats_by_country.csv.gz
Uploaded jhu_df.csv.gz
Uploaded csbs_df.csv.gz
Uploaded per_day_stats_by_state.csv.gz
Uploaded per_day_stats_by_county.csv.gz


In [35]:
import sys
import warnings
import numpy as np
import pandas as pd
import argparse
import pprint
from datetime import date, timedelta
try:
    import dash
    import dash_core_components as dcc
    import dash_html_components as html
    from dash.exceptions import PreventUpdate
    import dash_table
    from dash.dependencies import Input, Output, State
    import plotly.graph_objects as go
    import plotly.express as px
    from plotly.subplots import make_subplots
except ImportError:
    sys.exit('Please install dash, e.g, pip install dash')


mapbox_style = "mapbox://styles/plotlymapbox/cjvprkf3t1kns1cqjxuxmwixz"
mapbox_style = 'dark'
mapbox_access_token = open('.mapbox_token').readlines()[0]

# Import from S3:
jhu_df = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/jhu_df.csv.gz', index_col=0)
jhu_df_time = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/jhu_df_time.csv.gz', index_col=0)
csbs_df = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/csbs_df.csv.gz', index_col=0)
per_day_stats_by_country = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/country_df_per_day.csv.gz', index_col=0)
per_day_stats_by_state = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/provence_df_per_day.csv.gz', index_col=0)

jhu_df_time['Date'] = pd.to_datetime(jhu_df_time['Date'])
jhu_df['Date'] = pd.to_datetime(jhu_df['Date'])
csbs_df['Date'] = pd.to_datetime(csbs_df['Date'])
jhu_df['state'] = ""


date_mapper = pd.DataFrame(
    jhu_df_time['Date'].unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')
min_date = date_mapper.index[0]
max_date = date_mapper.index[-1]

latest_date = csbs_df.sort_values('Date')['Date'].iloc[0]
merge = pd.concat([jhu_df, csbs_df[csbs_df['Date'] == latest_date]])
merge = merge.fillna('')
problem_countries = merge[merge['country_code'] == '']['country'].tolist()
merge.loc[merge['country'] == 'Namibia', 'country_code'] = ''
centroid_mapper = pd.read_csv('country_centroids_az8.csv')
problem_states = merge[~merge['country_code'].isin(centroid_mapper['iso_a2'])]
new_merge = merge.merge(
    centroid_mapper, left_on='country_code', right_on='iso_a2')
new_merge['case_rate'] = new_merge['confirmed']/new_merge['pop_est'] * 100
new_merge['death_rate'] = new_merge['deaths']/new_merge['pop_est'] * 100
new_merge['confirmed_no_death'] = new_merge['confirmed'] - new_merge['deaths']


# If something is has the same name for continent and subregion, lets just add the word _subregion
new_merge.loc[new_merge['continent'] == new_merge['subregion'],
              'subregion'] = new_merge['subregion'] + ' Subregion'

# Lets remove the US Data since we are doubel counting htis by merging CSBSS
new_merge_no_us = new_merge[~((new_merge['country'] == 'US') & (
    new_merge['province'] == ''))]



def build_hierarchical_dataframe(df, levels, value_column, color_columns=None):
    """
    Build a hierarchy of levels for Sunburst or Treemap charts.

    Levels are given starting from the bottom to the top of the hierarchy,
    ie the last level corresponds to the root.
    """
    df_all_trees = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
    for i, level in enumerate(levels):
        df_tree = pd.DataFrame(columns=['id', 'parent', 'value', 'color'])
        dfg = df.groupby(levels[i:]).sum()
        dfg = dfg.reset_index()
        df_tree['id'] = dfg[level].copy()
        if i < len(levels) - 1:
            df_tree['parent'] = dfg[levels[i+1]].copy()
        else:
            df_tree['parent'] = 'total'
        df_tree['value'] = dfg[value_column]
        df_all_trees = df_all_trees.append(df_tree, ignore_index=True)
    total = pd.Series(dict(id='total', parent='',
                              value=df[value_column].sum(),
                              color=""))
    df_all_trees = df_all_trees.append(total, ignore_index=True)
    return df_all_trees

def plot_sunburst():
    levels = ['continent', 'subregion', 'name', 'province']
    levels = levels[::-1]
    value_column = 'confirmed'
    df_higherarchy = build_hierarchical_dataframe(
        new_merge_no_us, levels, value_column)
    df_higherarchy['color'] = df_higherarchy['value'] / \
        df_higherarchy['value'].sum()
    df_higherarchy = df_higherarchy.replace('total', 'Total<br>Cases')

    fig = make_subplots(
        1, 2, specs=[[{"type": "domain"}, {"type": "domain"}]],)
    fig.add_trace(go.Sunburst(
        labels=df_higherarchy['id'],
        parents=df_higherarchy['parent'],
        values=df_higherarchy['value'],
        branchvalues='total',
        marker=dict(
            colors=df_higherarchy['color'],
            colorscale='RdBu_r',
            cmid=new_merge_no_us.groupby('name').sum()['confirmed'].mean()/new_merge_no_us['confirmed'].sum()),
        hovertemplate='<b>%{label} </b> <br> Confirmed Cases: %{value}',
        insidetextorientation='radial',
        name='',
        maxdepth=3
    ), 1, 1)

    levels = ['continent', 'subregion', 'name', 'province']
    levels = levels[::-1]
    value_column = 'deaths'
    df_higherarchy = build_hierarchical_dataframe(
        new_merge_no_us, levels, value_column)
    df_higherarchy['color'] = df_higherarchy['value'] / \
        df_higherarchy['value'].sum()
    df_higherarchy = df_higherarchy.replace('total', 'Total<br>Deaths')

    fig.add_trace(go.Sunburst(
        labels=df_higherarchy['id'],
        parents=df_higherarchy['parent'],
        values=df_higherarchy['value'],
        branchvalues='total',
        marker=dict(
            colors=df_higherarchy['color'],
            colorscale='reds',
            cmid=new_merge_no_us.groupby('name').sum()['deaths'].mean()/new_merge_no_us['deaths'].sum()),
        hovertemplate='<b>%{label} </b> <br> Confirmed Deaths: %{value}',
        name='',
        maxdepth=3
    ), 1, 2)

    fig.update_layout(
        uniformtext=dict(minsize=16, mode='hide'),
        paper_bgcolor='rgb(0,0,0,0)',
        # title=dict(text='Total Confirmed Cases<br>Click to Expand',
        #            font=dict(color='white', size=24)),
        margin=dict(l=40, r=40, t=40, b=40)
    )

    return fig

plot_sunburst()

In [80]:
def update_new_case_graph(hoverData, group):

    sub_df_time = pd.DataFrame()
    if group == 'country' and hoverData:
        country = hoverData['points'][0]['customdata']
        sub_df = per_day_stats_by_country[per_day_stats_by_country['country'] == country]
        sub_df_time = jhu_df_time[jhu_df_time['country'] == country]
    elif group == 'province' and hoverData:
        country = hoverData['points'][0]['customdata']
        sub_df = per_day_stats_by_state[per_day_stats_by_state['province'] == country]
        if sub_df.empty:
            country = "{} - No Data Available".format(country)
        sub_df_time = ""
    else:
        sub_df = per_day_stats_by_country.groupby('Date').sum().reset_index()
        sub_df_time = jhu_df_time.groupby('Date').sum().reset_index()
        country = 'World'

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    dates = date_mapper['Date_text'].unique()
    fig.add_trace(go.Bar(x=dates,
                         y=sub_df['New Cases'],
                         name=country,
                         showlegend=True,
                         text=sub_df['New Cases'],
                         textposition='auto',
                         texttemplate='%{y:,f}',
                         hovertemplate='Date - %{x}<br>New Cases - %{y:,f}',

                         marker=dict(
                             color='white',
                             line=dict(
                                 color='white')
                         )))
    if not sub_df_time.empty:
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=sub_df_time['confirmed'],
                name=country,
                showlegend=False,
                mode='lines+markers',
                hovertemplate='Date - %{x}<br>Total Cases - %{y:,f}',
                marker=dict(
                    color='yellow',
                    size=0.1,
                    line=dict(
                        width=10,
                        color='yellow')
                )),secondary_y=True)

    fig.update_layout(
        title=dict(text='New Cases Per Day: {}'.format(
            country), font=dict(color='white', size=24)),
        xaxis_tickfont_size=14,
        xaxis=dict(
            title='Date',
            color='white'
        ),
        yaxis=dict(
            title_text='New Cases',
#             titlefont_size=,
#             tickfont_size=18,
            showgrid=False,
            side='left',
            color='black',
            rotate='90'
        ),
#         yaxis2=dict(
#             title=dict(text='Total Cases'),
#             titlefont_size=12,
#             tickfont_size=18,
#             showgrid=False,
#             side='right',
#             color='white',
#         ),
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgb(52,51,50)',
        barmode='group',
        bargap=0.15,  # gap between bars of adjacent location coordinates.
        bargroupgap=0.1)  # gap between bars of the same location coordinate.

    fig.update_layout(width=900)
        
        
#         dict(
#                     title=dict(text='New Cases', standoff=2),
#                     titlefont_size=18,
#                     tickfont_size=18,
#                     showgrid=False,
#                     color='white',
#         ),secondary_y=True)
        

    return fig

update_new_case_graph([],'country')

ValueError: Invalid property specified for object of type plotly.graph_objs.layout.YAxis: 'rotate'

    Valid properties:
        anchor
            If set to an opposite-letter axis id (e.g. `x2`, `y`),
            this axis is bound to the corresponding opposite-letter
            axis. If set to "free", this axis' position is
            determined by `position`.
        automargin
            Determines whether long tick labels automatically grow
            the figure margins.
        autorange
            Determines whether or not the range of this axis is
            computed in relation to the input data. See `rangemode`
            for more info. If `range` is provided, then `autorange`
            is set to False.
        calendar
            Sets the calendar system to use for `range` and `tick0`
            if this is a date axis. This does not set the calendar
            for interpreting data on this axis, that's specified in
            the trace or via the global `layout.calendar`
        categoryarray
            Sets the order in which categories on this axis appear.
            Only has an effect if `categoryorder` is set to
            "array". Used with `categoryorder`.
        categoryarraysrc
            Sets the source reference on plot.ly for  categoryarray
            .
        categoryorder
            Specifies the ordering logic for the case of
            categorical variables. By default, plotly uses "trace",
            which specifies the order that is present in the data
            supplied. Set `categoryorder` to *category ascending*
            or *category descending* if order should be determined
            by the alphanumerical order of the category names. Set
            `categoryorder` to "array" to derive the ordering from
            the attribute `categoryarray`. If a category is not
            found in the `categoryarray` array, the sorting
            behavior for that attribute will be identical to the
            "trace" mode. The unspecified categories will follow
            the categories in `categoryarray`. Set `categoryorder`
            to *total ascending* or *total descending* if order
            should be determined by the numerical order of the
            values. Similarly, the order can be determined by the
            min, max, sum, mean or median of all the values.
        color
            Sets default for all colors associated with this axis
            all at once: line, font, tick, and grid colors. Grid
            color is lightened by blending this with the plot
            background Individual pieces can override this.
        constrain
            If this axis needs to be compressed (either due to its
            own `scaleanchor` and `scaleratio` or those of the
            other axis), determines how that happens: by increasing
            the "range" (default), or by decreasing the "domain".
        constraintoward
            If this axis needs to be compressed (either due to its
            own `scaleanchor` and `scaleratio` or those of the
            other axis), determines which direction we push the
            originally specified plot area. Options are "left",
            "center" (default), and "right" for x axes, and "top",
            "middle" (default), and "bottom" for y axes.
        dividercolor
            Sets the color of the dividers Only has an effect on
            "multicategory" axes.
        dividerwidth
            Sets the width (in px) of the dividers Only has an
            effect on "multicategory" axes.
        domain
            Sets the domain of this axis (in plot fraction).
        dtick
            Sets the step in-between ticks on this axis. Use with
            `tick0`. Must be a positive number, or special strings
            available to "log" and "date" axes. If the axis `type`
            is "log", then ticks are set every 10^(n*dtick) where n
            is the tick number. For example, to set a tick mark at
            1, 10, 100, 1000, ... set dtick to 1. To set tick marks
            at 1, 100, 10000, ... set dtick to 2. To set tick marks
            at 1, 5, 25, 125, 625, 3125, ... set dtick to
            log_10(5), or 0.69897000433. "log" has several special
            values; "L<f>", where `f` is a positive number, gives
            ticks linearly spaced in value (but not position). For
            example `tick0` = 0.1, `dtick` = "L0.5" will put ticks
            at 0.1, 0.6, 1.1, 1.6 etc. To show powers of 10 plus
            small digits between, use "D1" (all digits) or "D2"
            (only 2 and 5). `tick0` is ignored for "D1" and "D2".
            If the axis `type` is "date", then you must convert the
            time to milliseconds. For example, to set the interval
            between ticks to one day, set `dtick` to 86400000.0.
            "date" also has special values "M<n>" gives ticks
            spaced by a number of months. `n` must be a positive
            integer. To set ticks on the 15th of every third month,
            set `tick0` to "2000-01-15" and `dtick` to "M3". To set
            ticks every 4 years, set `dtick` to "M48"
        exponentformat
            Determines a formatting rule for the tick exponents.
            For example, consider the number 1,000,000,000. If
            "none", it appears as 1,000,000,000. If "e", 1e+9. If
            "E", 1E+9. If "power", 1x10^9 (with 9 in a super
            script). If "SI", 1G. If "B", 1B.
        fixedrange
            Determines whether or not this axis is zoom-able. If
            true, then zoom is disabled.
        gridcolor
            Sets the color of the grid lines.
        gridwidth
            Sets the width (in px) of the grid lines.
        hoverformat
            Sets the hover text formatting rule using d3 formatting
            mini-languages which are very similar to those in
            Python. For numbers, see:
            https://github.com/d3/d3-3.x-api-
            reference/blob/master/Formatting.md#d3_format And for
            dates see: https://github.com/d3/d3-3.x-api-
            reference/blob/master/Time-Formatting.md#format We add
            one item to d3's date formatter: "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display "09~15~23.46"
        layer
            Sets the layer on which this axis is displayed. If
            *above traces*, this axis is displayed above all the
            subplot's traces If *below traces*, this axis is
            displayed below all the subplot's traces, but above the
            grid lines. Useful when used together with scatter-like
            traces with `cliponaxis` set to False to show markers
            and/or text nodes above this axis.
        linecolor
            Sets the axis line color.
        linewidth
            Sets the width (in px) of the axis line.
        matches
            If set to another axis id (e.g. `x2`, `y`), the range
            of this axis will match the range of the corresponding
            axis in data-coordinates space. Moreover, matching axes
            share auto-range values, category lists and histogram
            auto-bins. Note that setting axes simultaneously in
            both a `scaleanchor` and a `matches` constraint is
            currently forbidden. Moreover, note that matching axes
            must have the same `type`.
        mirror
            Determines if the axis lines or/and ticks are mirrored
            to the opposite side of the plotting area. If True, the
            axis lines are mirrored. If "ticks", the axis lines and
            ticks are mirrored. If False, mirroring is disable. If
            "all", axis lines are mirrored on all shared-axes
            subplots. If "allticks", axis lines and ticks are
            mirrored on all shared-axes subplots.
        nticks
            Specifies the maximum number of ticks for the
            particular axis. The actual number of ticks will be
            chosen automatically to be less than or equal to
            `nticks`. Has an effect only if `tickmode` is set to
            "auto".
        overlaying
            If set a same-letter axis id, this axis is overlaid on
            top of the corresponding same-letter axis, with traces
            and axes visible for both axes. If False, this axis
            does not overlay any same-letter axes. In this case,
            for axes with overlapping domains only the highest-
            numbered axis will be visible.
        position
            Sets the position of this axis in the plotting space
            (in normalized coordinates). Only has an effect if
            `anchor` is set to "free".
        range
            Sets the range of this axis. If the axis `type` is
            "log", then you must take the log of your desired range
            (e.g. to set the range from 1 to 100, set the range
            from 0 to 2). If the axis `type` is "date", it should
            be date strings, like date data, though Date objects
            and unix milliseconds will be accepted and converted to
            strings. If the axis `type` is "category", it should be
            numbers, using the scale where each category is
            assigned a serial number from zero in the order it
            appears.
        rangemode
            If "normal", the range is computed in relation to the
            extrema of the input data. If *tozero*`, the range
            extends to 0, regardless of the input data If
            "nonnegative", the range is non-negative, regardless of
            the input data. Applies only to linear axes.
        scaleanchor
            If set to another axis id (e.g. `x2`, `y`), the range
            of this axis changes together with the range of the
            corresponding axis such that the scale of pixels per
            unit is in a constant ratio. Both axes are still
            zoomable, but when you zoom one, the other will zoom
            the same amount, keeping a fixed midpoint. `constrain`
            and `constraintoward` determine how we enforce the
            constraint. You can chain these, ie `yaxis:
            {scaleanchor: *x*}, xaxis2: {scaleanchor: *y*}` but you
            can only link axes of the same `type`. The linked axis
            can have the opposite letter (to constrain the aspect
            ratio) or the same letter (to match scales across
            subplots). Loops (`yaxis: {scaleanchor: *x*}, xaxis:
            {scaleanchor: *y*}` or longer) are redundant and the
            last constraint encountered will be ignored to avoid
            possible inconsistent constraints via `scaleratio`.
            Note that setting axes simultaneously in both a
            `scaleanchor` and a `matches` constraint is currently
            forbidden.
        scaleratio
            If this axis is linked to another by `scaleanchor`,
            this determines the pixel to unit scale ratio. For
            example, if this value is 10, then every unit on this
            axis spans 10 times the number of pixels as a unit on
            the linked axis. Use this for example to create an
            elevation profile where the vertical scale is
            exaggerated a fixed amount with respect to the
            horizontal.
        separatethousands
            If "true", even 4-digit integers are separated
        showdividers
            Determines whether or not a dividers are drawn between
            the category levels of this axis. Only has an effect on
            "multicategory" axes.
        showexponent
            If "all", all exponents are shown besides their
            significands. If "first", only the exponent of the
            first tick is shown. If "last", only the exponent of
            the last tick is shown. If "none", no exponents appear.
        showgrid
            Determines whether or not grid lines are drawn. If
            True, the grid lines are drawn at every tick mark.
        showline
            Determines whether or not a line bounding this axis is
            drawn.
        showspikes
            Determines whether or not spikes (aka droplines) are
            drawn for this axis. Note: This only takes affect when
            hovermode = closest
        showticklabels
            Determines whether or not the tick labels are drawn.
        showtickprefix
            If "all", all tick labels are displayed with a prefix.
            If "first", only the first tick is displayed with a
            prefix. If "last", only the last tick is displayed with
            a suffix. If "none", tick prefixes are hidden.
        showticksuffix
            Same as `showtickprefix` but for tick suffixes.
        side
            Determines whether a x (y) axis is positioned at the
            "bottom" ("left") or "top" ("right") of the plotting
            area.
        spikecolor
            Sets the spike color. If undefined, will use the series
            color
        spikedash
            Sets the dash style of lines. Set to a dash type string
            ("solid", "dot", "dash", "longdash", "dashdot", or
            "longdashdot") or a dash length list in px (eg
            "5px,10px,2px,2px").
        spikemode
            Determines the drawing mode for the spike line If
            "toaxis", the line is drawn from the data point to the
            axis the  series is plotted on. If "across", the line
            is drawn across the entire plot area, and supercedes
            "toaxis". If "marker", then a marker dot is drawn on
            the axis the series is plotted on
        spikesnap
            Determines whether spikelines are stuck to the cursor
            or to the closest datapoints.
        spikethickness
            Sets the width (in px) of the zero line.
        tick0
            Sets the placement of the first tick on this axis. Use
            with `dtick`. If the axis `type` is "log", then you
            must take the log of your starting tick (e.g. to set
            the starting tick to 100, set the `tick0` to 2) except
            when `dtick`=*L<f>* (see `dtick` for more info). If the
            axis `type` is "date", it should be a date string, like
            date data. If the axis `type` is "category", it should
            be a number, using the scale where each category is
            assigned a serial number from zero in the order it
            appears.
        tickangle
            Sets the angle of the tick labels with respect to the
            horizontal. For example, a `tickangle` of -90 draws the
            tick labels vertically.
        tickcolor
            Sets the tick color.
        tickfont
            Sets the tick font.
        tickformat
            Sets the tick label formatting rule using d3 formatting
            mini-languages which are very similar to those in
            Python. For numbers, see:
            https://github.com/d3/d3-3.x-api-
            reference/blob/master/Formatting.md#d3_format And for
            dates see: https://github.com/d3/d3-3.x-api-
            reference/blob/master/Time-Formatting.md#format We add
            one item to d3's date formatter: "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display "09~15~23.46"
        tickformatstops
            A tuple of
            plotly.graph_objects.layout.yaxis.Tickformatstop
            instances or dicts with compatible properties
        tickformatstopdefaults
            When used in a template (as
            layout.template.layout.yaxis.tickformatstopdefaults),
            sets the default property values to use for elements of
            layout.yaxis.tickformatstops
        ticklen
            Sets the tick length (in px).
        tickmode
            Sets the tick mode for this axis. If "auto", the number
            of ticks is set via `nticks`. If "linear", the
            placement of the ticks is determined by a starting
            position `tick0` and a tick step `dtick` ("linear" is
            the default value if `tick0` and `dtick` are provided).
            If "array", the placement of the ticks is set via
            `tickvals` and the tick text is `ticktext`. ("array" is
            the default value if `tickvals` is provided).
        tickprefix
            Sets a tick label prefix.
        ticks
            Determines whether ticks are drawn or not. If "", this
            axis' ticks are not drawn. If "outside" ("inside"),
            this axis' are drawn outside (inside) the axis lines.
        tickson
            Determines where ticks and grid lines are drawn with
            respect to their corresponding tick labels. Only has an
            effect for axes of `type` "category" or
            "multicategory". When set to "boundaries", ticks and
            grid lines are drawn half a category to the left/bottom
            of labels.
        ticksuffix
            Sets a tick label suffix.
        ticktext
            Sets the text displayed at the ticks position via
            `tickvals`. Only has an effect if `tickmode` is set to
            "array". Used with `tickvals`.
        ticktextsrc
            Sets the source reference on plot.ly for  ticktext .
        tickvals
            Sets the values at which ticks on this axis appear.
            Only has an effect if `tickmode` is set to "array".
            Used with `ticktext`.
        tickvalssrc
            Sets the source reference on plot.ly for  tickvals .
        tickwidth
            Sets the tick width (in px).
        title
            plotly.graph_objects.layout.yaxis.Title instance or
            dict with compatible properties
        titlefont
            Deprecated: Please use layout.yaxis.title.font instead.
            Sets this axis' title font. Note that the title's font
            used to be customized by the now deprecated `titlefont`
            attribute.
        type
            Sets the axis type. By default, plotly attempts to
            determined the axis type by looking into the data of
            the traces that referenced the axis in question.
        uirevision
            Controls persistence of user-driven changes in axis
            `range`, `autorange`, and `title` if in `editable:
            true` configuration. Defaults to `layout.uirevision`.
        visible
            A single toggle to hide the axis while preserving
            interaction like dragging. Default is true when a
            cheater plot is present on the axis, otherwise false
        zeroline
            Determines whether or not a line is drawn at along the
            0 value of this axis. If True, the zero line is drawn
            on top of the grid lines.
        zerolinecolor
            Sets the line color of the zero line.
        zerolinewidth
            Sets the width (in px) of the zero line.
        

In [85]:
jhu_df_time[jhu_df_time['country']=='China'].groupby('Date').sum()

Unnamed: 0_level_0,id,confirmed,deaths,recovered,lat,lon
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-22 00:00:00+00:00,2145,548,17,0.0,1083.3367,3684.4197
2020-01-23 00:00:00+00:00,2145,643,18,0.0,1083.3367,3684.4197
2020-01-24 00:00:00+00:00,2145,920,26,0.0,1083.3367,3684.4197
2020-01-25 00:00:00+00:00,2145,1406,42,0.0,1083.3367,3684.4197
2020-01-26 00:00:00+00:00,2145,2075,56,0.0,1083.3367,3684.4197
...,...,...,...,...,...,...
2020-03-20 00:00:00+00:00,2145,81250,3253,0.0,1083.3367,3684.4197
2020-03-21 00:00:00+00:00,2145,81305,3259,0.0,1083.3367,3684.4197
2020-03-22 00:00:00+00:00,2145,81435,3274,0.0,1083.3367,3684.4197
2020-03-23 00:00:00+00:00,2145,81498,3274,0.0,1083.3367,3684.4197


In [359]:
# Import from S3:
jhu_df = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/jhu_df.csv.gz', index_col=0)
jhu_df_time = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/jhu_df_time.csv.gz', index_col=0)
csbs_df = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/csbs_df.csv.gz', index_col=0)
per_day_stats_by_country = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/country_df_per_day.csv.gz', index_col=0)
per_day_stats_by_state = pd.read_csv(
    'https://jordansdatabucket.s3-us-west-2.amazonaws.com/covid19data/provence_df_per_day.csv.gz', index_col=0)

jhu_df_time['Date'] = pd.to_datetime(jhu_df_time['Date'])
jhu_df['Date'] = pd.to_datetime(jhu_df['Date'])
csbs_df['Date'] = pd.to_datetime(csbs_df['Date'])
jhu_df['state'] = ""


date_mapper = pd.DataFrame(
    jhu_df_time['Date'].unique(), columns=['Date'])
date_mapper['Date_text'] = date_mapper['Date'].dt.strftime('%m/%d/%y')
min_date = date_mapper.index[0]
max_date = date_mapper.index[-1]


merge = pd.concat([jhu_df, csbs_df])
merge = merge.fillna('')
problem_countries = merge[merge['country_code'] == '']['country'].tolist()
merge.loc[merge['country'] == 'Namibia', 'country_code'] = ''
centroid_mapper = pd.read_csv('country_centroids_az8.csv')
problem_states = merge[~merge['country_code'].isin(centroid_mapper['iso_a2'])]
new_merge = merge.merge(
    centroid_mapper, left_on='country_code', right_on='iso_a2')
new_merge['case_rate'] = new_merge['confirmed']/new_merge['pop_est'] * 100
new_merge['death_rate'] = new_merge['deaths']/new_merge['pop_est'] * 100
new_merge['confirmed_no_death'] = new_merge['confirmed'] - new_merge['deaths']

# If something is has the same name for continent and subregion, lets just add the word _subregion
new_merge.loc[new_merge['continent'] == new_merge['subregion'],
              'subregion'] = new_merge['subregion'] + ' Subregion'



Unnamed: 0,id,lat,lon,Date,province,country_code,country,confirmed,deaths,recovered,...,name_len,long_len,abbrev_len,tiny,homepart,Longitude,Latitude,case_rate,death_rate,confirmed_no_death
0,0,33.000000,65.000000,2020-03-24 06:01:08.402656+00:00,,AF,Afghanistan,40,1,0,...,11,11,4,-99,1,66.004734,33.835231,0.000141,0.000004,39
1,1,41.153300,20.168300,2020-03-24 06:01:08.412527+00:00,,AL,Albania,104,4,0,...,7,7,4,-99,1,20.049834,41.142450,0.002858,0.000110,100
2,2,28.033900,1.659600,2020-03-24 06:01:08.417963+00:00,,DZ,Algeria,230,17,0,...,7,7,4,-99,1,2.617323,28.158938,0.000673,0.000050,213
3,3,42.506300,1.521800,2020-03-24 06:01:08.423338+00:00,,AD,Andorra,133,1,0,...,7,7,4,5,1,1.560544,42.542291,0.158545,0.001192,132
4,4,-11.202700,17.873900,2020-03-24 06:01:08.428696+00:00,,AO,Angola,3,0,0,...,6,6,4,-99,1,17.537368,-12.293361,0.000023,0.000000,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1504,233,12.116500,-61.679000,2020-03-24 06:01:09.466806+00:00,,GD,Grenada,1,0,0,...,7,7,5,-99,1,-61.682202,12.117250,0.001102,0.000000,1
1505,234,-18.665695,35.529562,2020-03-24 06:01:09.471494+00:00,,MZ,Mozambique,1,0,0,...,10,10,4,-99,1,35.533675,-17.273816,0.000005,0.000000,1
1506,235,34.802075,38.996815,2020-03-24 06:01:09.475908+00:00,,SY,Syria,1,0,0,...,5,5,5,-99,1,38.507882,35.025474,0.000005,0.000000,1
1507,236,-8.874217,125.727539,2020-03-24 06:01:09.480471+00:00,,TL,Timor-Leste,1,0,0,...,11,11,4,-99,1,125.844390,-8.828892,0.000088,0.000000,1


In [351]:
 COVID19Py.COVID19(data_source="jhu").getAll(timelines)

{'latest': {'confirmed': 418678, 'deaths': 18625, 'recovered': 0},
 'locations': []}