In [1]:
import pandas as pd
from IPython.display import Markdown, display
from ipywidgets import interact
from ipywidgets.widgets import (
    Dropdown, SelectionSlider, Checkbox
)
from datetime import datetime
import cufflinks as cf
import numpy as np

## Getting the data

In [79]:
# Raw CSV URLs in the CSSEGISandData/COVID-19 GitHub repository,
# keyed by the kind of count each file holds
path_dict = {
    "confirmed": (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_time_series/"
        "time_series_covid19_confirmed_global.csv"
    ),
    "death": (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_time_series/"
        "time_series_covid19_deaths_global.csv"
    ),
}

In [80]:
def set_index(df):
    """Reshape a raw time-series frame so that it is indexed by date.

    The first four columns (expected to be region, country, latitude and
    longitude) become a column MultiIndex; the remaining columns, whose
    labels are date strings, become the rows of the result.

    Args:
        df: Pandas data frame obtained from the Johns Hopkins repo

    Returns:
        A data frame with one row per date (parsed with ``dayfirst=False``)
        and one column per original row.
    """
    # The four leading columns identify each series
    location_index = pd.MultiIndex.from_frame(df.iloc[:, :4])
    # Label the value rows with that identifier, then flip rows and columns
    by_date = df.iloc[:, 4:].set_index(location_index).transpose()
    # The former column labels are date strings: parse them into timestamps
    dates = pd.to_datetime(by_date.index, dayfirst=False)
    return by_date.set_index(dates)

# Download every CSV listed in path_dict and re-index it by date,
# keeping the same keys as path_dict
df_dict = dict(
    (kind, set_index(pd.read_csv(url)))
    for kind, url in path_dict.items()
)

In [83]:
def get_same_origin(df):
    """Shift every column so that it starts at its first counted day.

    For each column, only the entries where the running sum of the column
    is non-zero are kept (for non-negative counts this drops the leading
    zeros). The kept values are moved to the top and the column is padded
    with NaN at the bottom back to its original length.

    Args:
        df: Data frame with one column per series and one row per day

    Returns:
        A float data frame whose index is a 1-based position counter instead
        of the original index. Rows that are NaN in every column are dropped.
    """
    total_days = df.shape[0]

    def _shift_to_start(values):
        # With raw=True, `values` is a plain NumPy array
        as_float = values.astype(float)
        started = as_float.cumsum() != 0
        kept = as_float[started]
        n_missing = total_days - kept.shape[0]
        return np.pad(kept, (0, n_missing),
                      'constant', constant_values=np.nan)

    aligned = df.apply(_shift_to_start, raw=True)
    # The original index no longer applies once the columns are shifted
    aligned = aligned.reset_index(drop=True)
    aligned = aligned.dropna(how='all')

    # Count positions from 1 rather than 0
    aligned.index += 1
    return aligned

def get_data(df_input,
             data_type,
             output_path=None,
             label_total='<< Total >>',
             plot_type='line',
             move_origin=False):
    """Build a figure with the cumulative and the per-day counts per country.

    The columns of ``df_input`` are summed per country (level 1 of the column
    MultiIndex). The cumulative values and their day-to-day differences are
    plotted in the same figure, and a drop-down menu switches between the two
    views. Only the first trace of a view is shown initially; the others are
    available from the legend.

    Args:
        df_input: Data frame with one row per date and a column MultiIndex
            whose level 1 is the country. It is not modified.
        data_type: Text describing the data, used in the axis titles and the
            menu labels (e.g. ``'confirmed cases'``).
        output_path: If not None, the figure is also written to this path
            as HTML.
        label_total: Name of the column holding the sum over all countries.
            It is inserted as the first column (so it is the trace shown
            initially) when ``move_origin`` is False. Its length is also the
            maximum length of a country name before it is truncated.
        plot_type: Value passed as ``kind`` for the cumulative plot.
        move_origin: If True, shift every country with ``get_same_origin``
            and replace zeros by NaN; no total column is added.

    Returns:
        The figure object returned by ``DataFrame.iplot(asFigure=True)``
        (presumably a plotly figure; ``iplot`` is expected to be provided by
        the cufflinks import).

    Raises:
        ValueError: If ``move_origin`` is False and ``label_total`` is already
            the name of a country.
    """
    # Group territories per country. Grouping the transposed frame avoids the
    # deprecated ``axis=1`` argument of ``DataFrame.groupby``; it also builds
    # a new frame, so ``df_input`` is left untouched.
    df = df_input.T.groupby(level=1).sum().T

    if move_origin:
        # Shift origin
        df = get_same_origin(df)
        # Replace 0's with NaN's
        df = df.replace(0, np.nan)
    else:
        # Total number per day, placed first so that it is the trace shown
        # initially whatever the label is
        df.insert(0, label_total, df.sum(axis=1))

    # Shorten long country names so that the legend stays compact
    max_str_len = len(label_total)
    df.columns = [c if len(c) <= max_str_len
                  else f"{c[:max_str_len]}..."
                  for c in df.columns]

    # Daily numbers: difference between consecutive rows
    df_diff = df.diff()

    # Number of traces per view (one per column, total included if present)
    n = df.shape[1]

    # Slicing instead of indexing keeps an empty data_type from raising
    capitalized = data_type[:1].upper() + data_type[1:]
    labels = [f"{capitalized} (total)", f"{capitalized} (per day)"]
    titles = [f"Number of {data_type} (total)",
              f"Number of {data_type} (per day)"]

    # Visibility of the 2 * n traces for each view: the first n traces are
    # the cumulative plot, the last n are the per-day plot
    visibles = [[True] + (n-1) * ['legendonly'] + n * [False],
                n * [False] + [True] + (n-1) * ['legendonly']]

    buttons = [dict(args=[{"visible": visible},
                          {"yaxis_title": title}],
                    label=label, method="update")
               for title, label, visible in zip(titles, labels, visibles)]

    # Plot the data for cumulative number of cases
    fig_0 = df.iplot(kind=plot_type, yTitle=titles[0],
                     theme='ggplot', asFigure=True, online=True,
                     margin=dict(l=0, r=0, b=0, t=100, pad=0),
                     mode='lines+markers')

    # Plot the data for daily number of cases
    fig_1 = df_diff.iplot(kind='bar', asFigure=True)

    # Add the second plot to the first (same fig)
    fig_0.add_traces(tuple(fig_1.select_traces()))

    # Start on the cumulative view with only its first trace shown
    for trace, visible in zip(fig_0.select_traces(), visibles[0]):
        trace.update({'visible': visible})

    fig_0.update_layout(
        # Define the layout of the menu
        updatemenus=[
            dict(direction="down", active=0,
                 x=0, y=1.2, xanchor='left', buttons=buttons)
        ],
        # Define the layout of the legend
        legend=dict(
            x=0.5, y=-0.9,
            yanchor='middle', xanchor='center',
            font=dict(
                family="sans-serif",
                size=10, color="black"
            ),
            bordercolor="Black", borderwidth=1,
            orientation="h"
        )
    )

    if output_path is not None:
        # Write the file to HTML
        fig_0.write_html(output_path)

    return fig_0

In [84]:
%%time
# Pairs of (key in df_dict, text used in the plot titles)
plot_info = (
    ('confirmed', 'confirmed cases'),
    ('death', 'deaths'),
)

# Write one HTML plot per kind of data
for k, data_type in plot_info:
    path = f"plot-{k}.html"
    fig = get_data(df_dict[k], data_type, output_path=path)

CPU times: user 5.66 s, sys: 3.83 ms, total: 5.66 s
Wall time: 5.67 s


In [85]:
%%time

# Write one HTML plot per kind of data, with every country shifted
# to a common origin
for k, data_type in plot_info:
    path = f"plot-{k}-origin.html"
    fig = get_data(
        df_dict[k],
        data_type,
        output_path=path,
        move_origin=True,
    )

CPU times: user 5.68 s, sys: 48.3 ms, total: 5.72 s
Wall time: 5.7 s
