# Playing with CSSE numbers
## Get data here: [github](https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series)

In [8]:
import os
from collections import OrderedDict

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dateutil.parser import parse
from ipywidgets import interact, interactive, fixed, interact_manual
from scipy.signal import correlate
%matplotlib inline

## Do you want to re-download the data?
The source data is updated in the evenings, so there is no need to re-download it every time you run the notebook.
Options are: 'no' (load the local copies in `./data`), or anything else (re-download).

In [2]:
# 'no' (case-insensitive) loads the CSV copies already saved under ./data;
# any other string makes ImportData fetch fresh copies from GitHub.
re_download = 'no'

## Make functions to import the data and massage it into a good format

In [3]:
def ImportData(re_download):
    """
    Load the five CSSE COVID-19 time-series tables as DataFrames.

    Parameters
    ----------
    re_download : str
        'no' (case-insensitive) reads the CSV copies stored under ./data.
        Any other value downloads each table from the CSSE GitHub
        repository and overwrites the local copy.

    Returns
    -------
    tuple of pandas.DataFrame
        (us_cases, global_cases, us_deaths, global_deaths, global_recovered)
    """
    base_url = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
                'csse_covid_19_data/csse_covid_19_time_series/')
    data_dir = './data'
    # The order here is the order of the returned tuple.
    file_names = ['time_series_covid19_confirmed_US.csv',
                  'time_series_covid19_confirmed_global.csv',
                  'time_series_covid19_deaths_US.csv',
                  'time_series_covid19_deaths_global.csv',
                  'time_series_covid19_recovered_global.csv']

    download = re_download.lower() != 'no'
    if download:
        print('downloading')
        # to_csv does not create missing directories, so make sure the
        # cache folder exists before the first write.
        os.makedirs(data_dir, exist_ok=True)
    else:
        print('not re-downloaded')

    frames = []
    for file_name in file_names:
        local_path = data_dir + '/' + file_name
        if download:
            frame = pd.read_csv(base_url + file_name)
            frame.to_csv(local_path, encoding='utf-8', index=False)
        else:
            frame = pd.read_csv(local_path)
        frames.append(frame)
    return tuple(frames)

def MakeNewDF(re_download='no'):
    """
    Combine the US cases and deaths tables and add per-state total rows.

    The county-level cases and deaths frames are stacked into one frame with
    a 'category' column ('cases' or 'deaths'). For every state, two extra
    rows are appended (one per category) whose Admin2 value is 'all' and
    whose non-identifier columns hold the sum over that state's rows.

    Parameters
    ----------
    re_download : str
        Passed straight to ImportData.

    Returns
    -------
    big_df : pandas.DataFrame
        Stacked county rows followed by the per-state total rows.
    date_pd : pandas.DatetimeIndex
        The date columns parsed as datetimes.
    date_list : list of str
        The date column labels, in column order.
    st_dict : OrderedDict
        State name -> state name.
    cnty_dict : OrderedDict
        County name -> county name.
    """
    # Only the two US tables are used here; the global ones are loaded but ignored.
    us_cases, global_cases, us_deaths, global_deaths, global_recovered = ImportData(re_download)
    # The cases table has no Population column; copy it from the deaths table
    # (aligned on the row index).
    us_cases['Population'] = us_deaths['Population']

    # Add a column for category
    us_cases['category'] = 'cases'
    us_deaths['category'] = 'deaths'

    # Stack the two frames (DataFrame.append no longer exists in pandas 2.x).
    big_df = pd.concat([us_cases, us_deaths], sort=False)
    length_holder = len(big_df)

    # Get State List
    s_list = list(us_deaths.Province_State.unique())
    # Get County List, skipping rows that have no county name. This replaces
    # deleting element 0, which was only right when the missing value came first.
    cnty_list = list(us_deaths['Admin2'].dropna().unique())

    # Identifier columns: these are not summed, they get fixed placeholder values.
    keep_dat = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
                'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'category']

    # Build every state-total row first and append them in a single concat
    # instead of enlarging big_df one row at a time.
    state_rows = []
    state_labels = []
    for i, j in enumerate(s_list):
        for offset, frame, cat in ((0, us_cases, 'cases'),
                                   (len(s_list), us_deaths, 'deaths')):
            sum_cols = [c for c in frame.columns if c not in keep_dat]
            row = frame.loc[frame['Province_State'] == j, sum_cols].sum().to_dict()
            row.update(zip(keep_dat,
                           ['N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'all', j, 'US',
                            'N/A', 'N/A', 'N/A', cat]))
            state_rows.append(row)
            # Same row labels as before: cases totals first, deaths totals
            # offset by the number of states.
            state_labels.append(length_holder + offset + i)

    if state_rows:
        state_df = pd.DataFrame(state_rows, index=state_labels).reindex(columns=big_df.columns)
        big_df = pd.concat([big_df, state_df], sort=False)

    # Date columns are the ones whose label starts with a digit ('1/22/20', ...).
    # Matching any digit keeps months 8 and 9, which a '1'..'7' prefix test drops.
    date_list = [x for x in list(big_df.keys()) if x[:1].isdigit()]
    date_pd = pd.to_datetime(date_list)
    st_dict = OrderedDict(zip(s_list, s_list))
    cnty_dict = OrderedDict(zip(cnty_list, cnty_list))
    return big_df, date_pd, date_list, st_dict, cnty_dict

## Functions for working with the arrays

In [43]:
def DailyFromCumSum(arr):
    """
    Turn a cumulative series into per-step increments.

    The result has the same length as `arr`: element 0 is 0 and every later
    element is the difference between consecutive cumulative values.
    """
    increments = arr[1:] - arr[:-1]
    # Pad the front with a zero so the output lines up with the input dates.
    return np.insert(increments, 0, 0, axis=0)

def RollIt(np_arr, roll):
    """
    Rolling mean of `np_arr` over a window of `roll` points.

    Returns a pandas Series; the first `roll - 1` entries are NaN because
    the window is not yet full there.
    """
    return pd.Series(np_arr).rolling(roll).mean()

def GetSeries(t_string, state, cnty, chop=False):
    """
    Pull one time series out of the module-level frame `bdf`.

    `t_string` is a plot name such as 'State Daily Cases'. Its words choose
    the row: 'State' selects the per-state total row (Admin2 == 'all'),
    otherwise the row for county `cnty`; 'Cases' selects the cases category,
    otherwise deaths. If it contains 'Daily', the cumulative values are
    converted to daily increments. `chop` is accepted but not used.

    Returns the values of the date columns `dl` for the first matching row
    as a NumPy array.
    """
    county_key = 'all' if 'State' in t_string else cnty
    category_key = 'cases' if 'Cases' in t_string else 'deaths'

    row_mask = ((bdf.Province_State == state)
                & (bdf['Admin2'] == county_key)
                & (bdf['category'] == category_key))
    # Take the first matching row, restricted to the date columns.
    cumulative = np.array(bdf[row_mask].loc[:, dl[:]])[0]

    if 'Daily' in t_string:
        return DailyFromCumSum(cumulative)
    return cumulative

def ChopSer(ar1, ar2):
    """
    Trim both series so they start where `ar1` first exceeds 5.

    If `ar1` never exceeds 5, np.argmax returns 0 and both series come back
    unchanged.
    """
    chop = np.argmax(ar1 > 5)
    return ar1[chop:], ar2[chop:]

def PhaseShift(ar1, ar2):
    """
    Estimate the shift between two series, in samples, by cross-correlation.

    Both series are first trimmed with ChopSer. The returned value is the lag
    at which the full cross-correlation peaks: negative when the pattern in
    `ar1` occurs earlier than in `ar2`, positive when it occurs later, and 0
    when they line up.

    Returns
    -------
    float
        The lag of the correlation peak (a whole number of samples).
    """
    ar1_chopped, ar2_chopped = ChopSer(ar1, ar2)
    chopped_length = len(ar1_chopped)
    xcorr = correlate(ar1_chopped, ar2_chopped)
    # A full cross-correlation of two length-N inputs has 2N - 1 points, at
    # integer lags -(N - 1) .. (N - 1). Spreading 2N - 1 points over -N .. N
    # gives a step larger than 1 and mis-scales every lag except 0.
    dt = np.arange(-(chopped_length - 1), chopped_length, dtype=float)
    recovered_time_shift = dt[xcorr.argmax()]
    return recovered_time_shift

## Run the functions for getting our data

In [44]:
# bdf: combined cases/deaths frame including the per-state total rows
# dpd: date columns parsed to datetimes (used as the x axis)
# dl:  date column labels as strings
# sl / cl: state and county name dictionaries used as dropdown options
bdf, dpd, dl, sl, cl = MakeNewDF(re_download)

not re-downloaded


## Set some parameters

In [45]:
# Styling constants read by PlotIt.
fig_facecolor = 'xkcd:cloudy blue'      # figure background
bg_color = 'xkcd:really light blue'     # axes background
ts_color = 'tab:red'                    # primary series line and its y tick labels
roll_color = 'xkcd:blue'                # rolling-average line
roll_linewidth = 4.0
x_rotation = 60                         # rotation of the x tick labels, in degrees
tick_fontsize = 14
label_fontsize = 16
title_fontsize = 20
legend_fontsize = 20

## Run the interactive plotting widget
NOTE: the dual plotting doesn't work anymore, I'll have to fix it later.

In [47]:
def PlotIt(which_plot='Cumulative Cases', duo=None, state='Washington',
           cnty='King', roll=10, show_roll=False, poly_trend=False):
    """
    Plot one series, optionally against a second one on a twin y axis.

    Parameters
    ----------
    which_plot : str
        Name of the primary series (e.g. 'State Daily Cases'); passed to
        GetSeries to select the data.
    duo : str or None
        Name of a second series drawn on a right-hand y axis, or None for a
        single-series plot. When given, the PhaseShift between the two
        series is printed.
    state, cnty : str
        State and county to plot. The county is only used for series whose
        name does not start with 'State'.
    roll : int
        Window length of the rolling average.
    show_roll : bool
        Draw the rolling average of the primary series.
    poly_trend : bool
        Overlay a degree-2 polynomial fitted to the primary series, starting
        where it first exceeds 5.
    """
    def place_name(plot_name):
        # 'State ...' series are labelled with the state, all others with the county.
        return state if plot_name.startswith('State') else cnty

    # Set up the fig
    fig, ax1 = plt.subplots(figsize=(16, 10))
    fig.patch.set_facecolor(fig_facecolor)
    ax1.set_facecolor(bg_color)
    plt.xticks(fontsize=tick_fontsize, rotation=x_rotation)
    plt.yticks(fontsize=tick_fontsize)

    ax1.set_xlabel('Date', fontsize=label_fontsize)
    ax1.set_ylabel('Cases' if which_plot.endswith('Cases') else 'Deaths',
                   fontsize=label_fontsize)
    fig.suptitle(place_name(which_plot) + ' ' + which_plot, fontsize=title_fontsize)

    series_one = GetSeries(which_plot, state, cnty)
    ax1.plot(dpd, series_one, color=ts_color, label=which_plot + ' Count')
    if show_roll:
        ma = RollIt(series_one, roll)
        ax1.plot(dpd,
                 ma, color=roll_color,
                 linewidth=roll_linewidth,
                 label=str(roll) + ' Day Rolling Avg')
    ax1.tick_params(axis='y', labelcolor=ts_color)
    legend_handles, legend_labels = ax1.get_legend_handles_labels()

    if duo is not None:
        ax2 = ax1.twinx()
        color = 'tab:blue'
        ax2.tick_params(axis='y', labelcolor=color)
        # Name each series by the place it was actually drawn for.
        fig.suptitle(place_name(which_plot) + ' ' + which_plot + ' vs. '
                     + place_name(duo) + ' ' + duo, fontsize=label_fontsize)
        series_two = GetSeries(duo, state, cnty)
        ax2.plot(dpd, series_two, color=color, label=duo + ' Count')

        print(PhaseShift(series_one, series_two))

        ax2.set_ylabel('Cases' if duo.endswith('Cases') else 'Deaths',
                       color=color, fontsize=label_fontsize)

        # Lines on the twin axis are not picked up by ax1.legend() on its own,
        # so merge their handles into the single legend.
        handles_two, labels_two = ax2.get_legend_handles_labels()
        legend_handles = legend_handles + handles_two
        legend_labels = legend_labels + labels_two

    ax1.legend(legend_handles, legend_labels, loc=2,
               fontsize=legend_fontsize, framealpha=.6)

    if poly_trend:
        # The trend only needs the primary series, so this also works when no
        # second series was requested.
        ar1_chopped, _ = ChopSer(series_one, series_one)
        ar1_chopped = np.asarray(ar1_chopped, dtype=float)
        xnew = np.arange(len(ar1_chopped))
        polynomial_coeff = np.polyfit(xnew, ar1_chopped, 2)
        ynew = np.poly1d(polynomial_coeff)
        ax1.plot(dpd[-len(ar1_chopped):], ynew(xnew), c='k')

    fig.tight_layout()
    plt.show()

# Dropdown options: each label maps to the identical series name that
# PlotIt / GetSeries expect. Building both from one list keeps the label and
# the value from drifting apart (the county label used to read 'Cumalative').
plot_names = ['State Cumulative Cases',
              'State Cumulative Deaths',
              'State Daily Cases',
              'State Daily Deaths',
              'County Cumulative Cases',
              'County Cumulative Deaths',
              'County Daily Cases',
              'County Daily Deaths']
plt_one = {name: name for name in plot_names}
# The second dropdown additionally offers 'None' to turn the comparison off.
plt_two = plt_one.copy()
plt_two['None'] = None
interactive_plot = interactive(PlotIt,
                               which_plot=plt_one,
                               duo=plt_two,
                               state=sl,
                               cnty=cl,
                               roll=widgets.IntSlider(min=4, max=20, step=1,
                                                      description='Rolling Average:',
                                                      continuous_update=False))
# Last child of the interactive container (kept for later use; not read here).
output = interactive_plot.children[-1]
interactive_plot

interactive(children=(Dropdown(description='which_plot', options={'State Cumulative Cases': 'State Cumulative …

In [21]:
# Copy the list of date column labels; the bare name on the last line makes
# the notebook display it.
new_dl = dl.copy()
new_dl

['1/22/20',
 '1/23/20',
 '1/24/20',
 '1/25/20',
 '1/26/20',
 '1/27/20',
 '1/28/20',
 '1/29/20',
 '1/30/20',
 '1/31/20',
 '2/1/20',
 '2/2/20',
 '2/3/20',
 '2/4/20',
 '2/5/20',
 '2/6/20',
 '2/7/20',
 '2/8/20',
 '2/9/20',
 '2/10/20',
 '2/11/20',
 '2/12/20',
 '2/13/20',
 '2/14/20',
 '2/15/20',
 '2/16/20',
 '2/17/20',
 '2/18/20',
 '2/19/20',
 '2/20/20',
 '2/21/20',
 '2/22/20',
 '2/23/20',
 '2/24/20',
 '2/25/20',
 '2/26/20',
 '2/27/20',
 '2/28/20',
 '2/29/20',
 '3/1/20',
 '3/2/20',
 '3/3/20',
 '3/4/20',
 '3/5/20',
 '3/6/20',
 '3/7/20',
 '3/8/20',
 '3/9/20',
 '3/10/20',
 '3/11/20',
 '3/12/20',
 '3/13/20',
 '3/14/20',
 '3/15/20',
 '3/16/20',
 '3/17/20',
 '3/18/20',
 '3/19/20',
 '3/20/20',
 '3/21/20',
 '3/22/20',
 '3/23/20',
 '3/24/20',
 '3/25/20',
 '3/26/20',
 '3/27/20',
 '3/28/20',
 '3/29/20',
 '3/30/20',
 '3/31/20',
 '4/1/20',
 '4/2/20',
 '4/3/20',
 '4/4/20',
 '4/5/20',
 '4/6/20',
 '4/7/20',
 '4/8/20',
 '4/9/20',
 '4/10/20',
 '4/11/20',
 '4/12/20',
 '4/13/20',
 '4/14/20',
 '4/15/20',
 '4/16/

In [None]:
# Stand-alone joke pie chart; it does not use any of the COVID data.
fig, ax = plt.subplots(figsize=(6, 3))
ax.set_title("What won't Meatloaf do for love?")
# Three wedges with hard-coded sizes, labels and colours.
ax.pie([70, 20, 10], labels=['That', 'Also that, but in red', 'THAT!'], colors=['Blue', 'Red', 'Purple'])
plt.show()