<a href="https://colab.research.google.com/github/srikanthrc/running-to-stand-still/blob/master/_notebooks/2020-03-21-covid19_overview.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# COVID-19 Tracking / Worldwide
> Tracking coronavirus total cases, deaths and new cases by country. Separately, a view of the US by state.

- author: Srikanth Chinmay (inspired by Pratap Vardhan @ http://covid19dashboards.com)
- categories: [covid-19]
- image: images/covid-overview.png
- permalink: /covid-overview/
- toc: false
- comments: false


In [1]:
#hide
# Short banner describing what this notebook demonstrates.
print(
    '\n'
    'Example of using jupyter notebook, pandas (data transformations), jinja2 (html, visual)\n'
    'to create visual dashboards with fastpages\n'
    'You see also the live version on https://gramener.com/enumter/covid19/\n'
)


Example of using jupyter notebook, pandas (data transformations), jinja2 (html, visual)
to create visual dashboards with fastpages
You see also the live version on https://gramener.com/enumter/covid19/



In [0]:
#hide
import numpy as np
import pandas as pd
from datetime import datetime
from jinja2 import Template
from IPython.display import HTML

In [0]:
#hide

# FETCH
import getpass

# Auxiliary files (country mapping, HTML template) are read from this
# repository by default.
base_url = 'https://raw.githubusercontent.com/srikanthrc/covid-19/master/'
if getpass.getuser() == 'Pratap Vardhan':
    # For this specific user name the files are resolved relative to the
    # current working directory instead of being downloaded.
    base_url = ''

# Locations of the auxiliary files, keyed by role.
paths = dict(
    mapping=base_url + 'mapping_countries.csv',
    overview=base_url + 'overview.tpl',
)

def get_mappings(url):
    """Read the country mapping CSV and derive the lookup tables used later.

    Parameters
    ----------
    url : path, URL or buffer accepted by ``pd.read_csv``.
        The CSV must provide the columns ``Name``, ``Country`` and
        ``Continent``.

    Returns
    -------
    dict with three entries:
        ``'df'``              -- the mapping table as read,
        ``'replace.country'`` -- ``Country`` -> ``Name`` for rows that have a ``Name``,
        ``'map.continent'``   -- ``Name`` -> ``Continent``.
    """
    table = pd.read_csv(url)

    # Rows without a Name cannot serve as a rename target, so drop them first.
    named_rows = table.dropna(subset=['Name'])
    country_to_name = dict(named_rows.set_index('Country')['Name'])

    name_to_continent = dict(table.set_index('Name')['Continent'])

    return {
        'df': table,
        'replace.country': country_to_name,
        'map.continent': name_to_continent,
    }

# Load the mapping tables once at cell execution; get_frame reads
# mapping['replace.country'] to rename countries.
mapping = get_mappings(paths['mapping'])

def get_template(path):
    """Return the text of a template stored at a URL or on the local disk.

    Parameters
    ----------
    path : str
        Either a URL (anything ``urlparse`` gives a network location for) or
        a local file path.

    Returns
    -------
    str
        The template source, decoded as UTF-8 in both cases.

    Raises
    ------
    urllib.error.URLError
        If a remote template cannot be fetched.
    OSError
        If a local template cannot be opened.
    """
    from urllib.parse import urlparse
    if urlparse(path).netloc:
        from urllib.request import urlopen
        # Context manager closes the HTTP response even if read/decode fails.
        with urlopen(path) as response:
            return response.read().decode('utf8')
    # Decode explicitly as UTF-8 so a local copy reads the same as the remote
    # one regardless of the platform's default encoding, and close the handle.
    with open(path, encoding='utf8') as handle:
        return handle.read()

def get_frame(name):
    """Download one global time-series CSV and normalise its country names.

    ``name`` is substituted into the file name
    ``time_series_covid19_{name}_global.csv`` (this notebook passes
    ``'confirmed'`` and ``'deaths'``).  Values of the ``Country/Region``
    column are replaced using the module-level ``mapping['replace.country']``
    lookup.  Returns the resulting DataFrame.
    """
    repo_root = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/'
    # Alternative file name that was left commented out:
    # f'csse_covid_19_time_series/time_series_19-covid-{name}.csv'
    source = repo_root + f'csse_covid_19_time_series/time_series_covid19_{name}_global.csv'

    frame = pd.read_csv(source)
    # rename countries
    renamed = frame['Country/Region'].replace(mapping['replace.country'])
    frame['Country/Region'] = renamed
    return frame

def get_dates(df):
    """Locate the most recent date column that actually holds data.

    Every column other than ``Province/State``, ``Country/Region``, ``Lat``
    and ``Long`` is treated as a date column.  A column counts as empty when
    all of its values are NaN or 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Time-series frame with one column per date.

    Returns
    -------
    (int, pandas.Index)
        ``LAST_DATE_I`` -- negative index into ``dt_cols`` of the last
        non-empty date column (``-1`` when the final column has data, and
        also when no column has data), and ``dt_cols`` -- all date columns.

    Note
    ----
    If only the very first date column has data the returned index is
    ``-len(dt_cols)``; callers that look one column further back must handle
    that themselves.
    """
    dt_cols = df.columns[~df.columns.isin(['Province/State', 'Country/Region', 'Lat', 'Long'])]
    LAST_DATE_I = -1
    # Sometimes the last column(s) may be empty, so walk backwards.  The stop
    # value is -len - 1 so the first date column is inspected too (stopping
    # at -len silently skipped it).
    for i in range(-1, -len(dt_cols) - 1, -1):
        if not df[dt_cols[i]].fillna(0).eq(0).all():
            LAST_DATE_I = i
            break
    return LAST_DATE_I, dt_cols

In [0]:
#hide
COL_REGION = 'Country/Region'

# Naming: dft_* = full time series, dfc_* = per-country totals on the latest
# date with data, dfp_* = per-country totals on the date column before it.

# Confirmed
dft_cases = get_frame('confirmed')
df = dft_cases

# The date columns (and which one is "latest") are taken from the confirmed frame.
LAST_DATE_I, dt_cols = get_dates(dft_cases)
dt_today, dt_ago = dt_cols[LAST_DATE_I], dt_cols[LAST_DATE_I - 1]

dfc_cases, dfp_cases = (
    dft_cases.groupby(COL_REGION)[day].sum() for day in (dt_today, dt_ago)
)

# Deaths
dft_deaths = get_frame('deaths')
dfc_deaths, dfp_deaths = (
    dft_deaths.groupby(COL_REGION)[day].sum() for day in (dt_today, dt_ago)
)

#dft_recovered = get_frame('recovered')
#dfc_recovered = dft_recovered.groupby(COL_REGION)[dt_today].sum()
#dfp_recovered = dft_recovered.groupby(COL_REGION)[dt_ago].sum()


In [5]:
#hide
# One row per country: latest totals next to the previous day's totals.
df_table = pd.DataFrame({
    'Cases': dfc_cases,
    'Deaths': dfc_deaths,
    'PCases': dfp_cases,
    'PDeaths': dfp_deaths,
})
df_table = df_table.sort_values(by=['Cases', 'Deaths'], ascending=[False, False])
df_table = df_table.reset_index()

# Day-over-day increase for each metric.
for metric in ('Cases', 'Deaths'):
    increase = df_table[metric] - df_table[f'P{metric}']
    # DATA BUG: cumulative counts occasionally go down; floor the delta at 0.
    df_table[f'{metric} (+)'] = increase.clip(lower=0)

# Deaths as a percentage of cases, rounded to one decimal.
df_table['Fatality Rate'] = df_table['Deaths'].mul(100).div(df_table['Cases']).round(1)
df_table['Continent'] = df_table['Country/Region'].map(mapping['map.continent'])
df_table.head(15)

Unnamed: 0,Country/Region,Cases,Deaths,PCases,PDeaths,Cases (+),Deaths (+),Fatality Rate,Continent
0,US,682619,23529,555313,22020,127306,1509,3.4,North America
1,Spain,170099,17756,166831,17209,3268,547,10.4,Europe
2,Italy,159516,20465,156363,19899,3153,566,12.8,Europe
3,France,137875,14986,133670,14412,4205,574,10.9,Europe
4,Germany,130072,3194,127854,3022,2218,172,2.5,Europe
5,United Kingdom,89570,11347,85206,10629,4364,718,12.7,Europe
6,China,83213,3345,83134,3343,79,2,4.0,Asia
7,Iran,73303,4585,71686,4474,1617,111,6.3,Asia
8,Turkey,61049,1296,56956,1198,4093,98,2.1,Asia
9,Belgium,30589,3903,29647,3600,942,303,12.8,Europe


In [6]:
#hide
# Headline numbers: world totals plus China, US and Europe subtotals.
metrics = ['Cases', 'Deaths', 'Cases (+)', 'Deaths (+)']

# Each subtotal is a Series whose labels carry a region prefix, e.g. 'US Cases'.
s_china = df_table.loc[df_table['Country/Region'].eq('China'), metrics].sum().add_prefix('China ')
s_us = df_table.loc[df_table['Country/Region'].eq('US'), metrics].sum().add_prefix('US ')
s_eu = df_table.loc[df_table['Continent'].eq('Europe'), metrics].sum().add_prefix('EU ')

summary = {
    'updated': pd.to_datetime(dt_today),
    'since': pd.to_datetime(dt_ago),
    **df_table[metrics].sum(),  # world totals, unprefixed
    **s_china,
    **s_us,
    **s_eu,
}
summary

{'Cases': 2019319,
 'Cases (+)': 172640,
 'China Cases': 83213,
 'China Cases (+)': 79,
 'China Deaths': 3345,
 'China Deaths (+)': 2,
 'Deaths': 119482,
 'Deaths (+)': 5391,
 'EU Cases': 908788,
 'EU Cases (+)': 26279,
 'EU Deaths': 80028,
 'EU Deaths (+)': 3221,
 'US Cases': 682619,
 'US Cases (+)': 127306,
 'US Deaths': 23529,
 'US Deaths (+)': 1509,
 'since': Timestamp('2020-04-12 00:00:00'),
 'updated': Timestamp('2020-04-13 00:00:00')}

In [0]:
#hide
# Cumulative confirmed cases per country, one column per date.
dft_ct_cases = dft_cases.groupby(COL_REGION)[dt_cols].sum()

# Column-to-column difference gives new cases per date; the first date has no
# predecessor, so its NaN is replaced by 0 before casting to int.
daily_change = dft_ct_cases.diff(axis=1)
dft_ct_new_cases = daily_change.fillna(0).astype(int)
#dft_ct_new_cases.head()
#dft_ct_new_cases.loc[:, dt_cols[LAST_DATE_I-10]:dt_cols[LAST_DATE_I]]
#df_table.head(20)

In [8]:
#hide_input
# Compile the overview template.
overview_source = get_template(paths['overview'])
template = Template(overview_source)

# New-case columns from 50 positions before the latest date up to the latest date.
first_day, last_day = dt_cols[LAST_DATE_I-50], dt_cols[LAST_DATE_I]
recent_new_cases = dft_ct_new_cases.loc[:, first_day:last_day]

html = template.render(
    D=summary,
    table=df_table.head(20),  # REMOVE .head(20) to see all values
    newcases=recent_new_cases,
    np=np,
    pd=pd,
    enumerate=enumerate,
)
HTML(f'<div>{html}</div>')

Unnamed: 0_level_0,10  100  1000,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0
Country,New Cases,Total Cases,Total Cases,Deaths,Deaths,Fatality,Unnamed: 7_level_1,Unnamed: 8_level_1
,Feb. 23  Apr. 13,,"(+NEW) since Apr, 12","(+NEW) since Apr, 12","(+NEW) since Apr, 12","(+NEW) since Apr, 12",,
US,,682619.0,"(+127,306)",23529,"(+1,509)",3.4%,,
Spain,,170099.0,"(+3,268)",17756,(+547),10.4%,,
Italy,,159516.0,"(+3,153)",20465,(+566),12.8%,,
France,,137875.0,"(+4,205)",14986,(+574),10.9%,,
Germany,,130072.0,"(+2,218)",3194,(+172),2.5%,,
United Kingdom,,89570.0,"(+4,364)",11347,(+718),12.7%,,
China,,83213.0,(+79),3345,(+2),4.0%,,
Iran,,73303.0,"(+1,617)",4585,(+111),6.3%,,
Turkey,,61049.0,"(+4,093)",1296,(+98),2.1%,,


In [9]:
#hide
from pathlib import Path
# Download the helper module into the working directory only when it is not
# already there, so re-running the cell does not fetch it again.
# The leading `!` is IPython shell syntax: the line is run by the system shell.
if not Path('covid_overview.py').exists():
    ! wget https://raw.githubusercontent.com/srikanthrc/covid-19/master/covid_overview.py

--2020-04-14 00:29:56--  https://raw.githubusercontent.com/srikanthrc/covid-19/master/covid_overview.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9406 (9.2K) [text/plain]
Saving to: ‘covid_overview.py’


2020-04-14 00:29:56 (103 MB/s) - ‘covid_overview.py’ saved [9406/9406]



In [0]:
#hide
import covid_overview as covid

In [0]:
#hide
COL_REGION = 'Country/Region'

# Regions to summarise: display title paired with the key prefix used in the
# summary dict (e.g. 'EU Cases').
kpis_info = [
    {'title': title, 'prefix': prefix}
    for title, prefix in (
        ('Asia', 'Asia'),
        ('Europe', 'EU'),
        ('North America', 'NA'),
    )
]

# Build the country-level data via the downloaded helper module.
data = covid.gen_country_data(kpis_info=kpis_info)

In [12]:
#hide
# Display the 'summary' entry of the dict returned by covid.gen_country_data.
data['summary']

{'Asia Cases': 309152,
 'Asia Cases (+)': 11761,
 'Asia Deaths': 11318,
 'Asia Deaths (+)': 344,
 'Cases': 2019319,
 'Cases (+)': 172640,
 'Deaths': 119482,
 'Deaths (+)': 5391,
 'EU Cases': 908788,
 'EU Cases (+)': 26279,
 'EU Deaths': 80028,
 'EU Deaths (+)': 3221,
 'NA Cases': 721653,
 'NA Cases (+)': 129580,
 'NA Deaths': 24936,
 'NA Deaths (+)': 1612,
 'since': Timestamp('2020-04-12 00:00:00'),
 'updated': Timestamp('2020-04-13 00:00:00')}

In [0]:
# data['table'].head(10)

In [0]:
#hide_input
# Compile the overview template using the helper module's own loader and paths.
overview_tpl = covid.get_template(covid.paths['overview'])
template = Template(overview_tpl)

# Take the date columns and the last-date index from the module's result;
# this rebinds the names set by the earlier cells.
dt_cols = data['dt_cols']
LAST_DATE_I = data['dt_last']

#html = template.render(
#    D=data['summary'], table=data['table'],
#    newcases=data['newcases'].loc[:, dt_cols[LAST_DATE_I-50]:dt_cols[LAST_DATE_I]],
#    KPIS_INFO=kpis_info,
#    np=np, pd=pd, enumerate=enumerate
#   )
#HTML(f'<div>{html}</div>')

##### Visualizations by [Pratap Vardhan](https://twitter.com/PratapVardhan)[^1]

[^1]: Sources: ["COVID-19 Data Repository by Johns Hopkins CSSE"](https://systems.jhu.edu/research/public-health/ncov/) [GitHub](https://github.com/CSSEGISandData/COVID-19). [covidtracking.com](https://covidtracking.com/)