<a href="https://colab.research.google.com/github/shouryan01/COVID-19-Tracker/blob/master/covid19_visualizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#IMPORT LIBRARIES

import pandas as pd
import numpy as np
import datetime
import plotly.express as px
import plotly.io as pio


print("Done!")

In [None]:
#LOAD DATA

# Raw CSV endpoints of the JHU CSSE global time series, one file per metric
GLOBAL_CASES_SOURCE      = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
GLOBAL_DEATHS_SOURCE     = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
GLOBAL_RECOVERIES_SOURCE = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# Download the three tables in the fixed order: cases, deaths, recoveries
global_cases, global_deaths, global_recoveries = (
    pd.read_csv(source)
    for source in (GLOBAL_CASES_SOURCE, GLOBAL_DEATHS_SOURCE, GLOBAL_RECOVERIES_SOURCE)
)
datasets = [global_cases, global_deaths, global_recoveries]

# The header of the last column is parsed as the most recent reporting date;
# "today" is taken to be the day after it.
one_day = datetime.timedelta(days=1)
date_yesterday = pd.to_datetime(global_cases.columns[-1]).date()
date_today = date_yesterday + one_day
date_before_yesterday = date_yesterday - one_day

status_message = '\nData was last updated on ' + str(date_today) + '.' + ' Latest data available for ' + str(date_yesterday)
print(status_message)

In [None]:
#CLEAN DATA

#Drop all negative values
def remove_negatives(df):
    """Drop every row whose last column holds a negative value.

    The matching rows are removed from ``df`` itself (the caller's frame is
    modified in place); the return value is a copy of the remaining rows with
    a fresh 0..n-1 index.
    """
    last_column = df.iloc[:, -1]
    negative_rows = df.index[last_column < 0]
    # One vectorised drop instead of one drop call per offending row
    df.drop(index=negative_rows, inplace=True)
    return df.reset_index(drop=True)
datasets = [remove_negatives(frame) for frame in datasets]

# Rename columns, deal with NaN, change the date headers to datetime.date objects
for i, frame in enumerate(datasets):
    cleaned = frame.rename(columns={'Country/Region': 'Country', 'Province/State': 'State'})
    # Only the text column gets an empty string. Filling the whole frame with ''
    # would put strings into numeric columns (so they could no longer be summed)
    # and would leave nothing for the numeric fill below to replace.
    cleaned = cleaned.fillna({'State': ''}).fillna(0)
    # The first four columns are kept as they are; every later header is parsed
    # into a datetime.date so it can be looked up with date_yesterday.
    cleaned.columns = cleaned.columns[:4].tolist() + \
                      [pd.to_datetime(label).date() for label in cleaned.columns[4:]]
    datasets[i] = cleaned

datasets[0]

In [None]:
#GROUP BY COUNTRY

labels = ['Global Cases     ', 'Global Deaths    ', 'Global Recoveries']
df_labels = ['Cases', 'Deaths', 'Recoveries']

# One frame per metric, indexed by country. numeric_only=True states explicitly
# that text columns such as 'State' are left out of the sum instead of relying
# on pandas to drop them silently (newer pandas versions no longer do that).
# NOTE: 'Lat'/'Long' are summed as well, so for countries split over several
# rows they are not usable coordinates.
country_datasets = [
    frame.groupby('Country').sum(numeric_only=True).rename(index={'US': 'United States'})
    for frame in datasets
]

country_datasets[0]
# country_datasets[0][[date_yesterday]]

In [None]:
#INSTALL country_converter

!pip install country_converter --upgrade

In [None]:
#LABEL CONTINENTS

#FIX NORTH/SOUTH America

import country_converter as coco

# Add a 'Continent' column to each per-country frame, derived from the country
# names in its index.
for dataset in country_datasets:
    dataset['Continent'] = coco.convert(names=list(dataset.index), to='Continent')

country_datasets[0]

In [None]:
#GLOBAL STATISTICS

print('As of: ', str(date_today), '\n')

# Sum only the latest date column, once per dataset. Summing the whole frame
# would also aggregate the text columns just to read a single entry.
latest_totals = [int(frame[date_yesterday].sum()) for frame in datasets]
total_cases, total_deaths, total_recoveries = latest_totals

for label, total in zip(labels, latest_totals):
    print('{} : {:,d}'.format(label, total))

print('\nGlobal Mortality Rate : {:.2f}%'.format(total_deaths / total_cases * 100))
print('Global Recovery Rate  : {:.2f}%'.format(total_recoveries / total_cases * 100))

In [None]:
#TOP 10 DAILY STATISTICS (ranked by current total cases/deaths/recoveries)

def daily():
    """Return day-over-day changes for every frame in ``country_datasets``.

    Each returned frame has one row per date column and one column per country
    (all countries, ordered by their value on ``date_yesterday``, largest
    first). The first date has no predecessor, so its change is reported as 0.
    """
    def _daily_change(country_df):
        ranked = country_df.sort_values(by=date_yesterday, ascending=False)
        # Positional slice: skips the first two columns and the last one
        # (presumably Lat, Long and Continent -- confirm if the layout changes)
        value_columns = ranked.iloc[:, 2:-1]
        return value_columns.diff(axis=1).fillna(0).T

    return [_daily_change(country_df) for country_df in country_datasets]

daily()[0]

In [None]:
#IMPORT BOKEH LIBRARIES
from bokeh.io        import output_file, show, output_notebook, save
from bokeh.plotting  import figure
from bokeh.models    import ColumnDataSource, HoverTool
from bokeh.palettes  import Dark2 as palette #https://docs.bokeh.org/en/latest/docs/reference/palettes.html#matplotlib-palettes
from bokeh.transform import factor_cmap

print("Done!")

In [None]:
#VISUALIZE CASES, DEATHS, RECOVERIES

def bokehB(dataset, case):
    """Show a bar chart of the 20 countries with the highest latest totals.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame indexed by country. Only its last two columns are read: the
        second-to-last supplies the totals and the last one must be named
        'Continent'.
    case : str
        Label used in the chart title; surrounding whitespace is stripped.

    Returns
    -------
    Whatever ``bokeh.io.show`` returns for the figure.
    """
    df = dataset.iloc[:, -2:].sort_values(by=dataset.columns[-2],
                                          ascending=False).head(20)
    # Expose the totals under a fixed column name for the glyph and the tooltip
    df['totals'] = df.iloc[:, 0]
    df.drop(df.columns[0], axis=1, inplace=True)

    continents = df['Continent'].unique()
    # The palette is a dict keyed by number of colours and only offers a limited
    # range of sizes; clamp the lookup so that a chart with very few (or very
    # many) continents does not fail with a KeyError.
    palette_size = min(max(len(continents), min(palette)), max(palette))

    source = ColumnDataSource(df)

    select_tools = ['save']
    tooltips = [
        ('Country', '@Country'), ('Total', '@totals{0,000}')
        ]

    p = figure(x_range=df.index.tolist(), plot_width=840, plot_height=600,
               x_axis_label='Country',
               y_axis_label='Totals',
               title="Top Countries in {} as of ".format(case.strip()) + str(date_today),
               tools=select_tools)

    p.vbar(x='Country', top='totals', width=0.9, alpha=0.7, source=source,
           legend_field="Continent",
           color=factor_cmap('Continent', palette=palette[palette_size],
                             factors=continents))

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    p.xaxis.major_label_orientation = 1
    # Plain numbers on the y axis instead of scientific notation
    p.left[0].formatter.use_scientific = False

    p.add_tools(HoverTool(tooltips=tooltips))

    output_notebook()

    return show(p)

bokehB(country_datasets[0], labels[0])

In [None]:
# Same top-20 bar chart, this time for the deaths dataset
bokehB(country_datasets[1], labels[1])

In [None]:
# Same top-20 bar chart, this time for the recoveries dataset
bokehB(country_datasets[2], labels[2])

In [None]:
#VISUALIZE MORTALITY RATE PER CASES

def bokehB_mort(num=100):
    """Show a bar chart of the 20 highest mortality rates (deaths per case, in %).

    Parameters
    ----------
    num : int, default 100
        Minimum number of deaths (inclusive) a country must have on
        ``date_yesterday`` to be considered.
    """
    # Countries with at least `num` deaths -- inclusive, as the chart title states
    deaths = country_datasets[1][date_yesterday]
    top_death = deaths[deaths >= num]

    # Inner join with the confirmed cases: column 0 = cases, column 1 = deaths
    df_mort = pd.concat([country_datasets[0][date_yesterday], top_death], axis=1,
                        join='inner')
    mort_rate = (df_mort.iloc[:, 1] / df_mort.iloc[:, 0] * 100).round(2)

    # Keep the 20 highest rates under a fixed column name for the glyph/tooltip
    df = mort_rate.sort_values(ascending=False).head(20).to_frame(name='totals')

    import country_converter as coco
    df['Continent'] = coco.convert(names=df.index.to_list(), to='Continent')

    continents = df['Continent'].unique()
    # The palette is a dict keyed by number of colours and only offers a limited
    # range of sizes; clamp the lookup so that a chart with very few (or very
    # many) continents does not fail with a KeyError.
    palette_size = min(max(len(continents), min(palette)), max(palette))

    source = ColumnDataSource(df)

    select_tools = ['save']
    tooltips = [
        ('Country', '@Country'), ('Rate', '@totals{0.00}%')
        ]

    p = figure(x_range=df.index.tolist(), plot_width=840, plot_height=600,
               x_axis_label='Country',
               y_axis_label='Rate (%)',
               title="Mortality rate of countries with at least {} deaths " \
               "as of ".format(num) + str(date_today),
               tools=select_tools)

    p.vbar(x='Country', top='totals', width=0.9, alpha=0.7, source=source,
           legend_field="Continent",
           fill_color=factor_cmap('Continent', palette=palette[palette_size],
                                  factors=continents))

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    p.xaxis.major_label_orientation = 1
    # Plain numbers on the y axis instead of scientific notation
    p.left[0].formatter.use_scientific = False

    p.add_tools(HoverTool(tooltips=tooltips))
    show(p)

bokehB_mort()

In [None]:
#CLEAN DATA FOR A MAP

#Create dataset for daily cases on a national level
# NOTE(review): not read anywhere in the visible part of the notebook; kept in
# case a later cell uses it.
daily_statistics = [3]

# Countries that have at least one row with a non-empty State, i.e. cases
# reported at the state/province level and/or territories listed separately.
# At the time of writing:
# ['Australia', 'Canada', 'China', 'Denmark', 'France', 'Netherlands', 'United Kingdom']
countries_with_split_data = datasets[0][datasets[0].State!=''].Country.unique().tolist()

# Geographical centers (Lat, Long) used to place the map marker of each of the
# countries mentioned above, replacing the values left by the per-country
# aggregation.
COUNTRY_CENTERS = {
    'Australia':      (-25.609361, 134.361949),
    'Canada':         (53.7609, -98.8139),
    'China':          (35.7518, 104.2861),
    'Denmark':        (56.2639, 9.5018),
    'France':         (46.227600, 2.213700),
    'Netherlands':    (52.1326, 5.2913),
    'United Kingdom': (55.378100, -3.436000),
}

for country_df in country_datasets:
    for country, (center_lat, center_long) in COUNTRY_CENTERS.items():
        country_df.at[country, 'Lat'] = center_lat
        country_df.at[country, 'Long'] = center_long

    # The plotting calls refer to columns by string name, so expose the index
    # as a regular 'Country' column and turn every column label into a string.
    country_df['Country'] = country_df.index
    country_df.columns = country_df.columns.astype(str)

country_datasets[1]

In [None]:
# Bubble map of confirmed cases: one marker per country, sized and coloured by
# the value in the most recent date column.
fig = px.scatter_geo(
    country_datasets[0],
    lat="Lat",
    lon="Long",
    hover_name="Country",
    color=str(date_yesterday),
    size=str(date_yesterday),
    size_max=45,  # hover_data=["State"],
    template='plotly',
    # https://plotly.com/python/builtin-colorscales/#named-builtin-continuous-color-scales
    color_continuous_scale="oranges",
    projection="natural earth",
    # animation_frame=country_datasets[0].columns
    title="Global Cases as of: " + str(date_yesterday),
)

fig.update_layout(
    geo={
        'showframe': True,
        'showcoastlines': False,
        'projection_type': 'natural earth',
    }
)

fig.update_geos(
    resolution=110,
    showcountries=True,
    lataxis_range=[-55, 90],
    lonaxis_range=[-180, 180],
)

fig.show()
# pio.write_html(fig, file='plots/Geo_confirmed.html', auto_open=False)

In [None]:
#TODO: add animations!!

# Bubble map of deaths: one marker per country, sized and coloured by the value
# in the most recent date column.
fig = px.scatter_geo(
    country_datasets[1],
    lat="Lat",
    lon="Long",
    hover_name="Country",
    color=str(date_yesterday),
    size=str(date_yesterday),
    size_max=35,  # hover_data=["State"],
    template='plotly',
    color_continuous_scale="oranges",
    projection="natural earth",
    title="Global Deaths as of: " + str(date_yesterday),
)

fig.update_layout(
    geo={
        'showframe': True,
        'showcoastlines': False,
        'projection_type': 'natural earth',
    }
)

fig.update_geos(
    resolution=110,
    showcountries=True,
    lataxis_range=[-55, 90],
    lonaxis_range=[-180, 180],
)

fig.show()

In [None]:
# Bubble map of recoveries: one marker per country, sized and coloured by the
# value in the most recent date column.
fig = px.scatter_geo(
    country_datasets[2],
    lat="Lat",
    lon="Long",
    hover_name="Country",
    color=str(date_yesterday),
    size=str(date_yesterday),
    size_max=35,  # hover_data=["State"],
    template='plotly',
    color_continuous_scale="oranges",
    projection="natural earth",
    title="Global Recoveries as of: " + str(date_yesterday),
)

fig.update_layout(
    geo={
        'showframe': True,
        'showcoastlines': False,
        'projection_type': 'natural earth',
    }
)

fig.update_geos(
    resolution=110,
    showcountries=True,
    lataxis_range=[-55, 90],
    lonaxis_range=[-180, 180],
)

fig.show()

**REPLICATING THE FINANCIAL TIMES CHART**