<a href="https://colab.research.google.com/github/michaelnikhil/my_python_monitor_covid19/blob/master/my_dashboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# COVID19 monitor

Python notebook to display statistics on the virus cases/deaths

Data from Johns Hopkins University, Center for Systems Science and Engineering (CSSE)
https://github.com/CSSEGISandData


Imports and class to load the data




In [45]:
import pandas as pd
import re
import datetime
import matplotlib.dates as mdates
import numpy as np
import matplotlib.pyplot as plt

class LoadData():
    """Loader bound to the URL of one CSV file.

    The same class is used for the time-series files (``downloadCovid`` and
    ``retrieve_dates``) and for the population lookup table
    (``downloadPopulation``).
    """

    def __init__(self, url):
        """Store the location of the CSV file.

        Args:
            url: Path or URL handed to ``pandas.read_csv``.
        """
        self.url = url

    def downloadCovid(self):
        """Read the CSV at ``self.url``.

        Returns:
            The parsed DataFrame. If reading fails, a message is printed and
            a placeholder Series (value 1, index ``'France'``) is returned
            instead of raising.
        """
        try:
            df = pd.read_csv(self.url, sep=',', na_values=['nan'], na_filter=True)
        except Exception:
            # Deliberate best effort: keep the notebook alive on download or
            # parse errors. ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            # NOTE(review): the placeholder is a Series, not a DataFrame, so
            # code expecting ``.columns`` will presumably fail on it.
            df = pd.Series(1, index=['France'])
            print('error, using dummy data')
        return df

    def downloadPopulation(self):
        """Read the lookup table at ``self.url`` and extract country populations.

        For every distinct ``Country_Region`` the 'Population' cells of the
        rows whose ``Province_State`` is missing are selected (the
        country-level rows).

        Returns:
            A Series indexed by country. Each entry is the pandas selection
            of 'Population' for that country, not a plain number, which is
            why consumers read it with ``.values[0]``. If anything fails, a
            message is printed and a placeholder Series (value 1, index
            ``'France'``) is returned.
        """
        try:
            df = pd.read_csv(self.url, sep=',', na_values=['nan'], na_filter=True)
            # unique() keeps first-appearance order, so the result is
            # reproducible from run to run (a set of strings is not).
            countries = df['Country_Region'].unique()
            population = []
            for country in countries:
                subset_df = df[df['Country_Region'] == country]
                # pick the country level row = the row with no Province/State
                population.append(subset_df[subset_df['Province_State'].isna()]['Population'])
            country_population = pd.Series(population, index=countries)

        except Exception:
            # Same best-effort policy as downloadCovid.
            country_population = pd.Series(1, index=['France'])
            print('error population, using dummy data')
        return country_population

    def retrieve_dates(self, df):
        """Parse the date column labels of a time-series DataFrame.

        The first four columns are skipped; every remaining label is expected
        to look like ``month/day/two-digit-year`` (for example ``1/22/20``).

        Args:
            df: DataFrame whose columns from position 4 onwards are dates.

        Returns:
            A list of ``datetime.datetime`` objects, one per date column.

        Raises:
            ValueError: If a label does not match ``%m/%d/%y``.
        """
        # Parse the two-digit year as written instead of forcing it to 2020,
        # which mislabelled every column from 2021 onwards.
        return [datetime.datetime.strptime(label, '%m/%d/%y')
                for label in df.columns[4:]]


covid19_class : class Country

In [132]:
class Country():
    """One country's figures, as absolute totals and per million inhabitants."""

    def __init__(self, df_covid, series_pop, country_name):
        """Keep references to the data needed by ``nvalues``.

        Args:
            df_covid: Table with 'Province/State', 'Country/Region', 'Lat',
                'Long' and value columns.
            series_pop: Mapping from country name to an object exposing
                ``.values``, whose first element is used as the population.
            country_name: Value to match in the 'Country/Region' column.
        """
        self.country_name = country_name
        self.series_pop = series_pop
        self.df_covid = df_covid

    def nvalues(self):
        """Total the value columns over all rows of the country.

        Returns:
            A pair of lists with one entry per value column: the totals, and
            the same totals scaled to one million inhabitants.
        """
        # rows belonging to this country (possibly several regions)
        belongs_to_country = self.df_covid['Country/Region'] == self.country_name
        counts = self.df_covid[belongs_to_country].drop(
            columns=['Province/State', 'Country/Region', 'Lat', 'Long'])

        # cumulate the different regions, column by column
        totals = [sum(counts[column]) for column in counts]

        # normalise with the country population, per million
        per_million = [
            1e6 * total / (self.series_pop[self.country_name]).values[0]
            for total in totals
        ]
        return totals, per_million

Utility functions


In [58]:
def daily_values(cumul):
    """Convert cumulative values into step-by-step increments.

    Args:
        cumul: Indexable sequence of cumulative values.

    Returns:
        A pandas Series starting with 0, followed by the difference between
        each value and the one before it.
    """
    increments = [0] + [cumul[k] - cumul[k - 1] for k in range(1, len(cumul))]
    return pd.Series(data=increments)

def sortN(df_covid,series_pop,N):
    """Rank the countries by their latest value and keep the first N.

    Args:
        df_covid: Time-series table passed on to ``Country``.
        series_pop: Population Series; its index provides the country names.
        N: Number of countries to keep in each ranking.

    Returns:
        A tuple ``(cases_abs, cases_rel, lastDate)``: the N largest latest
        absolute values, the N largest latest per-million values (both as
        Series indexed by country, in descending order), and the label of
        the last column of ``df_covid``.
    """
    countries = list(series_pop.index)
    latest_abs = []
    latest_rel = []
    # combine the covid cases with the population, country by country
    for country in countries:
        val_abs, val_rel = Country(df_covid, series_pop, country).nvalues()
        latest_abs.append(val_abs[-1])
        latest_rel.append(val_rel[-1])

    # Build each Series once: Series.append no longer exists in pandas 2.0
    # and copied the whole Series on every iteration.
    cases_abs = pd.Series(latest_abs, index=countries, dtype=float)
    cases_rel = pd.Series(latest_rel, index=countries, dtype=float)

    cases_abs = cases_abs.sort_values(ascending=False)
    cases_rel = cases_rel.sort_values(ascending=False)
    lastDate = df_covid.columns[-1]
    return cases_abs.iloc[:N], cases_rel.iloc[:N], lastDate

def format_plot(fig1, ax0):
    """Apply the common formatting to one axes of a time-series figure.

    Args:
        fig1: Figure that owns the axes; its date labels are auto-formatted.
        ax0: Axes that receives a legend, a '%d%b' date formatter on the
            x axis and grid lines on the y axis.
    """
    day_month = mdates.DateFormatter('%d%b')
    ax0.legend(loc=0, fontsize=12)
    ax0.xaxis.set_major_formatter(day_month)
    ax0.yaxis.grid(True)
    fig1.autofmt_xdate()

main script to collect the data

In [133]:
# window (number of samples) of the rolling average drawn on the daily plots
window_size=7

# URLs of the CSV files: deaths time series (url1), confirmed-cases time
# series (url2) and the lookup table used for the population (url3)
url1 = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
url2 = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
url3 = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
# import all covid data from the urls; on failure downloadCovid prints a
# message and returns placeholder data instead of raising
death = LoadData(url1)
data_death = death.downloadCovid()
conf = LoadData(url2)
data_conf = conf.downloadCovid()
# dates parsed from the column labels of the deaths table; used as the
# x axis of the time-series plots
dates = death.retrieve_dates(data_death)

# import the population per country (needed for the per-million figures)
population = LoadData(url3)
country_population = population.downloadPopulation()

# summary of the 10 biggest latest death counts - absolute and per million -
# plus the label of the last column of the deaths table
country_sort10_abs, country_sort10_rel, lastDate = sortN(data_death,country_population,10)

# choose the countries to analyse: uncomment the explicit list below, or keep
# the 10 countries with the biggest absolute value
#list_of_countries = ['Italy', 'Spain', 'United Kingdom', 'France', 'US', 'Sweden']
list_of_countries = country_sort10_abs.index.to_list()

Summary plots

In [None]:
# Bar charts of the ranked countries: absolute deaths, then deaths per million.
# Bar positions follow the length of each ranking instead of a fixed 10, so
# the cell still works when fewer countries are available. Ticks are set on
# the axes object itself rather than through the implicit pyplot state.
fig1, ax0 = plt.subplots(figsize=(8, 4))
pos = np.arange(len(country_sort10_abs))
ax0.bar(pos, country_sort10_abs, align='center')
ax0.set_ylabel('deaths')
ax0.set_xticks(pos)
ax0.set_xticklabels(country_sort10_abs.index)
ax0.set_title(lastDate + ' absolute')
ax0.yaxis.grid(True)
fig1.autofmt_xdate()

fig2, ax0 = plt.subplots(figsize=(8, 4))
pos = np.arange(len(country_sort10_rel))
ax0.bar(pos, country_sort10_rel, align='center')
ax0.set_ylabel('deaths')
ax0.set_xticks(pos)
ax0.set_xticklabels(country_sort10_rel.index)
ax0.set_title(lastDate+ ' per million')
ax0.yaxis.grid(True)
fig2.autofmt_xdate()

Time series plots

In [None]:

# Plot values per country and compare the countries with each other.
# fig10 holds the absolute totals (linear scale left, log scale right);
# fig11 holds the totals per million inhabitants in the same layout.
fig10, (ax10, ax11) = plt.subplots(ncols=2, figsize=(10, 6))
fig11, (ax12, ax13) = plt.subplots(ncols=2, figsize=(10, 6))

for country in list_of_countries:
    # number of deaths
    n_death_country_abs, n_death_country_rel = Country(data_death, country_population, country).nvalues()
    n_death_daily = daily_values(n_death_country_abs)
    # number of confirmed cases (only used by the optional curves below)
    n_conf_country_abs, n_conf_country_rel = Country(data_conf, country_population, country).nvalues()
    n_conf_daily = daily_values(n_conf_country_abs)

    # plot per country: cumulative total on top, daily values underneath
    fig1, (ax0, ax1) = plt.subplots(nrows=2, figsize=(8, 6))
    ax0.plot(dates, n_death_country_abs, label='deaths')
    # ax0.plot(dates, n_conf_country_abs, label='confirmed cases')
    ax0.set_ylabel('total', fontsize=12)
    ax0.set_title(country + ' absolute')
    format_plot(fig1, ax0)

    ax1.plot(dates, n_death_daily, label='deaths daily')
    # ax1.plot(dates, n_conf_daily, label='confirmed cases')
    # the legend text follows window_size so it cannot drift from the data
    ax1.plot(dates, n_death_daily.rolling(window_size).mean().to_list(), 'r--',
             label='deaths {}day av'.format(window_size))
    ax1.set_ylabel('daily', fontsize=12)
    format_plot(fig1, ax1)

    # compare countries: add this country's curve to every comparison panel
    ax10.plot(dates, n_death_country_abs, label=country)
    ax11.semilogy(dates, n_death_country_abs, label=country)
    ax12.plot(dates, n_death_country_rel, label=country)
    ax13.semilogy(dates, n_death_country_rel, label=country)

# Labels, titles and legends of the comparison panels do not depend on the
# country, so they are set once after all curves exist. Each panel gets its
# own title: the log-scale panels (ax11, ax13) previously never received one
# because the title was set on ax10 / ax12 a second time.
for cmp_fig, cmp_ax, cmp_title in (
        (fig10, ax10, 'absolute'),
        (fig10, ax11, 'absolute'),
        (fig11, ax12, 'relative per million'),
        (fig11, ax13, 'relative per million')):
    cmp_ax.set_ylabel('death', fontsize=12)
    cmp_ax.set_title(cmp_title)
    format_plot(cmp_fig, cmp_ax)
