In [1]:
import pandas as pd
import time
import datetime
import matplotlib.pyplot as plt
import numpy as np
from scipy.constants import golden
from sklearn.linear_model import LinearRegression
import ipywidgets as widgets
import uuid
import os
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import pytz
import random

In [2]:
# Select your local timezone to get the right date when running the notebook.
# `today` is reused further down to build the download URL and the figure titles.
localtimezone = pytz.timezone("US/Pacific")

# Ask for the current time directly in the chosen timezone instead of taking a
# naive local timestamp and converting it afterwards.
today = datetime.datetime.now(tz=localtimezone)
print(f"{today.year}-{today.month:02d}-{today.day:02d}")

2020-04-04


In [3]:
# URL of the workbook for `today`'s date (one dated file per day).
# NOTE(review): "disbtribution" looks like a typo but is presumably the spelling
# used by the published URL - confirm before "fixing" it.
filename = (
    "https://www.ecdc.europa.eu/sites/default/files/documents/"
    f"COVID-19-geographic-disbtribution-worldwide-{today.year}-{today.month:02d}-{today.day:02d}.xlsx"
)

In [4]:
# Load the workbook. pandas' default NA parsing turns the literal text "NA" into
# NaN; since the country key used below is a two-letter code column, a code of
# "NA" (e.g. Namibia) would silently vanish from every filter and groupby.
# Only genuinely empty cells are treated as missing here.
df = pd.read_excel(filename, keep_default_na=False, na_values=[""])

In [5]:
# Put the rows in chronological order within each country: the cumulative sums
# computed later assume ascending dates. An explicit sort (instead of reversing
# the frame) keeps this cell idempotent - running it twice no longer flips the
# data back to newest-first. Countries are sorted descending so the resulting
# order matches what reversing the downloaded sheet used to produce.
df = df.sort_values(['countriesAndTerritories', 'dateRep'], ascending=[False, True])
df.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018
8703,2020-03-21,21,3,2020,1,0,Zimbabwe,ZW,ZWE,14439018.0
8702,2020-03-22,22,3,2020,1,0,Zimbabwe,ZW,ZWE,14439018.0
8701,2020-03-23,23,3,2020,0,0,Zimbabwe,ZW,ZWE,14439018.0
8700,2020-03-24,24,3,2020,0,1,Zimbabwe,ZW,ZWE,14439018.0
8699,2020-03-25,25,3,2020,0,0,Zimbabwe,ZW,ZWE,14439018.0


In [6]:
# Columns used throughout the notebook, bound by name rather than by position so
# that an inserted or reordered column in the sheet cannot silently select the
# wrong data.
datecol = 'dateRep'
cases = 'cases'
country = 'geoId'

# Fail here, with a clear message, if the downloaded sheet changed its schema.
missing_columns = {datecol, cases, country} - set(df.columns)
assert not missing_columns, f"missing expected columns: {sorted(missing_columns)}"
datecol, cases, country

('dateRep', 'cases', 'geoId')

In [7]:
def logarithmic_curve(x, a, c, d):
    """Evaluate a logistic (sigmoid) curve at ``x``.

    Despite its name, this is the logistic function
    ``a / (1 + exp(-c * (x - d)))``:

    * ``a`` - upper asymptote (value approached as ``x`` grows, for ``c > 0``),
    * ``c`` - steepness of the curve,
    * ``d`` - position of the midpoint, where the value is ``a / 2``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    decay = np.exp(-c * (x - d))
    return a / (1 + decay)

In [8]:
def exp_curve(x, a, b):
    """Evaluate the exponential ``a * exp(b * x)`` at ``x``.

    ``a`` is the value at ``x == 0`` and ``b`` the growth rate. ``x`` may be a
    scalar or a NumPy array; the result has the same shape.
    """
    growth = np.exp(b * x)
    return a * growth

In [9]:
# A country needs at least this many reported cases before anything is drawn.
_MIN_TOTAL_CASES = 10
# A curve is only fitted when there are more than this many data points.
_MIN_POINTS_FOR_FIT = 10


def _plot_infections(ax, daily, cumulative, deaths_cumulative, total_deaths, yscale, title, markersize):
    """Draw daily and cumulative infections (plus optional death bars) on ``ax``."""
    positions = np.arange(0, len(daily))

    if deaths_cumulative is not None:
        ax.bar(positions, deaths_cumulative, color='#c9c9c9', label='cumulative deaths')
        ax.text(0.65, 0.9, f"Deaths: {total_deaths}", transform=ax.transAxes)

    ax.set_yscale(yscale)
    ax.plot(positions, daily, color='#fc5e03', label='daily')
    ax.plot(positions, cumulative, color='b', label='cumulative')
    ax.plot(np.argmax(daily), max(daily), ".", markersize=markersize, color='r', label='Max cases in one day')

    ax.set_xlabel('Days since first infection')
    ax.text(0.65, 0.95, f"Infections: {daily.sum()}", transform=ax.transAxes)
    ax.legend()
    ax.set_title(title)


def _plot_growth_factor(ax, confirmed, day_offsets):
    """Draw the day-over-day growth factor, its 3-day mean and a linear trend on ``ax``."""
    growth_series = confirmed / confirmed.shift()
    growth_rate = growth_series.values  # first entry is NaN (no previous day)
    rolling_mean = growth_series.rolling(window=3).mean().values

    # The trend is fitted on calendar-day offsets, skipping the leading NaN.
    lr = LinearRegression()
    lr.fit(day_offsets[1:].reshape(-1, 1), growth_rate[1:].reshape(-1, 1))
    growth_trend = lr.predict(day_offsets[1:].reshape(-1, 1))

    positions = np.arange(0, len(growth_rate))
    ax.bar(positions, growth_rate, color='#bababa', label='daily rate')
    ax.plot(positions, rolling_mean, color='#fc5e03', linewidth=2, label='3-day average')
    # The trend values belong to entries 1..n-1, so they are drawn at those
    # positions to line up with the bars they were fitted to.
    ax.plot(positions[1:], growth_trend, color='#0000ff', label='trend')
    ax.hlines(1.0, 0, len(day_offsets[1:]), color='#000000', ls="--", lw=1)

    # Upper limit of the y axis: mean + 2 standard deviations, ignoring NaN.
    upper = np.nanmean(growth_rate) + 2 * np.nanstd(growth_rate)
    if (9 / upper) < 0.85:
        y_text = 0.85 - (9 / upper)
    else:
        y_text = 0.85

    ax.text(-0.5, y_text, "No growth", size=9)
    ax.text(.75, 0.8, f"Current = {growth_rate[-1]:.2f}", size=10, transform=ax.transAxes)
    ax.text(.75, 0.77, f"3-day avg= {rolling_mean[-1]:.2f}", size=10, transform=ax.transAxes)
    ax.text(.75, 0.74, f"Trend= {growth_trend[-1][0]:.2f}", size=10, transform=ax.transAxes)

    ax.set_xticks(np.arange(0, len(confirmed), 10))
    ax.set_xlabel('Days since first infection')
    ax.set_ylim(bottom=0, top=upper)
    ax.legend(loc='upper right')
    ax.set_title('Growth factor')


def _fit_cumulative_curve(x, cumulative):
    """Fit a logistic curve, falling back to an exponential one.

    Returns ``(function, params)``; ``(None, None)`` when neither fit converges.
    """
    try:
        params, _ = curve_fit(logarithmic_curve, x, cumulative)
    except (RuntimeError, ValueError):
        params = None  # logistic fit did not converge; try the exponential

    if params is not None:
        _, c, d = params
        # A negative steepness or midpoint is treated as "not a logistic shape".
        if not (c < 0 or d < 0):
            return logarithmic_curve, params

    try:
        params, _ = curve_fit(exp_curve, x, cumulative)
    except (RuntimeError, ValueError):
        return None, None
    return exp_curve, params


def _plot_prediction(ax, cumulative, threshold):
    """Draw the reported cumulative cases and a fitted curve on ``ax``."""
    x_observed = np.arange(0, len(cumulative))
    ax.plot(x_observed, cumulative, ".", markersize=8, label="Reported cases")

    fit_function, params = _fit_cumulative_curve(x_observed, cumulative)
    if fit_function is None:
        return  # nothing could be fitted; show the raw points only

    r2 = r2_score(cumulative, fit_function(x_observed, *params))
    ax.text(0.2, 0.3, f"$R^2$={r2:.4f}", transform=ax.transAxes)

    if fit_function is logarithmic_curve:
        a, c, d = params
        confirmed_now = cumulative[-1]
        confirmed_then = cumulative[-2]
        days = 0
        last_day = len(cumulative) - 1
        # Walk the fitted curve forward until the daily increase drops to
        # `threshold` cases or fewer.
        while confirmed_now - confirmed_then > threshold:
            days += 1
            confirmed_then = confirmed_now
            confirmed_now = logarithmic_curve(last_day + days, a, c, d)

        x = np.arange(0, len(cumulative) + days)
        y = logarithmic_curve(x, a, c, d)
        ax.plot([len(cumulative), len(cumulative)], [0, max(y)], color='k', ls=':')
        ax.plot(x, y, color='#fc5e03', label="Fitted logistic function")
        ax.text(0.55, 0.1, f"Total predicted cases: {int(confirmed_now)}", transform=ax.transAxes)
        ax.text(0.55, 0.05, f"Remaining growth days: {days}", transform=ax.transAxes)
        # With days == 0 the slice x[-0:] would select the whole array and
        # hatch the entire curve, so the shading is skipped in that case.
        if days > 0:
            ax.fill_between(x[-days:], cumulative[-1], y[-days:],
                            facecolor="none", hatch="/", edgecolor="b", linewidth=0.0,
                            label='Predicted increase')
    else:
        x = np.arange(0, int(len(cumulative) * 1.5))
        y = exp_curve(x, *params)
        ax.plot(x, y, color='#fc5e03', label="Fitted exponential function")


def _save_country_figure(fig, selectedcountry):
    """Save ``fig`` as a uniquely named PNG under ``~/Documents/COVID-19``."""
    filename = f"{selectedcountry}_{today.year}-{today.month:02d}-{today.day:02d}_{uuid.uuid4().hex}.png"
    # Built with os.path.join so the folder is correct on any operating system.
    destination_folder = os.path.expanduser(os.path.join("~", "Documents", "COVID-19"))
    try:
        os.makedirs(destination_folder, exist_ok=True)
    except OSError as e:
        print(e)
        return
    destination = os.path.join(destination_folder, filename)
    fig.savefig(destination, dpi=300, bbox_inches='tight')
    print(f"file saved as: {destination}")


def covid_graphs(selectedcountry, threshold=10, image_width=5, show_deaths=False, save_figure=False):
    """Draw a 2x2 dashboard of the COVID-19 evolution for one country.

    The panels are: infections (linear scale), infections (log scale), the
    day-over-day growth factor, and a fitted logistic/exponential projection.
    Data is read from the notebook-level ``df``; only days with at least one
    reported case are plotted.

    Parameters
    ----------
    selectedcountry : value of the ``country`` column identifying the country.
    threshold : projected daily increase (in cases) at or below which the
        logistic projection stops counting remaining growth days.
    image_width : width and height, in inches, of each of the four panels.
    show_deaths : when True, add cumulative-death bars to the infection panels.
        Deaths reported on days without new cases are included in the totals.
    save_figure : when True, also save the figure as a PNG file.

    Returns
    -------
    0 when the country has fewer than 10 reported cases (or fewer than two
    days with cases) and nothing is drawn; None otherwise.
    """
    country_rows = df[df[country] == selectedcountry]
    has_cases = country_rows[cases] >= 1
    case_rows = country_rows[has_cases]

    confirmed = case_rows[cases]
    confirmed_daily = confirmed.values
    # Bail out before creating a figure so no empty grid is displayed. Two
    # case-days are the minimum needed to compute a growth factor.
    if confirmed_daily.sum() < _MIN_TOTAL_CASES or len(confirmed_daily) < 2:
        return 0
    confirmed_cumsum = confirmed.cumsum().values

    if show_deaths:
        # Accumulate over every row of the country, then keep the displayed
        # days, so deaths reported on zero-case days are not lost.
        deaths_cumulative = country_rows['deaths'].cumsum()[has_cases].values
        total_deaths = country_rows['deaths'].sum()
    else:
        deaths_cumulative = None
        total_deaths = None

    # Calendar days elapsed since the first day with a reported case.
    first_day = case_rows[datecol].min()
    day_offsets = pd.to_timedelta(case_rows[datecol] - first_day).dt.days.values

    nrows = 2
    ncols = 2
    markersize = 10
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(image_width*ncols, image_width*nrows))
    fig.tight_layout(pad=3.0)

    _plot_infections(axs[0, 0], confirmed_daily, confirmed_cumsum, deaths_cumulative, total_deaths,
                     'linear', 'Infections', markersize)
    _plot_infections(axs[0, 1], confirmed_daily, confirmed_cumsum, deaths_cumulative, total_deaths,
                     'log', 'Infections (logarithmic scale)', markersize)
    _plot_growth_factor(axs[1, 0], confirmed, day_offsets)

    if len(confirmed_cumsum) > _MIN_POINTS_FOR_FIT:
        _plot_prediction(axs[1, 1], confirmed_cumsum, threshold)

    axs[1, 1].set_xlabel('Days since first infection')
    # Only build a legend when something was drawn on the panel.
    handles, _ = axs[1, 1].get_legend_handles_labels()
    if handles:
        axs[1, 1].legend(loc='upper left')
    axs[1, 1].set_title('Evolution (prediction)')

    country_name = country_rows['countriesAndTerritories'].values[0]
    date_str = today.strftime("%B %d, %Y")
    fig.suptitle(f"COVID-19 evolution in {country_name}\n{date_str}", y=1.025, size=15)

    if save_figure:
        _save_country_figure(fig, selectedcountry)


In [10]:
# Total reported cases per country, as rows of (name, country code, total).
totals_by_country = (
    df.groupby(['countriesAndTerritories', country], as_index=False)['cases']
    .sum()
    .values
)

# Row order that puts the countries with the most cases first.
most_cases_first = totals_by_country[:, 2].argsort()[::-1]

# (label, value) pairs for the dropdown: the label shows the name and the
# total, the value is the country code expected by `covid_graphs`.
countries = [
    (f"{name} ({total})", code)
    for name, code, total in totals_by_country[most_cases_first]
]

In [12]:
%matplotlib inline
style = {'description_width': 'initial'}
output = widgets.interact(covid_graphs,
                          selectedcountry=widgets.Dropdown(options=countries,
                                                           value=countries[0][1],
                                                           description='Country (total):',
                                                           style=style),
                          threshold=widgets.IntSlider(10, 5, 500, 5), 
                          image_width=widgets.IntSlider(6, 3, 10, 1),
                          show_deaths=widgets.Checkbox(False, disabled=False),
                          save_figure=widgets.Checkbox(False, disabled=True))

interactive(children=(Dropdown(description='Country (total):', options=(('United_States_of_America (277965)', …