In [None]:
import pandas as pd
import time
import datetime
import matplotlib.pyplot as plt
import numpy as np
from scipy.constants import golden
from sklearn.linear_model import LinearRegression
import ipywidgets as widgets
import uuid
import os
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import pytz
import random
from collections import OrderedDict 
from matplotlib import animation
import collections

# Import data

In [None]:
# Select your local timezone to get the right date when running the notebook
localtimezone = pytz.timezone("US/Pacific")
# Take the current system time and express it in the selected timezone
today = datetime.datetime.today().astimezone(localtimezone)
print(f"{today.year}-{today.month:02d}-{today.day:02d}")

In [None]:
# The dated part of the file name is built from `today`
filename = (
    "https://www.ecdc.europa.eu/sites/default/files/documents/"
    f"COVID-19-geographic-disbtribution-worldwide-{today.year}-{today.month:02d}-{today.day:02d}.xlsx"
)

In [None]:
# Only treat empty cells as missing values. With pandas' default NA parsing the
# literal string "NA" is converted to NaN, which would silently drop any country
# whose code is "NA" (Namibia's ISO code) from every `df[country] == code` filter.
df = pd.read_excel(filename, keep_default_na=False, na_values=[""])

In [None]:
# Reverse the row order (presumably the sheet lists the newest rows first -- confirm).
# The index check makes the cell idempotent: after the first reversal the index is
# decreasing, so re-running the cell does not flip the data back.
if df.index.is_monotonic_increasing:
    df = df[::-1]
df.head()

In [None]:
# Column names are picked by position in the sheet
datecol, cases, country = df.columns[0], df.columns[4], df.columns[7]
# The population column was renamed in the past; taking the first column whose
# name contains 'pop' tries to avoid errors in the future
population_column = [name for name in df.columns if 'pop' in name][0]
datecol, cases, country, population_column

# Define required functions

In [None]:
def logarithmic_curve(x, a, b, c, d):
    """
    Logistic (sigmoid) function, despite the name.

    x: value(s) where the curve is evaluated
    a: height of the curve (distance between its lower and upper plateau)
    b: vertical offset added to the whole curve
    c: growth rate, i.e. steepness of the curve
    d: x value of the sigmoid's midpoint
    """
    decay = np.exp(-c * (x - d))
    sigmoid = a / (1 + decay)
    return sigmoid + b

In [None]:
def exp_curve(x, a, b):
    """
    Exponential function a * exp(b * x)
    a is the value at x = 0
    b is the growth rate
    """
    growth = np.exp(b*x)
    return a*growth

In [None]:
def sigmoid_derivative(x, a, b, c, d, e, f):
    """
    Product of one logistic curve (parameters a, b, c) and one minus a second
    logistic curve (parameters d, e, f), both evaluated at x.

    NOTE(review): as written this raises a TypeError when called, because
    logarithmic_curve is defined in this notebook with four curve parameters
    (a, b, c, d) and only three are passed in each call below. The intended
    formula cannot be told from here -- confirm before using this function.
    """
    return logarithmic_curve(x, a, b, c)*(1-logarithmic_curve(x, d, e, f))

# Define all plots

In [None]:
def plot_new_cases(confirmed, population, deaths, ax, log_scale=False, rolling_windows_days=15, image_width=6, markersize=10, show_deaths=False):
    """
    Plot the number of daily new cases on the given axes.

    With log_scale=False the y axis is linear and the rolling average of the daily
    cases is drawn as the trend. With log_scale=True the y axis is logarithmic and
    the cumulative sum of new cases is displayed instead of the trend.

    confirmed: pandas Series with the daily new cases, one value per day
    population: number of inhabitants, used for the cases-per-million legend title
    deaths: pandas Series with the daily deaths (only drawn if show_deaths is True)
    ax: matplotlib axes to draw on
    rolling_windows_days: window, in days, of the rolling average
    image_width: not used by this function
    markersize: size of the marker highlighting the day with most cases
    show_deaths: draw daily (linear) or cumulative (log scale) deaths as well

    Returns the axes that were passed in.
    """
    from matplotlib.ticker import StrMethodFormatter

    days = np.arange(0, len(confirmed))

    if show_deaths:
        death_days = np.arange(0, len(deaths))
        if log_scale:
            cumulative_deaths = deaths.cumsum().values
            ax.plot(death_days, cumulative_deaths, color='#6c6c6c', lw=2, label='cumulative deaths')
            ax.fill_between(death_days, 0, cumulative_deaths, color='#c9c9c9')
            ax.text(0.65*len(deaths), max(deaths), f"Deaths: {deaths.sum()}")
        else:
            ax.plot(death_days, deaths.values, color='#6c6c6c', lw=2, label='daily deaths')
            # the text position is given in axes coordinates (transform=ax.transAxes)
            ax.text(0.65, max(deaths)/max(confirmed), f"Deaths: {deaths.sum()}", transform=ax.transAxes)

    ax.plot(days, confirmed, color='#ff5900', lw=2, label='daily')
    if log_scale:
        ax.plot(days, confirmed.cumsum(), color='#0028bb', alpha=0.8, lw=2, label='cumulative')
    else:
        ax.plot(days, confirmed.rolling(window=rolling_windows_days).mean(),
                color='#0028bb', lw=2, alpha=0.8, label=f"{rolling_windows_days}-day average")

    # Locate the peak on the raw values: np.argmax on a Series returned the index
    # *label* (e.g. a Timestamp) in older pandas releases, while the x axis of this
    # plot is the positional day number.
    peak_day = int(np.argmax(np.asarray(confirmed)))
    ax.plot(peak_day, max(confirmed), ".",  markersize=markersize, color='r',
            label=f'Max cases in one day ({max(confirmed):,})')

    ax.set_yscale(f"{'log' if log_scale else 'linear'}")
    ax.set_xlabel('Days since first infection')
    ax.legend(title=f"Infections: {confirmed.sum():,} ({confirmed.sum()/population*1e6:,.0f}/1M)",
              loc='best')
    ax.set_title(f"{'Infections (logarithmic scale)' if log_scale else 'Infections'}")
    ax.grid(which='major', axis='y', alpha=0.7, linewidth=0.7)
    ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}')) # comma separated numbers, no decimal places

    return ax

In [None]:
def plot_growth_factor(confirmed, population, date_first_infection, ax, rolling_windows_days=15, image_width=6, markersize=10):
    """
    Plot the rolling average of the daily growth factor and its linear trend.

    The growth factor of a day is the number of new cases of that day divided by
    the number of new cases of the previous entry in `confirmed`.

    confirmed: pandas Series of daily new cases indexed by date
    population: not used by this function
    date_first_infection: date subtracted from the index to get day numbers for the trend fit
    ax: matplotlib axes to draw on
    rolling_windows_days: window, in days, of the rolling average
    image_width, markersize: not used by this function

    Returns the axes that were passed in.
    """
    from matplotlib.ticker import StrMethodFormatter

    growth_ratedf = confirmed/confirmed.shift()
    growth_rate = growth_ratedf.values
    rolling_mean = growth_ratedf.rolling(window=rolling_windows_days).mean()

    # Linear trend of the growth factor; the first entry is skipped because it has
    # no previous day and therefore no growth factor (NaN)
    lr = LinearRegression()
    days_gr = np.array((confirmed.index - date_first_infection).days)
    lr.fit(days_gr[1:].reshape(-1,1), growth_rate[1:].reshape(-1,1))
    growth_trend = lr.predict(days_gr[1:].reshape(-1,1))

    ax.plot(np.arange(0, len(rolling_mean.values)), rolling_mean.values, color='#ff5900', linewidth=2, label=f'{rolling_windows_days}-day average')
    # The trend values belong to entries 1..n-1, so they are drawn starting at x=1
    # to stay aligned with the rolling average (which starts at x=0)
    ax.plot(np.arange(1, len(days_gr)), growth_trend, color='#0028bb', alpha=0.8, lw=2, label='trend')
    ax.axhline(1.0, color='#000000', ls="--", lw=1, label='No growth')

    latest_average = rolling_mean.values[-1]
    ax.text(.67, 0.82, f"Current = {growth_rate[-1]:.2f}", size=10, transform=ax.transAxes)
    ax.text(.67, 0.79, f"{rolling_windows_days}-day avg= {latest_average:.2f}", size=10, transform=ax.transAxes)

    # Doubling time in days is log(2)/log(growth factor): it is one day or less
    # when the growth factor is at least 2
    if round(latest_average, 2) >= 2:
        ax.text(.62, 0.76, f"New cases double every day", size=10, transform=ax.transAxes)
    elif round(latest_average, 2) > 1:
        doubling_days = np.log10(2)/np.log10(latest_average)
        # decide the plural on the rounded value that is actually displayed
        plural = 's' if round(doubling_days) != 1 else ''
        ax.text(.57, 0.76, f"New cases double every {doubling_days:.0f} day{plural}", size=10, transform=ax.transAxes)

    ax.set_xlabel('Days since first infection')
    ax.legend(loc='upper right')
    ax.set_title('Growth factor')
    ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}')) # comma separated numbers, two decimal places
    ax.grid(which='major', axis='y', alpha=0.7, linewidth=0.7)

    return ax

In [None]:
def plot_prediction(confirmed, population, date_first_infection, ax, rolling_windows_days=15, image_width=6, markersize=8):
    """
    This function plots the predicted cases until the infection activity stabilizes using the fit to the logistic function
    (the function named logarithmic_curve in this notebook).
    The inflection point is highlighted in the plot as well as the last date in the data set
    The inflection point reflects half of the time (in the x axis) and half of the affected population (in the y axis)

    Nothing is fitted unless `confirmed` holds more than 10 entries; in that case only the
    axis label, legend, title and grid are set.

    confirmed: pandas Series of daily new cases (its cumulative sum is what gets fitted)
    ax: matplotlib axes to draw on
    markersize: size of the dots of the reported cases
    population, date_first_infection, rolling_windows_days, image_width: not used by this function

    Returns the axes, or 0 when both the logistic and the exponential fit fail.
    """
    from scipy.optimize import curve_fit
    from sklearn.metrics import r2_score
    from matplotlib.ticker import StrMethodFormatter
    
    if len(confirmed) > 10:
        passed_inflection_point = False

        # plot the actual cases reported in the database as blue dots
        ax.plot(np.arange(0, len(confirmed)), confirmed.cumsum(), ".", color='#0028bb', alpha=0.8, markersize=markersize, label="Reported cases")

        # Create the x values for the fit function: number of days since the first infection
        # The y-values are the cumulative number of cases: confirmed.cumsum()
        x = np.arange(0, len(confirmed))
        # Set the initial parameters for the fit function
        # a = current maximum value
        # b = 1
        # c = 0.5
        # d = half the number of days with new infections for the selected country
        p0 = [confirmed.values.sum(), 1, 0.5, len(confirmed)//2]
        myfunction = logarithmic_curve
        try:
            # actual fit to the logistic function
            logistic_params, covariance = curve_fit(myfunction, x, confirmed.cumsum().values, p0=p0)
            a, b, c, d = logistic_params
        except Exception as e1:
            print(e1)
            # fall back to an exponential fit when the logistic fit fails
            try:
                myfunction = exp_curve
                p0 = [1.0, 1.0]
                d = 0
                logistic_params, covariance = curve_fit(myfunction, x, confirmed.cumsum().values, p0=p0)
                a, b = logistic_params
            except Exception as e2:
                print(e2)
                # NOTE(review): returns 0 instead of the axes, unlike every other exit path
                return 0
        else:
            # NOTE(review): this `else` belongs to the outer `try`, so everything below only runs
            # when the logistic fit succeeded. When the exponential fallback succeeds, its result
            # is not drawn -- confirm whether that is intended.

            # Detect if the inflection point has been reached
            if d > 0 and d < len(confirmed):
                passed_inflection_point = True
            else:
                # NOTE(review): days_to_inflection and plural are not used anywhere below
                days_to_inflection = int(d)-len(confirmed)+1
                plural = f"{'s' if days_to_inflection > 1 else ''}"

            # Calculate how good is the fit
            confirmed_pred = myfunction(np.arange(0, len(confirmed)), *logistic_params)
            r2 = r2_score(confirmed.cumsum().values, confirmed_pred)

            # Plot R2 rendering LaTeX on the plot
            ax.text(0.2, 0.3, f"$R^2$={r2:.4f}", transform=ax.transAxes)

            # calculate when the max number of cases increases so little new cases are not increasing
            # this provides a slightly more accurate value than simply doubling the days of the inflection point
            # uncomment the following lines if you want to see the difference
            """
            days_pred = np.arange(len(confirmed)-(len(confirmed)%15), 360, 15)
            ypred = logarithmic_curve(days_pred, *logistic_params)

            diff_ypred = [ypred[i + 1] - ypred[i] for i in range(len(ypred)-1)]
            days_diff = days_pred[np.argmax(diff_ypred < np.max(diff_ypred)*0.000001)] - len(confirmed)

            x = np.arange(0, days_pred[np.argmax(diff_ypred < np.max(diff_ypred)*0.000001)])
            y = logarithmic_curve(x, *logistic_params)
            """
            # Calculate the whole curve using the double of days than the inflection point
            # Comment the next 3 lines if you are testing the calculation above
            x = np.arange(0, np.ceil(d*2))
            y = myfunction(x, *logistic_params)
            # always true in this branch: myfunction is only replaced when the logistic fit fails
            if myfunction == logarithmic_curve:
                y_inflection_point = myfunction(int(d), *logistic_params)
                days_diff = int(d) - len(confirmed)
                days_to_asymptote = int(d*2) - len(confirmed)

            # NOTE(review): x is empty when d <= 0; y is then replaced by a one-element list, so the
            # ax.plot(x, y) call below would receive sequences of different length -- confirm
            if not len(y) > 0:
                y = [0]
            # plot a vertical black dotted line indicating the end of the actual data
            ax.axvline(len(confirmed), color='k', ls=':', label=f"Today (day {len(confirmed)})")
            # NOTE(review): these two values are never read; they are reassigned before their only use
            text_x = len(confirmed)+4
            text_y = 0.05*max(max(y), confirmed.sum())

            # generate the inflection point label
            inflection_point_label = f"Inflection point{' in ' if not passed_inflection_point else ' '}{abs(days_diff+1 )} days{' ago' if passed_inflection_point else ''}"
            # plot a vertical red dotted line indicating the location of the inflection point as a reference
            ax.axvline(int(d), color='r', ls=':', label=inflection_point_label)

            # annotate the inflection point as a reference
            # define the location of the annotation text
            xytext = (int(d)*1.2, y_inflection_point*1.25) if not passed_inflection_point else (int(d)*.7, y_inflection_point*1.25)
            # define the text box color and transparency
            props = dict(facecolor='white', alpha=0.8, edgecolor='white')
            ax.annotate(f"{int(d)} days", xy=(int(d), y_inflection_point), xycoords='data',
                        xytext=xytext,  textcoords='data',
                        arrowprops=dict(arrowstyle="-|>",
                                  connectionstyle=f"arc3,rad={0.2 if passed_inflection_point else -0.2}", fc='r'),
                        bbox=props
                       )
            # plot the fitted curve in orange
            ax.plot(x, y, color='#ff5900', lw=2, label="Fitted logistic function")

            # plot the predicted number of cases at the end of the fitted curve and the number of days until then
            # (only when the curve ends above the current cumulative total)
            if confirmed.cumsum().values[-1] < y[-1]:
                # position in axes coordinates (transform=ax.transAxes)
                text_x = 0.4
                text_y = 0.02
                ax.text(text_x, text_y, f"Cases in {abs(days_to_asymptote)} days from today: {int(y[-1]):,}", 
                        transform=ax.transAxes, bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
#                 ax.ticklabel_format(axis='y', style='', scilimits=None, useOffset=None, useLocale=None, useMathText=None)
            ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}')) # comma separated numbers, no decimal places

            # hatch the area between the current cumulative total and the fitted curve after the last data point
            # (blue hatch once the inflection point has passed, red otherwise)
            ax.fill_between(x[len(confirmed):], confirmed.cumsum().values[-1], y[len(confirmed):],
                             facecolor="none", hatch="/", edgecolor=f"{'b' if passed_inflection_point else 'r'}", linewidth=0.0,
                             label=f"Predicted increase")
    
    ax.set_xlabel('Days since first infection')
    ax.legend(loc='upper left')
    ax.set_title('Evolution (prediction)')
    ax.grid(which='major', axis='y', alpha=0.7, linewidth=0.7)
    
    return ax

# Putting all plots together

In [None]:
def covid_graphs(selectedcountry, threshold=10, image_width=6, show_deaths=False, save_figure=False):
    """
    Draw the 2x2 overview figure for one country: daily cases (linear and
    logarithmic scale), growth factor and logistic prediction.

    selectedcountry: value of the `country` column identifying the country
    threshold: minimum number of total cases required to draw the graphs
    image_width: width (and height) in inches of each of the four panels
    show_deaths: also draw the deaths in the two daily-cases panels
    save_figure: save the figure as a PNG in the user's Documents/COVID-19 folder

    Returns 0 (leaving the panels empty) when the country has fewer total cases
    than `threshold`, otherwise None.
    """
    nrows = 2
    ncols = 2
    markersize = 10
    rolling_windows_days = 15
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(image_width*ncols, image_width*nrows))
    fig.tight_layout(pad=3.0)

    # All filters use the same `country` column the dropdown values come from
    country_rows = df[df[country] == selectedcountry]
    infected_rows = country_rows[country_rows['cases'] >= 1]
    # Aggregate cases and deaths the same way so both series have one value per date
    daily_totals = infected_rows.groupby('dateRep')[['cases', 'deaths']].sum()
    confirmed = daily_totals['cases']
    deaths = daily_totals['deaths']
    population = country_rows[population_column].values[-1]

    if confirmed.values.sum() < threshold:
        # return empty plot if there are less than `threshold` total cases for the selected country
        return 0

    # 1st graph: new cases and rolling average in linear y-scale
    axs[0, 0] = plot_new_cases(confirmed, population, deaths, axs[0, 0],
                               rolling_windows_days=rolling_windows_days, markersize=markersize,
                               show_deaths=show_deaths)

    # 2nd graph: new cases and accumulated cases in logarithmic y-scale
    axs[0, 1] = plot_new_cases(confirmed, population, deaths, axs[0, 1], log_scale=True,
                               rolling_windows_days=rolling_windows_days, markersize=markersize,
                               show_deaths=show_deaths)

    # 3rd graph: growth factor evolution
    date_first_infection = infected_rows['dateRep'].min()
    axs[1, 0] = plot_growth_factor(confirmed, population, date_first_infection, axs[1, 0],
                                   rolling_windows_days=rolling_windows_days)

    # 4th graph: disease evolution prediction based on fit to logistic curve
    axs[1, 1] = plot_prediction(confirmed, population, date_first_infection, axs[1, 1], markersize=8)

    country_name = country_rows['countriesAndTerritories'].values[0].replace('_', ' ')
    date_str = today.strftime("%B %d, %Y")
    plt.suptitle(f"COVID-19 evolution in {country_name} (pop. {population:,.0f})\n{date_str}", y=1.025, size=15)

    if save_figure:
        filename = f"{selectedcountry}_{today.year}-{today.month:02d}-{today.day:02d}_{uuid.uuid4().hex}.png"
        # Build the path from its components: expanduser() only expands "~" when it is
        # followed by the platform's path separator, so r"~\Documents" stays unexpanded
        # outside Windows
        destination_folder = os.path.join(os.path.expanduser("~"), "Documents", "COVID-19")
        try:
            os.makedirs(destination_folder, exist_ok=True)
        except OSError as e:
            print(e)
        fig.savefig(os.path.join(destination_folder, filename),
                    dpi=300, bbox_inches='tight')
        print(f"file saved as: {os.path.join(destination_folder, filename)}")

In [None]:
countries = df.loc[df[df['cases'] >= 1].index,['countriesAndTerritories', 'geoId', 'cases']].groupby(['countriesAndTerritories', country], as_index=False)['cases'].sum().values

countries = countries[countries[:,2].argsort()[::-1]]

countries = [(f"{a} ({c:,})", b) for a, b, c in countries]

%matplotlib inline
style = {'description_width': 'initial'}
output = widgets.interact(covid_graphs,
                          selectedcountry=widgets.Dropdown(options=countries,
                                                           value=countries[0][1],
                                                           description='Country (total):',
                                                           style=style),
                          threshold=widgets.IntSlider(10, 5, 500, 5), 
                          image_width=widgets.IntSlider(6, 3, 10, 1),
                          show_deaths=widgets.Checkbox(False, disabled=False),
                          save_figure=widgets.Checkbox(False, disabled=False))

# Evolution by million people

In [None]:
import matplotlib.dates as mdates

# True: plot daily cases per million people; False: plot absolute daily cases
bypopulation = True

quarter = mdates.MonthLocator(interval=2)
months = mdates.MonthLocator()  # every month
weeks = mdates.DayLocator(interval=15)

number_of_countries=16
rolling_windows_days = 15

# Total cases per country; only the countries with the most cases are plotted
casesbycountry = [(c, df[df[country] == c]['cases'].sum()) for c in df[country][::-1].unique()]
cases_in_relevant_countries = sorted(casesbycountry,  key = lambda x: x[1], reverse=True)[:number_of_countries]
relevant_countries = [x[0] for x in cases_in_relevant_countries]
# Highest total among the selected countries (compare the totals, not the (code, total) tuples)
max_yaxis = max(total for _, total in cases_in_relevant_countries)


def _smoothed_daily_cases(geo_id):
    """Return (dates, rolling mean of daily cases) for one country, per million people if bypopulation."""
    rows = df[(df['cases'] >= 1) & (df[country] == geo_id)]
    smoothed = rows['cases'].rolling(window=rolling_windows_days).mean()
    if bypopulation:
        smoothed = smoothed/rows[population_column]*1e6
    return rows['dateRep'], smoothed


# Reference countries drawn as thin grey lines in every panel: (code, legend label, line style)
reference_countries = [
    ('CN', 'China', dict(color='#000000' if bypopulation else '#6c6c6c')),
    ('ES', 'Spain', dict(color='#7c7c7c', ls="-.")),
    ('US', 'USA', dict(color='#8c8c8c', ls=":")),
    ('BR', 'Brazil', dict(color='#9c9c9c', ls="--")),
]
# The reference curves are the same in every panel, so compute them only once
reference_curves = {code: _smoothed_daily_cases(code) for code, _, _ in reference_countries}

nrows = ncols = int(np.ceil(np.sqrt(len(relevant_countries))))
# squeeze=False keeps axs two-dimensional even for a 1x1 grid
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, nrows*5), sharey='row', squeeze=False)
fig.tight_layout(pad=3.0)
magenta = '#7a09ab'
myblue='#0300a0'
for position, ax in enumerate(axs.flat):
    if position >= len(relevant_countries):
        # remove the panels of the grid that have no country assigned
        fig.delaxes(ax)
        continue
    c = relevant_countries[position]

    for code, reference_label, line_style in reference_countries:
        dates, values = reference_curves[code]
        # an empty label keeps the reference out of the legend when it is the highlighted country
        ax.plot(dates, values, linewidth=1, label=reference_label if c != code else '', **line_style)

    dates, values = _smoothed_daily_cases(c)
    country_label = 'USA' if c == 'US' else df[df[country]==c]['countriesAndTerritories'].values[0].replace('_', ' ')
    ax.plot(dates, values, linewidth=3, color='#c0000a', label=country_label)

    ax.grid(axis='y', alpha=0.5)
    ax.legend()
    # format the ticks; every axis needs its own locator instance because a locator
    # keeps a reference to the axis it is attached to
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b %Y'))

# fig.text(0.5, -0.005, 'Days since 1st case', ha='center', fontsize=12)
ylabel = 'Daily cases per million people' if bypopulation else 'Daily cases'
fig.text(-0.01, 0.5, ylabel, va='center', rotation='vertical', fontsize=12)
plt.suptitle("COVID-19 cases evolution by country", y=1.01, fontsize=16)

# A dedicated name is used so the notebook-level `filename` (the data URL) is not overwritten
comparison_filename = f"Country_comparison_1Mpeople_{today.year}-{today.month:02d}-{today.day:02d}.png"
destination_folder = os.path.join(os.path.expanduser("~"), "Documents", "COVID-19")
# Uncomment the following lines to save the image on your hard disk
# fig.savefig(os.path.join(destination_folder, comparison_filename), dpi=300, bbox_inches='tight')
# print(f"File saved as: {os.path.join(destination_folder, comparison_filename)}")

# Analysis by continent

In [None]:
threshold=10
image_width=6
save_figure=False
nrows = 1
ncols = 2
markersize = 10
rolling_windows_days = 15
# Colors of the two countries with most cases in each continent
country_colors = ['#383838', '#b0b0b0']

for selectedcontinent in df['continentExp'].unique():
    continent_rows = df[df['continentExp'] == selectedcontinent]
    # Add up one population figure per country; summing the unique values of the
    # column would count two countries with the same population only once
    population = continent_rows.groupby('countriesAndTerritories')[population_column].max().sum()
    if not population > 0:
        continue

    infected_rows = continent_rows[continent_rows['cases'] >= 1]
    daily_totals = infected_rows.groupby('dateRep')[['cases', 'deaths']].sum()
    confirmed = daily_totals['cases']
    deaths = daily_totals['deaths']
    if confirmed.empty:
        # nothing to plot for a continent without any reported case
        continue

    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(image_width*ncols, image_width*nrows))
    fig.tight_layout(pad=3.0)

    # 1st graph: new cases and rolling average in linear y-scale
    axs[0] = plot_new_cases(confirmed, population, deaths, axs[0], log_scale=False, rolling_windows_days=rolling_windows_days,
                               image_width=image_width, markersize=markersize)

    # Overlay the rolling average of the two countries with most cases in the continent.
    # NOTE(review): the x values count each country's own rows with cases, not the days
    # since the continent's first case -- confirm the curves are meant to start at 0.
    top_countries = continent_rows.groupby('geoId')['cases'].sum().nlargest(2).index
    for i, selectedcountry in enumerate(top_countries):
        confirmed_country = infected_rows[infected_rows['geoId'] == selectedcountry]['cases']
        country_average = confirmed_country.rolling(rolling_windows_days).mean()
        country_name = df[df['geoId']==selectedcountry]['countriesAndTerritories'].values[0].replace('_', ' ')
        if len(country_name) > 13:
            # long names are replaced by the country code
            country_name = selectedcountry
        country_days = np.arange(0, len(confirmed_country))
        axs[0].plot(country_days, country_average, color=country_colors[i], lw=1)
        axs[0].text(len(confirmed_country), country_average.values[-1], f"{country_name}", fontsize=10)
        axs[0].fill_between(country_days, 0, country_average, color=country_colors[i], alpha=0.2)

    # 2nd graph: disease evolution prediction based on fit to logistic curve
    date_first_infection = infected_rows['dateRep'].min()
    axs[1] = plot_prediction(confirmed, population, date_first_infection, axs[1], rolling_windows_days=15, image_width=6, markersize=8)

    date_str = today.strftime("%B %d, %Y")
    plt.suptitle(f"COVID-19 evolution in {selectedcontinent} (pop. {population:,.0f})\n{date_str}", y=1.025, size=15)

    if save_figure:
        # A dedicated name is used so the notebook-level `filename` (the data URL) is not overwritten
        continent_filename = f"{selectedcontinent}_{today.year}-{today.month:02d}-{today.day:02d}.png"
        # Build the path from its components so "~" is also expanded outside Windows
        destination_folder = os.path.join(os.path.expanduser("~"), "Documents", "COVID-19")
        try:
            os.makedirs(destination_folder, exist_ok=True)
        except OSError as e:
            print(e)
        fig.savefig(os.path.join(destination_folder, continent_filename),
                    dpi=300, bbox_inches='tight')
        print(f"file saved as: {os.path.join(destination_folder, continent_filename)}")

# Growth factor in different countries

In [None]:
# Only display countries with at least 15,000 cases whose averaged growth factor is above 1
np.seterr(invalid='ignore')

min_number_cases = 15000
rolling_windows_days = 15
num_countries = 16
image_width = 6

# Collect (country, latest growth factor, latest rolling average) for the candidates
growth_rates = []
for selectedcountry in df[country].unique():
    confirmed = df[(df['cases'] >= 1) & (df[country]==selectedcountry)]['cases']
    if len(confirmed) <= 1:
        continue
    growth_ratedf = confirmed/confirmed.shift()
    growth_rate = growth_ratedf.values
    rolling_mean = growth_ratedf.rolling(window=rolling_windows_days).mean()
    # len(growth_rate) > 3: the comparison belongs outside of len()
    if confirmed.sum() > min_number_cases and len(growth_rate) > 3 and rolling_mean.values[-1] > 1.0:
        growth_rates.append((selectedcountry, growth_rate[-1], rolling_mean.values[-1]))

# Countries with the highest averaged growth factor first
growth_rates.sort(key = lambda x: x[2], reverse=True)
relevant_countries = [code for code, _, _ in growth_rates[:num_countries]]
# At least a 1x1 grid so an empty selection does not make plt.subplots fail
nrows = ncols = max(1, int(np.ceil(len(relevant_countries)**0.5)))

# squeeze=False keeps axs two-dimensional even for a 1x1 grid
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(image_width*ncols, image_width*nrows), squeeze=False)
fig.tight_layout(pad=3.0)
panels = axs.ravel()

# NOTE: this modifies the notebook-level df in place (kept as in the original cell)
df.fillna(0, inplace=True)
for i, selectedcountry in enumerate(relevant_countries):
    ax = panels[i]
    countrydf = df[(df['cases'] >= 1) & (df[country]==selectedcountry)]
    confirmed = countrydf['cases']
    confirmed_cumsum = confirmed.cumsum().values

    growth_ratedf = confirmed/confirmed.shift()
    growth_rate = growth_ratedf.values
    rolling_mean = growth_ratedf.rolling(window=rolling_windows_days).mean()

    # Day number of every row, counted from the first row with cases
    first_day = countrydf['dateRep'].min()
    days_gr = np.array([(day - first_day).days for day in countrydf[datecol]])

    # Linear trend of the growth factor. Outliers are clipped at mean + 2 standard
    # deviations, the same bound used for the y axis limit below. The first entry is
    # skipped because it has no previous day and therefore no growth factor.
    observed = growth_rate[1:]
    upper_bound = np.mean(observed) + 2*np.std(observed)
    lr = LinearRegression()
    lr.fit(days_gr[1:].reshape(-1,1), np.clip(observed, 0, upper_bound).reshape(-1,1))
    growth_trend = lr.predict(days_gr[1:].reshape(-1,1))

    ax.plot(np.arange(0, len(rolling_mean.values)), rolling_mean.values, color='#fc5e03', linewidth=2, label=f'{rolling_windows_days}-day average')
    # The trend values belong to entries 1..n-1, so they are drawn starting at x=1
    ax.plot(np.arange(1, len(days_gr)), growth_trend, color='#0000ff', label='trend')
    ax.axhline(1.0, color='#000000', ls="--", lw=1, label='No growth')

    latest_average = rolling_mean.values[-1]
    ax.text(.67, 0.82, f"Current = {growth_rate[-1]:.2f}", size=10, transform=ax.transAxes)
    ax.text(.67, 0.79, f"{rolling_windows_days}-day avg= {latest_average:.2f}", size=10, transform=ax.transAxes)

    # Doubling time in days is log(2)/log(growth factor): it is one day or less
    # when the growth factor is at least 2
    if round(latest_average, 2) >= 2:
        ax.text(.62, 0.76, f"Affected double every day", size=10, transform=ax.transAxes)
    elif round(latest_average, 2) > 1:
        doubling_days = np.log10(2)/np.log10(latest_average)
        # decide the plural on the rounded value that is actually displayed
        plural = 's' if round(doubling_days) != 1 else ''
        ax.text(.57, 0.76, f"Affected double every {doubling_days:.0f} day{plural}", size=10, transform=ax.transAxes)

    ax.set_xticks(np.arange(0, len(confirmed), 10))
    ax.set_ylim(bottom=0, top=min(np.nanmean(growth_rate)*3, np.nanmean(growth_rate)+2*np.nanstd(growth_rate)))
    ax.legend(loc='upper right')
    kountry = df[df[country]==selectedcountry]['countriesAndTerritories'].unique()[-1]
    ax.set_title(f'{kountry} ({confirmed_cumsum[-1]:,.0f} total infected)')
    ax.grid(which='major', axis='y', alpha=0.7, linewidth=0.7)

# Remove the panels of the grid that have no country assigned
for unused_ax in panels[len(relevant_countries):]:
    fig.delaxes(unused_ax)

plt.suptitle(f"Growth factor: Top {len(relevant_countries)} countries", y=1.01, size=15)
fig.text(0.4, 0, "Days since first infection", va='center', fontsize=16)
plt.show()
# Uncomment the following lines to save the image on your hard disk
# growth_filename = f"Growth_rates_{today.year}-{today.month:02d}-{today.day:02d}_{uuid.uuid4().hex}.png"
# destination_folder = os.path.join(os.path.expanduser("~"), "Documents", "COVID-19")
# fig.savefig(os.path.join(destination_folder, growth_filename), dpi=300, bbox_inches='tight')
# print(f"file saved as: {os.path.join(destination_folder, growth_filename)}")