In [None]:
# COVID-19 infections per country
# Copyright 2020 Denis Meyer
#
# Data source:
# 2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE
# * https://github.com/CSSEGISandData/COVID-19
#
# Plot options
# * All countries
# * Countries with highest infection rates
# * Countries with highest number of deaths
# * Specific countries (infections)
# * Specific countries (deaths)
# * Curve fit (infections) for a specific country
# * Curve fit (deaths) for a specific country
# * Multi curve fit (infections) for a specific country
# * Multi curve fit (deaths) for a specific country
# * Curve fit (infections) for multiple countries
# * Curve fit (deaths) for multiple countries
#
# Misc options
# * Option for displaying deaths per country inside the graphs

In [None]:
import logging
import io
import requests
import os
import datetime
import random

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import scipy.optimize
import numpy as np

from heapq import nlargest

In [None]:
# Settings

### Data source ###
# Johns Hopkins CSSE "2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository":
# https://github.com/CSSEGISandData/COVID-19
# Maps each data kind to the raw CSV time series it is downloaded from.
SETTINGS_DATASOURCE = dict(
    infections='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
    deaths='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
)

### Misc settings ###

# Master switch: its value is used for all three 'force_*' options below
FORCE_PLOT_ALL = False

SETTINGS = dict(
    # True: ignore the file cache; False: try to load the data for the current day from the file cache first
    force_refresh_data=False,
    # Ignores all plotting flags and plots (except "all countries")
    force_plot=FORCE_PLOT_ALL,
    # Ignores all "plot deaths" flags and draws the deaths into the plot where available (except "all countries")
    force_plot_deaths=FORCE_PLOT_ALL,
    # Ignores all "save_to_file" flags and saves to file (except "all countries") - plotting must be activated
    force_save_plot_to_file=FORCE_PLOT_ALL,
    # Directory the created plot images are saved to (relative to the current directory)
    plot_image_path='images',
    # Directory names of the CSV file cache
    csv_subdir_name='data',
    csv_infections_subdir_name='infections',
    csv_deaths_subdir_name='deaths',
    # Cache file names; '{}' is filled in via str.format
    csv_infections_filename='time_series_19-covid-Confirmed-{}.csv',
    csv_deaths_filename='time_series_19-covid-Deaths-{}.csv',
    # Plot configuration
    plot=dict(
        size=(20, 15),
        title='COVID-19 infections per country',
        label_x='Date',
        label_y='Nr of infections',
        label_x_deaths='Date',
        label_y_deaths='Nr of deaths',
    ),
    # Logging configuration
    logging=dict(
        loglevel=logging.INFO,
        date_format='%d-%m-%Y %H:%M:%S',
        format='[%(asctime)s] [%(levelname)-5s] [%(module)-20s:%(lineno)-4s] %(message)s',
    ),
)


### Data plotting settings ###

# Plot: Infections, all countries
SETTINGS_PLOT_ALL_COUNTRIES = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image
    filename='All-Countries.png',
    # First day to plot
    start_day=-1,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
)

# Plot: Infections and deaths, specific countries
SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Specific-Countries-Infections-{}.png',
    # First day to plot
    start_day=45,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether the deaths are drawn in addition to the infections
    plot_deaths=True,
    # Countries to plot
    countries=['Germany', 'Spain', 'Iran', 'US', 'France', 'Korea, South', 'Switzerland', 'United Kingdom'],
)

# Plot: Deaths, specific countries
SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Specific-Countries-Deaths-{}.png',
    # First day to plot
    start_day=45,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Countries to plot
    countries=['Germany', 'Spain', 'Iran', 'US', 'France', 'Korea, South', 'Switzerland', 'United Kingdom'],
)

# Plot: Infections and deaths, countries with highest number of infections
SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='{}-Countries-With-Highest-Number-Of-Infections.png',
    # First day to plot
    start_day=46,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether the deaths are drawn in addition to the infections
    plot_deaths=True,
    # Number of countries to plot
    nr_countries=10,
)

# Plot: Deaths, countries with highest number of deaths
SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='{}-Countries-With-Highest-Number-Of-Deaths.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Number of countries to plot
    nr_countries=10,
)

# Plot: Infections and deaths, curve fit for a specific country
SETTINGS_PLOT_CURVE_FIT_INFECTIONS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Curve-Fit-Infections-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether a prediction for 'predict_days' days is calculated.
    # If enabled, 'end_day' is ignored and deaths are not plotted.
    predict=True,
    # Number of days to predict
    predict_days=4,
    # Whether the deaths are drawn in addition to the infections
    plot_deaths=True,
    # The country to plot
    country='Germany',
    # First and last day of the data the curve is fitted to
    start_day_fit=63,
    end_day_fit=71,
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

# Plot: Deaths, curve fit for a specific country
SETTINGS_PLOT_CURVE_FIT_DEATHS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Curve-Fit-Deaths-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether a prediction for 'predict_days' days is calculated. If enabled, 'end_day' is ignored.
    predict=True,
    # Number of days to predict
    predict_days=4,
    # The country to plot
    country='Germany',
    # First and last day of the data the curve is fitted to
    start_day_fit=63,
    end_day_fit=71,
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

# Plot: Infections and deaths, multi curve fit for a specific country
SETTINGS_MULTI_CURVE_FIT_INFECTIONS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Multi-Curve-Fit-Infections-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether the deaths are drawn in addition to the infections
    plot_deaths=True,
    # The country to plot
    country='Germany',
    # Fitting data: one entry per fitted curve
    fits=[
        dict(start_day=40, end_day=50, plot_start_day=40, plot_end_day=52, color='lightskyblue'),
        # Disabled example of a custom fit function for this entry:
        #   fit_func=lambda x, a, b, c: a * np.exp(b * x)
        dict(start_day=51, end_day=58, plot_start_day=49, plot_end_day=64, color='blue'),
        dict(start_day=58, end_day=65, plot_start_day=56, plot_end_day=67, color='steelblue'),
        dict(start_day=67, end_day=71, plot_start_day=65, plot_end_day=-1, color='slateblue'),
    ],
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

# Plot: Deaths, multi curve fit for a specific country
SETTINGS_MULTI_CURVE_FIT_DEATHS = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Multi-Curve-Fit-Deaths-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # The country to plot
    country='Germany',
    # Fitting data: one entry per fitted curve
    fits=[
        # Disabled example of a custom fit function for this entry:
        #   fit_func=lambda x, a, b, c: a * np.exp(b * x)
        dict(start_day=51, end_day=58, plot_start_day=47, plot_end_day=60, color='blue'),
        dict(start_day=62, end_day=70, plot_start_day=58, plot_end_day=-1, color='steelblue'),
    ],
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

# Plot: Infections and deaths, curve fit for multiple countries
SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Curve-Fit-Infections-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Whether the deaths are drawn in addition to the infections
    plot_deaths=True,
    # Country and fitting data: one entry per country
    countries=[
        dict(name='Italy', start_day=63, end_day=71, color='tomato'),
        dict(name='US', start_day=63, end_day=71, color='seagreen'),
        dict(name='Spain', start_day=63, end_day=71, color='gold'),
        # Disabled example of a custom fit function for this entry:
        #   fit_func=lambda x, a, b, c: a * np.exp(b * x)
        dict(name='Germany', start_day=63, end_day=71, color='lightskyblue'),
    ],
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

# Plot: Deaths, curve fit for multiple countries
SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES = dict(
    # Enables this plot (may take some time to plot)
    plot=False,
    # Whether the plot is also saved to file
    save_to_file=False,
    # File name of the saved image; '{}' is replaced via str.format
    filename='Curve-Fit-Deaths-{}.png',
    # First day to plot
    start_day=40,
    # Last day to plot; use a number <= 0 to plot until the last day
    end_day=-1,
    # Country and fitting data: one entry per country
    countries=[
        dict(name='Italy', start_day=63, end_day=71, color='tomato'),
        dict(name='US', start_day=63, end_day=71, color='seagreen'),
        dict(name='Spain', start_day=63, end_day=71, color='gold'),
        # Disabled example of a custom fit function for this entry:
        #   fit_func=lambda x, a, b, c: a * np.exp(b * x)
        dict(name='Germany', start_day=63, end_day=71, color='lightskyblue'),
    ],
    # For debugging and parameter tweaking purposes: activate to plot only the data in the full range
    raw_data_only=False,
    # Whether days (instead of dates) are used as x-axis labels
    plot_days_as_label_x=False,
)

In [None]:
def initialize_logger(loglevel, frmt, datefmt):
    '''Initializes the root logger

    logging.basicConfig() silently does nothing when the root logger already
    has handlers attached (for example because the hosting environment
    installed its own). Existing root handlers are therefore removed first,
    so the requested level and format are always applied.

    :param loglevel: The log level
    :param frmt: The log format
    :param datefmt: The date format
    '''
    root_logger = logging.getLogger()
    # Iterate over a copy: removeHandler() mutates root_logger.handlers
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    logging.basicConfig(level=loglevel,
                        format=frmt,
                        datefmt=datefmt)

def download_csv_data(url, timeout=30):
    '''Downloads the CSV data and parses it into a dataframe

    The HTTP status is checked before parsing: otherwise the body of an error
    response (e.g. a "404: Not Found" page) would be parsed as if it were CSV
    data.

    :param url: The data source URL
    :param timeout: Seconds to wait for the server before giving up
    :return: The parsed CSV data as dataframe, None if no URL was given
    :raises requests.HTTPError: If the server answers with an error status
    '''
    if not url:
        return None

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))

def get_data(dir_csv, csv_subpath, filename_csv, url, force_refresh_data=False):
    '''Retrieves the data, either from the cache file or by downloading it

    The data is read from "<dir_csv>/<csv_subpath>/<filename_csv>" unless a
    refresh is forced. If that file is missing or empty, the data is downloaded
    from the URL and written to that file.

    :param dir_csv: The CSV directory
    :param csv_subpath: The CSV sub-directory
    :param filename_csv: The CSV filename
    :param url: The URL
    :param force_refresh_data: Boolean whether to force refreshing the data
    :return: Dataframe
    :raises ValueError: If the data has to be downloaded but no data is returned
    '''
    path_data = os.path.join(dir_csv, csv_subpath)
    # exist_ok avoids the check-then-create race of os.path.exists() + os.makedirs()
    os.makedirs(path_data, exist_ok=True)
    csv_file = os.path.join(path_data, filename_csv)

    df = None
    if force_refresh_data:
        logging.info('Force refreshing data')
    else:
        logging.info('Not force refreshing data')
        logging.info('Trying to load from file "{}"'.format(csv_file))
        try:
            df = pd.read_csv(csv_file, encoding='utf-8')
            logging.info('Successfully loaded data from file "{}"'.format(csv_file))
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # No cache file yet, or an empty one: fall back to downloading
            df = None

    if df is None:
        logging.info('Downloading fresh data from "{}"...'.format(url))
        df = download_csv_data(url)
        if df is None:
            # Fail with a clear message instead of an AttributeError on df.to_csv
            raise ValueError('No data could be downloaded from "{}"'.format(url))
        logging.info('Trying to save to file "{}"'.format(csv_file))
        df.to_csv(csv_file, encoding='utf-8', index=False)
        logging.info('Successfully saved to file "{}"'.format(csv_file))

    return df

def get_clean_image_name(name):
    '''Returns a clean image name: commas become dashes, spaces are removed

    :param name: The image name
    :return: Cleaned image name
    '''
    replacements = str.maketrans({',': '-', ' ': None})
    return name.translate(replacements)

def save_plot(curr_dir, fig, path, date, name):
    '''Saves the figure to "<curr_dir>/<path>/<date>/<cleaned name>"

    "<path>" falls back to "images" and "<date>" to "unknown" when not given.
    The target directory is created if it does not exist. Any error is logged
    and reported via the return value.

    :param curr_dir: The directory
    :param fig: The figure
    :param path: The image path
    :param date: The date
    :param name: The name of the image
    :return: True if successfully saved, False else
    '''
    try:
        image_dir = path or 'images'
        date_dir = str(date.date()) if date else 'unknown'
        target_dir = os.path.join(curr_dir, image_dir, date_dir)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        target_file = os.path.join(target_dir, get_clean_image_name(name))
        logging.info('Saving plot to "{}"'.format(target_file))
        fig.savefig(target_file)
    except Exception as e:
        logging.info('Could not save plot to file "{}" in path "{}": {}'.format(name, path, e))
        return False
    return True

def func_fit(x, a, b, c=1.0):
    '''Exponential model used for curve fitting: exp(a + b * x)

    :param x: x
    :param a: Offset inside the exponent
    :param b: Factor of x inside the exponent
    :param c: Accepted but not used by the calculation
    :return: Function applied to parameters
    '''
    exponent = a + b * x
    return np.exp(exponent)

def func_sigma(y):
    '''Curve fitting error function: the square root of the value

    :param y: y
    :return: Error for y
    '''
    sigma = np.sqrt(y)
    return sigma

def get_cmap(n, name='hsv'):
    '''Returns a function that maps each index in 0, 1, ..., n-1 to an RGB color.
    The keyword argument name must be a standard mpl colormap name.

    Uses the colormap registry where available, because the legacy accessor
    matplotlib.cm.get_cmap() is deprecated and no longer exists in recent
    matplotlib releases. Older releases still go through the legacy accessor.

    NOTE(review): 'hsv' is a cyclic colormap, so the first and the last of the
    n sampled colors are presumably very similar - confirm if that matters.

    :param n: Number of colors the colormap is resampled to
    :param name: standard mpl colormap name
    :return: Colormap function
    '''
    import matplotlib

    registry = getattr(matplotlib, 'colormaps', None)
    if registry is not None and isinstance(name, str):
        colormap = registry[name]
        if hasattr(colormap, 'resampled'):
            return colormap.resampled(n)

    # Older matplotlib (or a non-string 'name'): legacy accessor
    return plt.cm.get_cmap(name, n)

In [None]:
# Regular logger initialization is disabled (see the workaround below).
# Note: the call originally passed 'date_format' and 'format' in swapped positions with respect to
# initialize_logger(loglevel, frmt, datefmt); keyword arguments make the intended mapping explicit:
# initialize_logger(SETTINGS['logging']['loglevel'], frmt=SETTINGS['logging']['format'], datefmt=SETTINGS['logging']['date_format'])
# Logging + Jupyter is currently not working together (on my machine...)
# Workaround: replace logging.info with print, so that every logging.info(...) call is printed
logging.info = print

# Configure seaborn with the 'muted' color palette
sns.set(palette='muted')

In [None]:
# Root directory of the CSV file cache, below the current working directory
csv_dir = os.path.join(os.getcwd(), SETTINGS['csv_subdir_name'])
# Today's date as 'YYYY-MM-DD'; inserted into the cache file names
current_date_str = datetime.date.today().isoformat()

In [None]:
# Load 'confirmed' data (from today's cache file if present, downloaded otherwise)

df = get_data(
    csv_dir,
    SETTINGS['csv_infections_subdir_name'],
    SETTINGS['csv_infections_filename'].format(current_date_str),
    SETTINGS_DATASOURCE['infections'],
    force_refresh_data=SETTINGS['force_refresh_data'],
)

# Drop unnecessary columns
df = df.drop(columns=['Province/State', 'Lat', 'Long'])

In [None]:
# Load 'deaths' data (from today's cache file if present, downloaded otherwise)

df_deaths = get_data(
    csv_dir,
    SETTINGS['csv_deaths_subdir_name'],
    SETTINGS['csv_deaths_filename'].format(current_date_str),
    SETTINGS_DATASOURCE['deaths'],
    force_refresh_data=SETTINGS['force_refresh_data'],
)

# Drop unnecessary columns
df_deaths = df_deaths.drop(columns=['Province/State', 'Lat', 'Long'])

In [None]:
# One row per Country/Region: sum up all rows of a country and turn the group key back into a column
df_grouped_summed = (
    df
    .groupby('Country/Region')
    .sum()
    .reset_index()
)
# All columns after 'Country/Region' are the date columns
dates = df_grouped_summed.columns[1:].tolist()
date_first, date_last = (datetime.datetime.strptime(d, '%m/%d/%y') for d in (dates[0], dates[-1]))
logging.info('Working with "confirmed" data from {} to {}'.format(date_first.date(), date_last.date()))

In [None]:
# One row per Country/Region: sum up all rows of a country and turn the group key back into a column
df_deaths_grouped_summed = (
    df_deaths
    .groupby('Country/Region')
    .sum()
    .reset_index()
)
# All columns after 'Country/Region' are the date columns
dates_deaths = df_deaths_grouped_summed.columns[1:].tolist()
date_first_deaths, date_last_deaths = (datetime.datetime.strptime(d, '%m/%d/%y') for d in (dates_deaths[0], dates_deaths[-1]))
logging.info('Working with "deaths" data from {} to {}'.format(date_first_deaths.date(), date_last_deaths.date()))

In [None]:
# Names of all countries contained in the grouped "confirmed" data
all_countries_list = df_grouped_summed['Country/Region'].tolist()

In [None]:
# Plot: All countries

plot_name = 'All countries'

if SETTINGS_PLOT_ALL_COUNTRIES['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    countries = df_grouped_summed['Country/Region']

    # Plot
    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Resolve the plotted day range: settings outside the data fall back to the full range
    ed = SETTINGS_PLOT_ALL_COUNTRIES['end_day']
    plot_day_end = ed if 0 < ed < len(dates) else len(dates)
    sd = SETTINGS_PLOT_ALL_COUNTRIES['start_day']
    plot_day_start = sd if 0 < sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    for cr in countries:
        # Infected: the country's single row, reshaped to one (Date, Value) row per day
        df_tmp = df_grouped_summed.loc[df_grouped_summed['Country/Region'] == cr]
        df_melted = df_tmp.melt(id_vars=['Country/Region'], var_name='Date', value_name='Value')
        df_melted = df_melted.iloc[plot_day_start:plot_day_end]
        ax.plot(df_melted['Date'], df_melted['Value'], '-', label='{}'.format(cr))

    title = '{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name)
    ax.set_title(title, loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x'])
    ax.set_ylabel(SETTINGS['plot']['label_y'])

    # Legend to the right of the axes
    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

    plt.show()

    if SETTINGS_PLOT_ALL_COUNTRIES['save_to_file']:
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_ALL_COUNTRIES['filename'])

    plt.close(fig)

In [None]:
# Plot: Specific countries - infections

plot_name = 'Specific countries (infections): "{}"'.format(', '.join(SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['countries']))

if SETTINGS['force_plot'] or SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    countries = SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['countries']

    # Plot
    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Resolve the plotted day range: settings outside the data fall back to the full range
    ed = SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['end_day']
    sd = SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['start_day']
    plot_day_end = ed if 0 < ed < len(dates) else len(dates)
    plot_day_start = sd if 0 < sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # One color per country, shared by its infections and deaths line
    cmap = get_cmap(len(countries))

    for i, cr in enumerate(countries):
        if cr not in all_countries_list:
            logging.info('Could not find given country "{}"'.format(cr))
            continue

        # Infected (solid line)
        df_tmp = df_grouped_summed.loc[df_grouped_summed['Country/Region'] == cr]
        df_melted = df_tmp.melt(id_vars=['Country/Region'], var_name='Date', value_name='Value')
        df_melted = df_melted.iloc[plot_day_start:plot_day_end]
        ax.plot(df_melted['Date'], df_melted['Value'], '-', color=cmap(i), label='{} (Infections)'.format(cr))

        # Deaths (dashed line)
        if SETTINGS['force_plot_deaths'] or SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['plot_deaths']:
            df_deaths_tmp = df_deaths_grouped_summed.loc[df_deaths_grouped_summed['Country/Region'] == cr]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=['Country/Region'], var_name='Date', value_name='Value')
            df_deaths_melted = df_deaths_melted.iloc[plot_day_start:plot_day_end]
            ax.plot(df_deaths_melted['Date'], df_deaths_melted['Value'], '--', color=cmap(i), label='{} (Deaths)'.format(cr))

    title = '{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name)
    ax.set_title(title, loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x'])
    ax.set_ylabel(SETTINGS['plot']['label_y'])

    # Legend to the right of the axes
    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['save_to_file']:
        image_name = SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['filename'].format('-'.join(SETTINGS_PLOT_SPECIFIC_COUNTRIES_INFECTIONS['countries']))
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, image_name)

    plt.close(fig)

In [None]:
# Plot: Specific countries - deaths

plot_name = 'Specific countries (deaths): "{}"'.format(', '.join(SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['countries']))

if SETTINGS['force_plot'] or SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    countries = SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['countries']

    # Plot
    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Validate plot start and end days against the deaths time series (the data plotted here)
    ed = SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['end_day']
    sd = SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['start_day']
    plot_day_end = ed if (ed > 0 and ed < len(dates_deaths)) else len(dates_deaths)
    plot_day_start = sd if sd > 0 and sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # Calculate color map
    cmap = get_cmap(len(countries))

    # Countries are looked up in the deaths data, as that is what gets plotted
    deaths_countries = set(df_deaths_grouped_summed['Country/Region'])

    for i, cr in enumerate(countries):
        if cr in deaths_countries:
            # Deaths: the country's single row, reshaped to one (Date, Value) row per day
            df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==cr]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')[plot_day_start:plot_day_end]
            ax.plot(df_deaths_melted['Date'], df_deaths_melted['Value'], '-', color=cmap(i), label='{}'.format(cr))
        else:
            logging.info('Could not find given country "{}"'.format(cr))

    # The title shows the last day of the deaths data; the axes use the deaths labels
    ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last_deaths.date(), plot_name), loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x_deaths'])
    ax.set_ylabel(SETTINGS['plot']['label_y_deaths'])

    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['save_to_file']:
        # The image directory keeps using 'date_last', like all other plots of this notebook
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['filename'].format('-'.join(SETTINGS_PLOT_SPECIFIC_COUNTRIES_DEATHS['countries'])))

    plt.close(fig)

In [None]:
# Plot: Countries with highest infection rates

plot_name = '{} Countries with highest infection rates'.format(SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['nr_countries'])

if SETTINGS['force_plot'] or SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    # Validate plot start and end days
    ed = SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['end_day']
    sd = SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['start_day']
    plot_day_end = ed if (ed > 0 and ed < len(dates)) else len(dates)
    plot_day_start = sd if sd > 0 and sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # Highest value of each country over all days, computed in one row-wise
    # pass over the date columns instead of melting every country separately
    dict_highest_all = df_grouped_summed.set_index('Country/Region')[dates].max(axis=1).to_dict()

    # Extract the n highest country names
    countries = nlargest(SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['nr_countries'], dict_highest_all, key=dict_highest_all.get)

    # Plot
    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Calculate color map: one color per country, shared by its infections and deaths line
    cmap = get_cmap(SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['nr_countries'])

    for i, cr in enumerate(countries):
        # Infected (solid line)
        df_tmp = df_grouped_summed[df_grouped_summed['Country/Region']==cr]
        df_melted = df_tmp.melt(id_vars=df_tmp.columns.values[:1], var_name='Date', value_name='Value')[plot_day_start:plot_day_end]
        ax.plot(df_melted['Date'], df_melted['Value'], '-', color=cmap(i), label='{} (Infections)'.format(cr))

        # Deaths (dashed line)
        if SETTINGS['force_plot_deaths'] or SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['plot_deaths']:
            df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==cr]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')[plot_day_start:plot_day_end]
            ax.plot(df_deaths_melted['Date'], df_deaths_melted['Value'], '--', color=cmap(i), label='{} (Deaths)'.format(cr))

    ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x'])
    ax.set_ylabel(SETTINGS['plot']['label_y'])

    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['save_to_file']:
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['filename'].format(SETTINGS_PLOT_HIGHEST_INFECTIONS_COUNTRIES['nr_countries']))

    plt.close(fig)

In [None]:
# Plot: Countries with highest number of deaths

plot_name = '{} Countries with highest number of deaths'.format(SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['nr_countries'])

if SETTINGS['force_plot'] or SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    # Validate plot start and end days against the deaths time series (the data plotted here)
    ed = SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['end_day']
    sd = SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['start_day']
    plot_day_end = ed if (ed > 0 and ed < len(dates_deaths)) else len(dates_deaths)
    plot_day_start = sd if sd > 0 and sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # Highest value of each country over all days, computed in one row-wise
    # pass over the date columns instead of melting every country separately
    dict_highest_all = df_deaths_grouped_summed.set_index('Country/Region')[dates_deaths].max(axis=1).to_dict()

    # Extract the n highest country names
    countries = nlargest(SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['nr_countries'], dict_highest_all, key=dict_highest_all.get)

    # Plot
    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    for cr in countries:
        # Deaths: the country's single row, reshaped to one (Date, Value) row per day
        df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==cr]
        df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')[plot_day_start:plot_day_end]
        ax.plot(df_deaths_melted['Date'], df_deaths_melted['Value'], '-', label=cr)

    # The title shows the last day of the deaths data
    ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last_deaths.date(), plot_name), loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x_deaths'])
    ax.set_ylabel(SETTINGS['plot']['label_y_deaths'])

    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['save_to_file']:
        # The image directory keeps using 'date_last', like all other plots of this notebook
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['filename'].format(SETTINGS_PLOT_HIGHEST_DEATHS_COUNTRIES['nr_countries']))

    plt.close(fig)

In [None]:
# Curve fit - infections
#
# Fits `func_fit` to the infection numbers of a single country and plots the
# raw data next to the fitted curve (and, optionally, the deaths).
# If 'predict' is set, the plotted range is extended by 'predict_days' days
# and the missing values are taken from the fitted curve.

if not SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict']:
    plot_name = 'Curve fit (infections) for country "{}"'.format(SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country'])
else:
    plot_name = 'Curve fit (infections) for country "{}" with {} days prediction'.format(SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country'], SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict_days'])

if SETTINGS['force_plot'] or SETTINGS_PLOT_CURVE_FIT_INFECTIONS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    if SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country'] in all_countries_list:
        fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

        # Infected: select the country and melt the date columns into 'Date'/'Value' rows
        df_tmp = df_grouped_summed[df_grouped_summed['Country/Region']==SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country']]
        df_melted = df_tmp.melt(id_vars=df_tmp.columns.values[:1], var_name='Date', value_name='Value')
        nr_days = len(df_melted.Value)

        # Fit range: all days by default, invalid settings fall back to the full range
        day_end = nr_days
        day_start = 0
        if not SETTINGS_PLOT_CURVE_FIT_INFECTIONS['raw_data_only']:
            # Validate start and end days
            edf = SETTINGS_PLOT_CURVE_FIT_INFECTIONS['end_day_fit']
            sdf = SETTINGS_PLOT_CURVE_FIT_INFECTIONS['start_day_fit']
            day_end = edf if 0 < edf < nr_days else nr_days
            day_start = sdf if 0 < sdf < day_end else 0
            logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

        # Validate plot start and end days
        ed = SETTINGS_PLOT_CURVE_FIT_INFECTIONS['end_day']
        sd = SETTINGS_PLOT_CURVE_FIT_INFECTIONS['start_day']
        if not SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict']:
            plot_day_end = ed if 0 < ed < nr_days else nr_days
        else:
            # Prediction always plots up to the last known day plus the predicted days
            plot_day_end = nr_days + SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict_days']
        plot_day_start = sd if 0 < sd < plot_day_end else 0
        logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

        # Use the integer day indices as x values so that the fit lives on the same
        # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
        # point n and therefore stretch the axis by n/(n-1).
        vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
        vals_y = list(df_melted.Value)[day_start:day_end]
        vals_sigma = [func_sigma(y) for y in vals_y]

        vals_x_to_end = list(range(plot_day_start, plot_day_end))
        vals_y_to_end = list(df_melted.Value)[plot_day_start:plot_day_end]

        if not SETTINGS_PLOT_CURVE_FIT_INFECTIONS['raw_data_only']:
            # Scipy curve fit
            try:
                params, params_cov = scipy.optimize.curve_fit(func_fit, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                vals_y_fit = [func_fit(x, params[0], params[1]) for x in vals_x_to_end]
                ax.plot(vals_x_to_end, vals_y_fit, '--', color ='blue', label ='Fit - days {}-{}'.format(day_start, day_end))

                # Predict missing n values for prediction
                if SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict']:
                    # The first day without data; never before the first plotted day
                    predict_from = max(nr_days, plot_day_start)
                    vals_y_to_end = vals_y_to_end + [func_fit(v, params[0], params[1]) for v in range(predict_from, plot_day_end)]
            except Exception as e:
                logging.info('Could not find curve fit, exception: {}'.format(e))
        else:
            logging.info('Just logging data')

        # Plot deaths
        if not SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict'] and (SETTINGS['force_plot_deaths'] or SETTINGS_PLOT_CURVE_FIT_INFECTIONS['plot_deaths']):
            # Deaths
            df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country']]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')
            vals_y_deaths_to_end = list(df_deaths_melted.Value)[plot_day_start:plot_day_end]
            ax.plot(vals_x_to_end, vals_y_deaths_to_end, '-', color ='red', label ='Deaths')

        # Plot data
        # Without a successful fit there are no predicted values, so the y values can
        # be shorter than the x range; only plot the days that actually have a value.
        ax.plot(vals_x_to_end[:len(vals_y_to_end)], vals_y_to_end, 'o', color ='green', label ='Infections')

        # Plot prediction background: shade exactly the predicted days
        if SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict'] and SETTINGS_PLOT_CURVE_FIT_INFECTIONS['predict_days'] > 0:
            plt.axvspan(max(nr_days, plot_day_start) - 0.5, plot_day_end - 0.5, facecolor='b', alpha=0.5, zorder=-100)

        ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
        ax.set_xlabel(SETTINGS['plot']['label_x'])
        ax.set_ylabel(SETTINGS['plot']['label_y'])

        # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
        ticks = list(range(plot_day_start, plot_day_end, 2))
        if SETTINGS_PLOT_CURVE_FIT_INFECTIONS['plot_days_as_label_x']:
            labels = list(ticks)
        else:
            labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
        plt.xticks(ticks=ticks, labels=labels)

        plt.legend(loc='upper left')
        plt.show()

        if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_CURVE_FIT_INFECTIONS['save_to_file']:
            save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_CURVE_FIT_INFECTIONS['filename'].format(SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country']))

        plt.close(fig)
    else:
        logging.info('Could not find given country "{}"'.format(SETTINGS_PLOT_CURVE_FIT_INFECTIONS['country']))

In [None]:
# Curve fit - deaths
#
# Fits `func_fit` to the death numbers of a single country and plots the raw
# data next to the fitted curve.
# If 'predict' is set, the plotted range is extended by 'predict_days' days
# and the missing values are taken from the fitted curve.

# The title is built from the deaths settings, which also drive the rest of this cell
if not SETTINGS_PLOT_CURVE_FIT_DEATHS['predict']:
    plot_name = 'Curve fit (deaths) for country "{}"'.format(SETTINGS_PLOT_CURVE_FIT_DEATHS['country'])
else:
    plot_name = 'Curve fit (deaths) for country "{}" with {} days prediction'.format(SETTINGS_PLOT_CURVE_FIT_DEATHS['country'], SETTINGS_PLOT_CURVE_FIT_DEATHS['predict_days'])

if SETTINGS['force_plot'] or SETTINGS_PLOT_CURVE_FIT_DEATHS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    if SETTINGS_PLOT_CURVE_FIT_DEATHS['country'] in all_countries_list:
        fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

        # Deaths: select the country and melt the date columns into 'Date'/'Value' rows
        df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==SETTINGS_PLOT_CURVE_FIT_DEATHS['country']]
        df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')
        nr_days = len(df_deaths_melted.Value)

        # Fit range: all days by default, invalid settings fall back to the full range
        day_end = nr_days
        day_start = 0
        if not SETTINGS_PLOT_CURVE_FIT_DEATHS['raw_data_only']:
            # Validate start and end days
            edf = SETTINGS_PLOT_CURVE_FIT_DEATHS['end_day_fit']
            sdf = SETTINGS_PLOT_CURVE_FIT_DEATHS['start_day_fit']
            day_end = edf if 0 < edf < nr_days else nr_days
            day_start = sdf if 0 < sdf < day_end else 0
            logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

        # Validate plot start and end days
        ed = SETTINGS_PLOT_CURVE_FIT_DEATHS['end_day']
        sd = SETTINGS_PLOT_CURVE_FIT_DEATHS['start_day']
        if not SETTINGS_PLOT_CURVE_FIT_DEATHS['predict']:
            plot_day_end = ed if 0 < ed < nr_days else nr_days
        else:
            # Prediction always plots up to the last known day plus the predicted days
            plot_day_end = nr_days + SETTINGS_PLOT_CURVE_FIT_DEATHS['predict_days']
        plot_day_start = sd if 0 < sd < plot_day_end else 0
        logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

        # Use the integer day indices as x values so that the fit lives on the same
        # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
        # point n and therefore stretch the axis by n/(n-1).
        vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
        vals_y = list(df_deaths_melted.Value)[day_start:day_end]
        vals_sigma = [func_sigma(y) for y in vals_y]

        vals_x_to_end = list(range(plot_day_start, plot_day_end))
        vals_y_to_end = list(df_deaths_melted.Value)[plot_day_start:plot_day_end]

        if not SETTINGS_PLOT_CURVE_FIT_DEATHS['raw_data_only']:
            # Scipy curve fit
            try:
                params, params_cov = scipy.optimize.curve_fit(func_fit, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                vals_y_fit = [func_fit(x, params[0], params[1]) for x in vals_x_to_end]
                ax.plot(vals_x_to_end, vals_y_fit, '--', color ='blue', label ='Fit - days {}-{}'.format(day_start, day_end))

                # Predict missing n values for prediction
                if SETTINGS_PLOT_CURVE_FIT_DEATHS['predict']:
                    # The first day without data; never before the first plotted day
                    predict_from = max(nr_days, plot_day_start)
                    vals_y_to_end = vals_y_to_end + [func_fit(v, params[0], params[1]) for v in range(predict_from, plot_day_end)]
            except Exception as e:
                logging.info('Could not find curve fit, exception: {}'.format(e))
        else:
            logging.info('Just logging data')

        # Plot data
        # Without a successful fit there are no predicted values, so the y values can
        # be shorter than the x range; only plot the days that actually have a value.
        ax.plot(vals_x_to_end[:len(vals_y_to_end)], vals_y_to_end, 'o', color ='green', label ='Deaths')

        # Plot prediction background: shade exactly the predicted days
        if SETTINGS_PLOT_CURVE_FIT_DEATHS['predict'] and SETTINGS_PLOT_CURVE_FIT_DEATHS['predict_days'] > 0:
            plt.axvspan(max(nr_days, plot_day_start) - 0.5, plot_day_end - 0.5, facecolor='b', alpha=0.5, zorder=-100)

        ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
        ax.set_xlabel(SETTINGS['plot']['label_x'])
        ax.set_ylabel(SETTINGS['plot']['label_y'])

        # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
        ticks = list(range(plot_day_start, plot_day_end, 2))
        if SETTINGS_PLOT_CURVE_FIT_DEATHS['plot_days_as_label_x']:
            labels = list(ticks)
        else:
            labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
        plt.xticks(ticks=ticks, labels=labels)

        plt.legend(loc='upper left')
        plt.show()

        if SETTINGS['force_save_plot_to_file'] or SETTINGS_PLOT_CURVE_FIT_DEATHS['save_to_file']:
            save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_PLOT_CURVE_FIT_DEATHS['filename'].format(SETTINGS_PLOT_CURVE_FIT_DEATHS['country']))

        plt.close(fig)
    else:
        logging.info('Could not find given country "{}"'.format(SETTINGS_PLOT_CURVE_FIT_DEATHS['country']))

In [None]:
# Multi curve fit - infections
#
# Plots the infections of one country together with several fitted curves.
# Every entry of SETTINGS_MULTI_CURVE_FIT_INFECTIONS['fits'] brings its own fit
# range, plot range, color and (optionally) its own fit function.

plot_name = 'Multi curve fit (infections) for country "{}"'.format(SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country'])

if SETTINGS['force_plot'] or SETTINGS_MULTI_CURVE_FIT_INFECTIONS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    if SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country'] in all_countries_list:
        fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

        # Infected: select the country and melt the date columns into 'Date'/'Value' rows
        df_tmp = df_grouped_summed[df_grouped_summed['Country/Region']==SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country']]
        df_melted = df_tmp.melt(id_vars=df_tmp.columns.values[:1], var_name='Date', value_name='Value')
        nr_days = len(df_melted.Value)

        # The raw data is plotted from the lowest plot start day to the highest
        # plot end day of all fits
        lowest_start_day = nr_days
        highest_end_day = 0
        for data in SETTINGS_MULTI_CURVE_FIT_INFECTIONS['fits']:
            # Validate start and end days
            day_end = data['end_day'] if 0 < data['end_day'] < nr_days else nr_days
            day_start = data['start_day'] if 0 < data['start_day'] < day_end else 0
            logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

            # Validate plot start and end days
            plot_day_end = data['plot_end_day'] if 0 < data['plot_end_day'] < nr_days else nr_days
            plot_day_start = data['plot_start_day'] if 0 < data['plot_start_day'] < plot_day_end else 0
            logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))
            lowest_start_day = min(plot_day_start, lowest_start_day)
            highest_end_day = max(plot_day_end, highest_end_day)

            # Use the integer day indices as x values so that the fit lives on the same
            # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
            # point n and therefore stretch the axis by n/(n-1).
            vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
            vals_y = list(df_melted.Value)[day_start:day_end]
            vals_sigma = [func_sigma(y) for y in vals_y]

            vals_x_to_end = list(range(plot_day_start, plot_day_end))

            if not SETTINGS_MULTI_CURVE_FIT_INFECTIONS['raw_data_only']:
                # Scipy curve fit
                try:
                    func = data['fit_func'] if 'fit_func' in data else func_fit
                    params, params_cov = scipy.optimize.curve_fit(func, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                    # NOTE(review): the trailing 0 is passed as a third function parameter - confirm that every fit function accepts it
                    vals_y_fit = [func(x, params[0], params[1], 0) for x in vals_x_to_end]
                    ax.plot(vals_x_to_end, vals_y_fit, '--', color=data['color'], label='Fit - days {}-{}'.format(day_start, day_end))
                except Exception as e:
                    logging.info('Could not find curve fit, exception: {}'.format(e))
            else:
                logging.info('Just logging data')

        # Plot deaths
        if SETTINGS['force_plot_deaths'] or SETTINGS_MULTI_CURVE_FIT_INFECTIONS['plot_deaths']:
            # Deaths
            df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country']]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')

            vals_x_to_end = list(range(lowest_start_day, highest_end_day))
            vals_y_deaths_to_end = list(df_deaths_melted.Value)[lowest_start_day:highest_end_day]
            ax.plot(vals_x_to_end, vals_y_deaths_to_end, '-', color='red', label='Deaths')

        # Plot data
        plot_vals_x = list(range(lowest_start_day, highest_end_day))
        plot_vals_y = list(df_melted.Value)[lowest_start_day:highest_end_day]
        ax.plot(plot_vals_x, plot_vals_y, 'o', color='green', label='Infections')

        ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
        ax.set_xlabel(SETTINGS['plot']['label_x'])
        ax.set_ylabel(SETTINGS['plot']['label_y'])

        logging.info('Calculating ticks for days [{}, {}]'.format(lowest_start_day, highest_end_day))
        # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
        ticks = list(range(lowest_start_day, highest_end_day, 2))
        if SETTINGS_MULTI_CURVE_FIT_INFECTIONS['plot_days_as_label_x']:
            labels = list(ticks)
        else:
            labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
        plt.xticks(ticks=ticks, labels=labels)

        plt.legend(loc='upper left')
        plt.show()

        if SETTINGS['force_save_plot_to_file'] or SETTINGS_MULTI_CURVE_FIT_INFECTIONS['save_to_file']:
            save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_MULTI_CURVE_FIT_INFECTIONS['filename'].format(SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country']))

        plt.close(fig)
    else:
        logging.info('Could not find given country "{}"'.format(SETTINGS_MULTI_CURVE_FIT_INFECTIONS['country']))

In [None]:
# Multi curve fit - deaths
#
# Plots the deaths of one country together with several fitted curves.
# Every entry of SETTINGS_MULTI_CURVE_FIT_DEATHS['fits'] brings its own fit
# range, plot range, color and (optionally) its own fit function.

plot_name = 'Multi curve fit (deaths) for country "{}"'.format(SETTINGS_MULTI_CURVE_FIT_DEATHS['country'])

if SETTINGS['force_plot'] or SETTINGS_MULTI_CURVE_FIT_DEATHS['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    if SETTINGS_MULTI_CURVE_FIT_DEATHS['country'] in all_countries_list:
        fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

        # Deaths: select the country and melt the date columns into 'Date'/'Value' rows
        df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==SETTINGS_MULTI_CURVE_FIT_DEATHS['country']]
        df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')
        nr_days = len(df_deaths_melted.Value)

        # The raw data is plotted from the lowest plot start day to the highest
        # plot end day of all fits
        lowest_start_day = nr_days
        highest_end_day = 0
        for data in SETTINGS_MULTI_CURVE_FIT_DEATHS['fits']:
            # Validate start and end days
            day_end = data['end_day'] if 0 < data['end_day'] < nr_days else nr_days
            day_start = data['start_day'] if 0 < data['start_day'] < day_end else 0
            logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

            # Validate plot start and end days
            plot_day_end = data['plot_end_day'] if 0 < data['plot_end_day'] < nr_days else nr_days
            plot_day_start = data['plot_start_day'] if 0 < data['plot_start_day'] < plot_day_end else 0
            logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))
            lowest_start_day = min(plot_day_start, lowest_start_day)
            highest_end_day = max(plot_day_end, highest_end_day)

            # Use the integer day indices as x values so that the fit lives on the same
            # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
            # point n and therefore stretch the axis by n/(n-1).
            vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
            vals_y = list(df_deaths_melted.Value)[day_start:day_end]
            vals_sigma = [func_sigma(y) for y in vals_y]

            vals_x_to_end = list(range(plot_day_start, plot_day_end))

            if not SETTINGS_MULTI_CURVE_FIT_DEATHS['raw_data_only']:
                # Scipy curve fit
                try:
                    func = data['fit_func'] if 'fit_func' in data else func_fit
                    params, params_cov = scipy.optimize.curve_fit(func, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                    # NOTE(review): the trailing 0 is passed as a third function parameter - confirm that every fit function accepts it
                    vals_y_fit = [func(x, params[0], params[1], 0) for x in vals_x_to_end]
                    ax.plot(vals_x_to_end, vals_y_fit, '--', color=data['color'], label='Fit - days {}-{}'.format(day_start, day_end))
                except Exception as e:
                    logging.info('Could not find curve fit, exception: {}'.format(e))
            else:
                logging.info('Just logging data')

        # Plot data (these are the death numbers, so the legend entry has to say so)
        plot_vals_x = list(range(lowest_start_day, highest_end_day))
        plot_vals_y = list(df_deaths_melted.Value)[lowest_start_day:highest_end_day]
        ax.plot(plot_vals_x, plot_vals_y, 'o', color='green', label='Deaths')

        ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
        ax.set_xlabel(SETTINGS['plot']['label_x'])
        ax.set_ylabel(SETTINGS['plot']['label_y'])

        logging.info('Calculating ticks for days [{}, {}]'.format(lowest_start_day, highest_end_day))
        # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
        ticks = list(range(lowest_start_day, highest_end_day, 2))
        if SETTINGS_MULTI_CURVE_FIT_DEATHS['plot_days_as_label_x']:
            labels = list(ticks)
        else:
            labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
        plt.xticks(ticks=ticks, labels=labels)

        plt.legend(loc='upper left')
        plt.show()

        if SETTINGS['force_save_plot_to_file'] or SETTINGS_MULTI_CURVE_FIT_DEATHS['save_to_file']:
            save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_MULTI_CURVE_FIT_DEATHS['filename'].format(SETTINGS_MULTI_CURVE_FIT_DEATHS['country']))

        plt.close(fig)
    else:
        logging.info('Could not find given country "{}"'.format(SETTINGS_MULTI_CURVE_FIT_DEATHS['country']))

In [None]:
# Curve fit multiple countries - infections
#
# Plots the infections of several countries into one figure. Every entry of
# SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['countries'] brings its own fit
# range, color and (optionally) its own fit function; the plotted day range is
# shared by all countries.

countries = [x['name'] for x in SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['countries']]
plot_name = 'Curve fit (infections) for countries "{}"'.format(', '.join(countries))

if SETTINGS['force_plot'] or SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Validate plot start and end days
    ed = SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['end_day']
    sd = SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['start_day']
    plot_day_end = ed if 0 < ed < len(dates) else len(dates)
    plot_day_start = sd if 0 < sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # The plotted days are the same for every country (data, fit and deaths)
    vals_x_to_end = list(range(plot_day_start, plot_day_end))

    for country in SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['countries']:
        logging.info('Preparing country "{}"'.format(country['name']))
        country_name = country['name']

        if country_name in all_countries_list:
            # Infected: select the country and melt the date columns into 'Date'/'Value' rows
            df_tmp = df_grouped_summed[df_grouped_summed['Country/Region']==country_name]
            df_melted = df_tmp.melt(id_vars=df_tmp.columns.values[:1], var_name='Date', value_name='Value')
            nr_days = len(df_melted.Value)

            # Check best fit data for start and end day
            start_day = country['start_day']
            end_day = country['end_day']
            day_end = len(dates)
            day_start = 0
            if not SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['raw_data_only']:
                # Validate start and end days
                day_end = end_day if 0 < end_day < len(dates) else len(dates)
                day_start = start_day if 0 < start_day < day_end else 0
                logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

            # Use the integer day indices as x values so that the fit lives on the same
            # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
            # point n and therefore stretch the axis by n/(n-1).
            vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
            vals_y = list(df_melted.Value)[day_start:day_end]
            vals_sigma = [func_sigma(y) for y in vals_y]

            vals_y_to_end = list(df_melted.Value)[plot_day_start:plot_day_end]

            ax.plot(vals_x_to_end, vals_y_to_end, 'o', color=country['color'], label='{} (Infections)'.format(country_name))
            if not SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['raw_data_only']:
                # Scipy curve fit
                try:
                    func = country['fit_func'] if 'fit_func' in country else func_fit
                    params, params_cov = scipy.optimize.curve_fit(func, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                    # NOTE(review): the trailing 0 is passed as a third function parameter - confirm that every fit function accepts it
                    vals_y_fit = [func(x, params[0], params[1], 0) for x in vals_x_to_end]
                    ax.plot(vals_x_to_end, vals_y_fit, '-', color=country['color'], label='{} (Fit - days {}-{})'.format(country_name, day_start, day_end))
                except Exception as e:
                    logging.info('Could not find curve fit, exception: {}'.format(e))
            else:
                logging.info('Just logging data')

            # Plot deaths
            if SETTINGS['force_plot_deaths'] or SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['plot_deaths']:
                # Deaths
                df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==country_name]
                df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')

                vals_y_deaths_to_end = list(df_deaths_melted.Value)[plot_day_start:plot_day_end]
                ax.plot(vals_x_to_end, vals_y_deaths_to_end, '--', color=country['color'], label='{} (Deaths)'.format(country_name))
        else:
            logging.info('Could not find given country "{}"'.format(country_name))

    ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x'])
    ax.set_ylabel(SETTINGS['plot']['label_y'])

    # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
    ticks = list(range(plot_day_start, plot_day_end, 2))
    if SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['plot_days_as_label_x']:
        labels = list(ticks)
    else:
        labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
    plt.xticks(ticks=ticks, labels=labels)

    plt.legend(loc='upper left')
    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['save_to_file']:
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_CURVE_FIT_INFECTIONS_MULTI_COUNTRIES['filename'].format('-'.join(countries)))

    plt.close(fig)

In [None]:
# Curve fit multiple countries - deaths
#
# Plots the deaths of several countries into one figure. Every entry of
# SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['countries'] brings its own fit
# range, color and (optionally) its own fit function; the plotted day range is
# shared by all countries.

countries = [x['name'] for x in SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['countries']]
plot_name = 'Curve fit (deaths) for countries "{}"'.format(', '.join(countries))

if SETTINGS['force_plot'] or SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['plot']:
    logging.info('Plotting "{}"'.format(plot_name))

    fig, ax = plt.subplots(figsize=SETTINGS['plot']['size'])

    # Validate plot start and end days
    ed = SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['end_day']
    sd = SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['start_day']
    plot_day_end = ed if 0 < ed < len(dates) else len(dates)
    plot_day_start = sd if 0 < sd < plot_day_end else 0
    logging.info('Plotting to days [{}, {}]'.format(plot_day_start, plot_day_end))

    # The plotted days are the same for every country (data and fit)
    vals_x_to_end = list(range(plot_day_start, plot_day_end))

    for country in SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['countries']:
        logging.info('Preparing country "{}"'.format(country['name']))
        country_name = country['name']

        if country_name in all_countries_list:
            # Deaths: select the country and melt the date columns into 'Date'/'Value' rows
            df_deaths_tmp = df_deaths_grouped_summed[df_deaths_grouped_summed['Country/Region']==country_name]
            df_deaths_melted = df_deaths_tmp.melt(id_vars=df_deaths_tmp.columns.values[:1], var_name='Date', value_name='Value')
            nr_days = len(df_deaths_melted.Value)

            # Check best fit data for start and end day
            start_day = country['start_day']
            end_day = country['end_day']
            day_end = len(dates)
            day_start = 0
            if not SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['raw_data_only']:
                # Validate start and end days
                day_end = end_day if 0 < end_day < len(dates) else len(dates)
                day_start = start_day if 0 < start_day < day_end else 0
                logging.info('Fitting to days [{}, {}]'.format(day_start, day_end))

            # Use the integer day indices as x values so that the fit lives on the same
            # x-axis as the plotted data. np.linspace(0, n, num=n) would include the end
            # point n and therefore stretch the axis by n/(n-1).
            vals_x = np.arange(nr_days, dtype=float)[day_start:day_end]
            vals_y = list(df_deaths_melted.Value)[day_start:day_end]
            vals_sigma = [func_sigma(y) for y in vals_y]

            vals_y_to_end = list(df_deaths_melted.Value)[plot_day_start:plot_day_end]

            ax.plot(vals_x_to_end, vals_y_to_end, 'o', color=country['color'], label='{}'.format(country_name))
            if not SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['raw_data_only']:
                # Scipy curve fit
                try:
                    func = country['fit_func'] if 'fit_func' in country else func_fit
                    params, params_cov = scipy.optimize.curve_fit(func, xdata=vals_x, ydata=vals_y, sigma=vals_sigma)
                    # NOTE(review): the trailing 0 is passed as a third function parameter - confirm that every fit function accepts it
                    vals_y_fit = [func(x, params[0], params[1], 0) for x in vals_x_to_end]
                    ax.plot(vals_x_to_end, vals_y_fit, '-', color=country['color'], label='{} (Fit - days {}-{})'.format(country_name, day_start, day_end))
                except Exception as e:
                    logging.info('Could not find curve fit, exception: {}'.format(e))
            else:
                logging.info('Just logging data')
        else:
            logging.info('Could not find given country "{}"'.format(country_name))

    ax.set_title('{} - {} - {}'.format(SETTINGS['plot']['title'], date_last.date(), plot_name), loc='center')
    ax.set_xlabel(SETTINGS['plot']['label_x'])
    ax.set_ylabel(SETTINGS['plot']['label_y'])

    # Calculate ticks and labels (=the dates on the x-axis), one tick every second day
    ticks = list(range(plot_day_start, plot_day_end, 2))
    if SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['plot_days_as_label_x']:
        labels = list(ticks)
    else:
        labels = [str((date_first + datetime.timedelta(days=d)).date()) for d in ticks]
    plt.xticks(ticks=ticks, labels=labels)

    plt.legend(loc='upper left')
    plt.show()

    if SETTINGS['force_save_plot_to_file'] or SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['save_to_file']:
        save_plot(os.getcwd(), fig, SETTINGS['plot_image_path'], date_last, SETTINGS_CURVE_FIT_DEATHS_MULTI_COUNTRIES['filename'].format('-'.join(countries)))

    plt.close(fig)