In [None]:
# enable only on demand; will disable all normal print() output
# %load_ext pycodestyle_magic
# %pycodestyle_on

In [None]:
%matplotlib notebook
#%matplotlib widget

import copy
import json

from datetime import datetime
from math import ceil
from os import environ, makedirs, path

import numpy as np
import requests

from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from pandas import DataFrame as PanDataFrame

In [None]:
# Output location of the JSON dump written further down.
# expanduser("~") resolves the home directory even when the HOME environment
# variable is unset, where environ.get("HOME") would hand None to path.join.
out_dir = path.join(path.expanduser("~"), "Chaos", "DL")
# File name without extension; ".json" is appended when the file is written.
out_file_name = "cov19"

In [None]:
# Countries of the "euro" region, keyed by lower-case two-letter country code.
# The codes are inserted into the fetch URL and are also the keys of
# `population`; the values are the display names used in tables and plots.
euro = {"at": "Austria", "be": "Belgium", "ba": "Bosnia and Herzegovina",
        "bg": "Bulgaria", "hr": "Croatia", "cy": "Cyprus", "cz": "Czechia",
        "dk": "Denmark", "ee": "Estonia", "fi": "Finland", "fr": "France",
        "de": "Germany", "gr": "Greece", "hu": "Hungary", "ie": "Ireland",
        "it": "Italy", "lv": "Latvia", "li": "Liechtenstein",
        "lt": "Lithuania", "lu": "Luxembourg", "nl": "Netherlands",
        "no": "Norway", "pl": "Poland", "pt": "Portugal", "ro": "Romania",
        "rs": "Serbia", "sk": "Slovakia", "si": "Slovenia", "es": "Spain",
        "se": "Sweden", "ch": "Switzerland", "gb": "United Kingdom"}

In [None]:
# Region id (used as a URL path segment) -> country codes fetched for it.
regions = dict(amro=["us"],
               euro=euro.keys())

In [None]:
# Population per country code (every key of `euro` plus "us"), used to
# normalise case and death counts. All figures are given in whole thousands.
# NOTE(review): source and reference year of these figures are not recorded
# here - confirm before relying on the derived percentages.
population = {
    "at": 8901000, "be": 11431000, "ba": 3531000, "bg": 6951000,
    "hr": 4190000, "cy": 1189000, "cz": 10637000, "dk": 5822000,
    "ee": 1323000, "fi": 5517000, "fr": 66993000, "de": 83166000,
    "gr": 10277000, "hu": 9773000, "ie": 4761000, "it": 60260000,
    "lv": 1934000, "li": 38000, "lt": 2794000, "lu": 626000,
    "nl": 17290000, "no": 5367000, "pl": 38386000, "pt": 10600000,
    "ro": 20121000, "rs": 7057000, "sk": 5450000, "si": 2064000,
    "es": 47100000, "se": 10327000, "ch": 8601000, "gb": 66435000,
    "us": 328000000}

In [None]:
# Names of the six values stored per day, in the order they appear in each
# per-day list of a country's "cases" dict.
case_values = [
    "cases_per_day_confirmed",
    "cases_cumulative",
    "cases_cumulative_percent_population",
    "deaths_per_day_confirmed",
    "deaths_cumulative",
    "deaths_cumulative_permil_population",
]

# Self-description of the per-day records that is saved with the data.
cases_day_description = dict(key=["unix_timestamp"], values=case_values)

# Top-level container; "countries" is filled by the fetch cell.
full_data = dict(cases_per_day_data_description=cases_day_description,
                 countries={})

In [None]:
# Columns of one data row, numbered from 1 (the code below indexes the same
# row from 0, i.e. column N is row[N-1]):
# (1) timestamp, (2) region, (3) deaths, (4) cumulative deaths,
# (5) deaths last 7 days, (6) deaths last 7 days change,
# (7) deaths per million, (8) confirmed, (9) cumulative confirmed,
# (10) cases last 7 days, (11) cases last 7 days change,
# (12) cases per million
# Base URL; "/<region>/country/<code>/page-data.json" is appended per country.
furl = "https://covid19.who.int/page-data/region"

In [None]:
# Fetch and convert data from all regions of interest.
# For every country this fills full_data["countries"][<code>] with the name,
# region, population and a dict "timestamp -> six per-day values".
for reg, reg_items in regions.items():
    for country_id in reg_items:
        country_name = "United States"
        region = "america"
        if country_id != "us":
            country_name = euro[country_id]
            region = "euro"

        curr_url = f"{furl}/{reg}/country/{country_id}/page-data.json"
        # Announce the request before sending it, so that a hanging or failing
        # download can be attributed to the right country.
        print(f"Fetching country: '{country_id}/{country_name}' at \n\t{curr_url}")
        # Bound the wait and fail loudly on an HTTP error instead of trying to
        # parse an error page as JSON.
        res = requests.get(curr_url, timeout=30)
        res.raise_for_status()
        data = res.json()

        # Reduce to "timestamp: [confirmed, confirmed_cumulative,
        #                        case_cumulative_percent_population,
        #                        deaths, deaths_cumulative,
        #                        death_cumulative_permil_population]"
        curr_country = {}
        curr_data = data["result"]["pageContext"]["countryGroup"]["data"]["rows"]
        curr_perc_pop = population[country_id]/100
        curr_perm_pop = population[country_id]/1000
        for i in curr_data:
            case_perc_pop = round(i[8]/curr_perc_pop, 3)
            death_perm_pop = round(i[3]/curr_perm_pop, 3)

            curr_country[i[0]] = [i[7], i[8], case_perc_pop, i[2], i[3], death_perm_pop]

            # A negative per-day case count is clamped to 0.
            if i[7] < 0:
                print(f"Negativ case value ({country_name}) in '{i[7]}', setting to 0.")
                curr_country[i[0]][0] = 0

        # An empty row list has no "latest" entry to show.
        if curr_country:
            print(f"\tLatest cases: {curr_country[list(curr_country.keys())[-1]]}")
        else:
            print(f"\tNo case rows returned for '{country_id}/{country_name}'")
        full_data["countries"][country_id] = {"country_name": country_name,
                                              "region": region,
                                              "population": population[country_id],
                                              "cases": curr_country}

In [None]:
# Save data structure to json file
# Create the target directory first; otherwise open() fails with
# FileNotFoundError only after all data has been downloaded.
makedirs(out_dir, exist_ok=True)
fn = path.join(out_dir, f"{out_file_name}.json")
print(f"\nWriting to file {fn}")
with open(fn, "w", encoding="utf-8") as fp:
    json.dump(full_data, fp)

In [None]:
# Prepare data for Europe

# Population of the euro region: every entry of `population` except the US.
euro_pop = sum(population.values()) - population["us"]

# Aggregate data; "cases_total" is the sum of every country's latest day,
# "cases" the per-day sum over all euro countries.
euro_cases = {"country_name": "EU",
              "population": euro_pop,
              "cases_total": [],
              "cases": {}}

for ctry in full_data["countries"].values():
    if ctry["region"] != "euro":
        continue

    curr_cases = ctry["cases"]
    if not curr_cases:
        # Nothing to add for a country without any rows.
        continue
    curr_list = curr_cases[list(curr_cases.keys())[-1]]

    # Aggregate latest total euro cases. The first country is copied so that
    # the later in-place percentage fix never touches full_data.
    if not euro_cases["cases_total"]:
        euro_cases["cases_total"] = list(curr_list)
    else:
        euro_cases["cases_total"] = [sum(x) for x in zip(euro_cases["cases_total"], curr_list)]

    # Aggregate daily total euro cases. A date not seen so far starts a new
    # sum instead of raising KeyError.
    for j, day_values in curr_cases.items():
        if j in euro_cases["cases"]:
            euro_cases["cases"][j] = [sum(x) for x in zip(euro_cases["cases"][j], day_values)]
        else:
            euro_cases["cases"][j] = list(day_values)

# Fix euro percentages: summing the per-country percentages above is
# meaningless, so recompute them from the summed counts and population.
curr_perc_pop = euro_cases["population"] / 100
curr_perm_pop = euro_cases["population"] / 1000

# Euro percentage cases total (skipped when no euro data was aggregated)
if euro_cases["cases_total"]:
    euro_cases["cases_total"][2] = round(euro_cases["cases_total"][1] / curr_perc_pop, 3)
    euro_cases["cases_total"][5] = round(euro_cases["cases_total"][4] / curr_perm_pop, 3)

# Euro percentages per day
for day_sum in euro_cases["cases"].values():
    day_sum[2] = round(day_sum[1] / curr_perc_pop, 3)
    day_sum[5] = round(day_sum[4] / curr_perm_pop, 3)

In [None]:
# Comparison Europe, US
# cases_dates is the shared x axis of all later plots; the timestamps are in
# milliseconds and are converted to local-time datetimes.
cases_dates = []
eu_cases_confirmed = []

for c_date, c_item in euro_cases["cases"].items():
    cases_dates.append(datetime.fromtimestamp(c_date/1000))
    eu_cases_confirmed.append(c_item[0])

us_cases = full_data["countries"]["us"]["cases"]
last_euro_date = cases_dates[-1]

# Align the US series to the euro dates (the US is sometimes a day ahead in
# terms of numbers). Looking the values up by euro date key guarantees that
# both series have exactly the length of cases_dates; a date the US lacks
# becomes NaN, which matplotlib draws as a gap.
us_cases_confirmed = [us_cases[c_date][0] if c_date in us_cases else np.nan
                      for c_date in euro_cases["cases"]]

In [None]:
def eu_us_compare_plot(plot_date, plot_eu, plot_us):
    """Plot the per-day case counts of the euro zone and the US together.

    Args:
        plot_date: x values (dates) shared by both series.
        plot_eu: per-day values of the euro zone, same length as plot_date.
        plot_us: per-day values of the US, same length as plot_date.
    """
    # plt.subplots() always opens a new figure. plt.subplot(111) would return
    # the axes of the current figure, so with the interactive notebook backend
    # every later plot would be drawn on top of an earlier one.
    _, ax = plt.subplots()
    ax.set_title("Per day Covid19 cases")
    ax.set_xlabel("Date")

    ax.plot(plot_date, plot_eu, label="European zone")
    ax.plot(plot_date, plot_us, label="United States")
    ax.legend(loc='upper left', fontsize='xx-small')
    plt.show()

eu_us_compare_plot(cases_dates, eu_cases_confirmed, us_cases_confirmed)

In [None]:
def euro_country_plot(plot_title, num_day=None):
    """Plot the per-day confirmed cases of every euro country in one figure.

    Reads the globals full_data, cases_dates and markers_available.

    Args:
        plot_title: title of the figure.
        num_day: if set, only the slice [-num_day:-1] of every series is
            plotted, i.e. the newest entry is left out.
            NOTE(review): this yields num_day - 1 points - confirm that
            dropping the newest day is intended.
    """
    # Always draw into a new figure; plt.subplot(111) would reuse the axes of
    # the current figure under the interactive notebook backend.
    _, ax = plt.subplots()

    euro_countries = [ctry for ctry in full_data["countries"].values()
                      if ctry["region"] == "euro"]

    for marker_idx, ctry in enumerate(euro_countries):
        curr_confirmed = [day_values[0] for day_values in ctry["cases"].values()]

        # One marker per country; wrap around instead of raising IndexError
        # when there are more countries than markers.
        marker = markers_available[marker_idx % len(markers_available)]

        country = ctry["country_name"]
        if num_day:
            ax.plot(cases_dates[-1*num_day:-1], curr_confirmed[-1*num_day:-1],
                    label=country, marker=marker)
        else:
            ax.plot(cases_dates, curr_confirmed, label=country,
                    marker=marker)

    ax.set_title(plot_title)
    ax.set_xlabel("Date")
    ax.legend(loc='upper left', fontsize='xx-small')
    plt.show()

In [None]:
# All marker ids matplotlib knows, used to give each country its own marker
markers_available = [*Line2D.markers]

# Per-day plot of all euro countries over the full date range
euro_country_plot(plot_title="Per day cases euro countries")

In [None]:
# Same plot, restricted to the most recent days
last_days = 30
title = "Per day cases euro countries; last {} days".format(last_days)
euro_country_plot(title, num_day=last_days)

In [None]:
# Show the numpy print options currently in effect
current_printoptions = np.get_printoptions()
print(current_printoptions)

# From here on print floats with 3 digits of precision
np.set_printoptions(precision=3)

In [None]:
def prepare_country_data(ctry_code):
    """Return the per-day confirmed cases and the name of one country.

    Args:
        ctry_code: key of the country in full_data["countries"].

    Returns:
        Tuple (list of the first value of every per-day record, in stored
        order; country name).
    """
    country = full_data["countries"][ctry_code]
    name = country["country_name"]
    per_day_confirmed = [day_values[0] for day_values in country["cases"].values()]

    return per_day_confirmed, name

In [None]:
# Individual country plot
country_code = "at"
country_data, country_name = prepare_country_data(country_code)

# Open a new figure, as the bar plot cell does; plt.subplot(111) would draw
# into the axes of the current figure under the interactive notebook backend.
_, ax = plt.subplots()
ax.plot(cases_dates, country_data)
ax.set_title(f"Per day cases in {country_code}/{country_name}")
ax.set_xlabel("Date")

plt.show()

In [None]:
# Bar chart of the per-day cases of a single country
country_code = "fr"
country_data, country_name = prepare_country_data(country_code)

_, ax = plt.subplots()
ax.bar(cases_dates, country_data, width=0.8)
ax.set_title("Per day cases in {}/{}".format(country_code, country_name))
ax.set_xlabel("Dates")

plt.show()

In [None]:
# Format large numbers with comma as 1000 separator
def format_country_stats(cases, ctry_pop):
    """Build one table row of formatted statistics for a country.

    Args:
        cases: per-day record; index 1 = cumulative cases, 2 = cumulative
            cases in % of the population, 4 = cumulative deaths,
            5 = cumulative deaths in per mille of the population.
        ctry_pop: population of the country.

    Returns:
        List of nine entries: six display strings (population, cumulative
        cases, case percentage, cumulative deaths, death per mille, deaths in
        % of cases) followed by the raw population, cumulative cases and
        cumulative deaths for numeric sorting.
    """
    # The inputs are only read, so no defensive copy is needed.
    sum_cases = cases[1]
    sum_deaths = cases[4]

    # Deaths as a percentage of confirmed cases; without any confirmed case
    # the ratio is reported as 0.0 instead of raising ZeroDivisionError.
    if sum_cases:
        deaths_per_cases = round(sum_deaths/(sum_cases/100), 3)
    else:
        deaths_per_cases = 0.0

    return [f'{ctry_pop:,}', f'{sum_cases:,}', f'{cases[2]:,}',
            f'{sum_deaths:,}', f'{cases[5]:,}', f'{deaths_per_cases:,}',
            ctry_pop, sum_cases, sum_deaths]

In [None]:
# Collect one formatted statistics row per entity for the latest euro date:
# first the euro aggregate, then every fetched country.
use_date = list(euro_cases["cases"])[-1]

names = ["Europe"]
euro_row = format_country_stats(euro_cases["cases"][use_date],
                                euro_cases["population"])
sum_only = [euro_row]

tmp_cases = copy.deepcopy(full_data["countries"])
for ctry_info in tmp_cases.values():
    ctry_row = format_country_stats(ctry_info["cases"][use_date],
                                    ctry_info["population"])
    sum_only.append(ctry_row)
    names.append(ctry_info["country_name"])

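- morbidity ... share of the population that got sick (cumulative confirmed cases in % of the population)
- mortality ... share of the population that died (cumulative deaths in ‰ of the population; in our case ignoring the time component of mortality)
- lethality ... share of the sick population that died (cumulative deaths in % of the cumulative confirmed cases)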

In [None]:
# Using pandas to print table
# One entry per name; names and sum_only are parallel lists.
d_sum_only = dict(zip(names, sum_only))

col_labels = ["population", "sum_cases", "morbidity [% pop]",
              "sum_deaths", "mortality [‰ pop]", "lethality",
              "sort_pop", "sort_case", "sort_dead"]
sum_frame = PanDataFrame(d_sum_only, col_labels)

# 0 ... pop, 1 ... sum, 2 ... morbid, 3 ... sum_d, 4 ... mort,
# 5 ... leth, 6 ... pop_sort, 7 ... case_sort, 8 ... dead_sort
hide_col = ["sort_pop", "sort_case", "sort_dead"]

# The lethality column holds formatted strings; sorting it directly orders
# the rows lexicographically ("10.1" before "9.5"). Derive the row order from
# the numeric values instead.
sum_table = sum_frame.transpose()
leth_order = (sum_table[col_labels[5]]
              .map(lambda val: float(str(val).replace(",", "")))
              .sort_values(ascending=False)
              .index)
sum_styler = sum_table.loc[leth_order].style

# Styler.hide_columns was removed in pandas 2.0; use Styler.hide where it
# exists and fall back to hide_columns on older versions.
if hasattr(sum_styler, "hide"):
    sum_styler = sum_styler.hide(subset=hide_col, axis="columns")
else:
    sum_styler = sum_styler.hide_columns(hide_col)
sum_styler

In [None]:
# Sum of the infections per country over seven days: the seven entries
# before the newest one, taken from the date keys of "at".
days = list(full_data["countries"]["at"]["cases"])[-8:-1]

curr_data = {}
curr_plot = {}
for ccode, access_data in full_data["countries"].items():
    curr_cases = {day: access_data["cases"][day] for day in days}
    sum_cases = sum(day_values[0] for day_values in curr_cases.values())

    ctry_pop = access_data["population"]
    perc_cases = round(sum_cases / (ctry_pop/100), 3)
    pop_text = f'{ctry_pop:,}'
    sum_text = f'{sum_cases:,}'

    curr_data[ccode] = {"country_name": access_data["country_name"],
                        "population": pop_text,
                        "sum_cases": sum_text,
                        "perc_cases": perc_cases,
                        "cases": copy.deepcopy(curr_cases)}
    # Table row: two display strings, the percentage and the raw sum
    curr_plot[access_data["country_name"]] = [pop_text, sum_text,
                                              perc_cases, sum_cases]

In [None]:
# Add seven days info table
day_col_labels = ["population", "cases last 7 days",
                  "morbidity [% pop]", "sort_cases"]

sum_frame = PanDataFrame(curr_plot, day_col_labels)

# The morbidity column holds numbers here, so it can be sorted directly.
day_styler = sum_frame.transpose().sort_values(by=[day_col_labels[2]],
                                               ascending=False).style

# Styler.hide_columns was removed in pandas 2.0; use Styler.hide where it
# exists and fall back to hide_columns on older versions.
if hasattr(day_styler, "hide"):
    day_styler = day_styler.hide(subset=["sort_cases"], axis="columns")
else:
    day_styler = day_styler.hide_columns(["sort_cases"])
day_styler

In [None]:
# Per day per-mille plot to properly compare increase rates per citizen
def morbidity_plot(data_dict):
    """Plot the per-day confirmed cases per 1000 inhabitants of euro countries.

    Reads the globals cases_dates and markers_available.

    Args:
        data_dict: dict with a "countries" entry; every country needs
            "region", "population", "country_name" and "cases".
    """
    # Always draw into a new figure; plt.subplot(111) would reuse the axes of
    # the current figure under the interactive notebook backend.
    _, plt_ax = plt.subplots()

    euro_countries = [ctry for ctry in data_dict["countries"].values()
                      if ctry["region"] == "euro"]

    for marker_idx, ctry in enumerate(euro_countries):
        curr_pop = ctry["population"]
        # Cases divided by (population / 1000) are per mille, not percent.
        curr_per_day_perm = [round(day_values[0]/(curr_pop/1000), 3)
                             for day_values in ctry["cases"].values()]

        # One marker per country; wrap around instead of raising IndexError
        # when there are more countries than markers.
        marker = markers_available[marker_idx % len(markers_available)]

        # NOTE(review): [-30:-1] leaves out the newest entry and therefore
        # shows 29 points - confirm that this is intended.
        plt_ax.plot(cases_dates[-30:-1],
                    curr_per_day_perm[-30:-1],
                    label=ctry["country_name"],
                    marker=marker)

    # The unit in the title now matches the plotted values (it said "% pop").
    plt_ax.set_title("Per day morbidity (‰ pop) increase euro countries (Last 30 days)")
    plt_ax.set_xlabel("Date")
    plt_ax.legend(loc='upper left', fontsize='xx-small')
    plt.show()

morbidity_plot(full_data)

In [None]:
def plots_comparison(data_dict, last_date, num_days=None, use_bar=False):
    """Draw one small per-day case plot per country in a two-column grid.

    Reads the global cases_dates as x axis for every country.

    Args:
        data_dict: dict with a "countries" entry; every country needs
            "country_name" and "cases".
        last_date: datetime; US entries dated after it are left out.
        num_days: if set, only the slice [-num_days:-1] of dates and values
            is plotted, i.e. the newest entry is left out.
        use_bar: draw bars instead of lines.
    """
    countries = data_dict["countries"]
    if not countries:
        # plt.subplots() cannot build a grid with zero rows.
        return

    num_cols = 2
    num_rows = ceil(len(countries)/num_cols)
    # squeeze=False keeps axs two-dimensional even for a single row, so the
    # [row, column] indexing below always works.
    _, axs = plt.subplots(num_rows, num_cols, figsize=(9, 40), squeeze=False)

    for plot_idx, (ctry, ctry_info) in enumerate(countries.items()):
        cntrow, cnt = divmod(plot_idx, num_cols)

        day_data = [day_values[0]
                    for day, day_values in ctry_info["cases"].items()
                    if not (ctry == "us"
                            and last_date < datetime.fromtimestamp(day/1000))]

        curr_dates = cases_dates
        if num_days:
            curr_dates = cases_dates[-1*num_days:-1]
            day_data = day_data[-1*num_days:-1]

        curr_ax = axs[cntrow, cnt]
        if use_bar:
            curr_ax.bar(curr_dates, day_data)
        else:
            curr_ax.plot(curr_dates, day_data)
        curr_ax.set_title(ctry_info["country_name"])

    # With an odd number of countries the last grid cell stays unused.
    for unused_ax in axs.flat[len(countries):]:
        unused_ax.set_visible(False)

    plt.tight_layout(pad=4.0)

In [None]:
# One line plot per country over the full date range
plots_comparison(data_dict=full_data, last_date=last_euro_date)

In [None]:
# One bar plot per country, restricted via num_days=60
plots_comparison(full_data, last_euro_date, num_days=60, use_bar=True)

In [None]:
# TODO: add a plot of the last 60 days with cases normalized to country population