## WHO Coronavirus disease (COVID-19) 24-hour reports

24-hour report sourced from https://covid19.who.int/table


In [None]:
import pandas as pd
import pycountry
import requests
import os
import re
import numpy
from datetime import datetime

In [None]:
# papermill parameters
# Directory that the final cell writes WHO_DAILY_REPORT.csv into.
# NOTE(review): presumably overridden at run time through papermill - confirm.
output_folder = "../output/"

In [None]:
# Fetch the WHO global table as CSV straight from the URL and load it into a
# DataFrame; every later cell works on this `df`.
url = "https://covid19.who.int/WHO-COVID-19-global-table-data.csv"
df = pd.read_csv(url)

In [None]:
# Stamp every row with the current UTC calendar date (YYYY-MM-DD).
# datetime.utcnow() is deprecated since Python 3.12, so a timezone-aware "now"
# in UTC is used instead; the formatted string is identical.
from datetime import timezone

df["Date"] = datetime.now(timezone.utc).strftime("%Y-%m-%d")

In [None]:
# Normalise the WHO display names before they are matched against ISO 3166-1.

# Strip the literal "[1]" footnote marker. The match is requested as a plain
# substring (regex=False): the previous non-raw "\[1\]" pattern relied on
# Series.str.replace defaulting to regex mode, which pandas >= 2.0 no longer
# does, so the marker would have been left in place there.
df["Name"] = df["Name"].str.replace("[1]", "", regex=False)

# Remove a parenthesised qualifier together with the whitespace in front of
# it, e.g. "Foo (Bar)" -> "Foo".
df["Name"] = df["Name"].replace(r"(.*)\s+\(.*\)", r"\1", regex=True)

# Start with an empty code for every row; the lookup loop fills in the
# alpha-2 code wherever a country can be resolved.
df["ISO3166_1"] = ""

In [None]:
# WHO display names that pycountry does not resolve as-is, mapped to the name
# that is looked up instead.
# NOTE(review): "Jerusalem" as the search term for the occupied Palestinian
# territory is carried over unchanged - confirm it resolves to the intended
# ISO 3166-1 entry.
search_name_overrides = {
    "The United Kingdom": "United Kingdom",
    "United States of America": "United States",
    "occupied Palestinian territory, including east Jerusalem": "Jerusalem",
    "Pitcairn Islands": "Pitcairn",
    "Côte d’Ivoire": "Côte d'Ivoire",
    "Democratic Republic of the Congo": "Congo, The Democratic Republic of the",
    "United States Virgin Islands": "Virgin Islands, U.S.",
}


def _resolve_country(search_name):
    """Return the pycountry record for *search_name*, or None if unresolved.

    An exact name match is tried first; if that yields nothing, the best
    fuzzy-search hit is used.
    """
    try:
        exact = pycountry.countries.get(name=search_name)
    except LookupError:
        # Depending on the pycountry version an unknown name may raise
        # instead of returning None; treat both the same way.
        exact = None
    if exact:
        return exact

    try:
        candidates = pycountry.countries.search_fuzzy(search_name)
    except LookupError:
        # search_fuzzy signals "no match" with LookupError; leave the row as is.
        return None
    return candidates[0] if candidates else None


# Fill in ISO3166_1 and replace each WHO name with pycountry's canonical name.
countries = list(df["Name"].unique())
for name in countries:
    # "Global" is the aggregate row, not a country.
    if name == "Global":
        continue

    pyc = _resolve_country(search_name_overrides.get(name, name))
    if pyc is None:
        continue

    # Build the mask once, before "Name" is overwritten, and assign through a
    # single df.loc[rows, column] call. The previous df[col].loc[mask] = ...
    # form is chained assignment, which is not guaranteed to write back to df.
    rows = df["Name"] == name
    df.loc[rows, "ISO3166_1"] = pyc.alpha_2
    df.loc[rows, "Name"] = pyc.name

In [None]:
# Source CSV header -> output column name.
# The insertion order of this dict is significant: the export cell passes
# column_map.values() as the column list, so it also fixes the column order of
# the written CSV.
column_map = {
    "Name": "COUNTRY_REGION",
    "Cases - cumulative total": "CASES_TOTAL",
    "Cases - cumulative total per 100000 population": "CASES_TOTAL_PER_100000",
    "Cases - newly reported in last 24 hours": "CASES",
    "Deaths - cumulative total": "DEATHS_TOTAL",
    "Deaths - cumulative total per 100000 population": "DEATHS_TOTAL_PER_100000",
    "Deaths - newly reported in last 24 hours": "DEATHS",
    "Transmission Classification": "TRANSMISSION_CLASSIFICATION",
    "Date": "DATE",
    "ISO3166_1": "ISO3166_1"
}
# Columns not listed in column_map keep their original names.
df = df.rename(columns=column_map)

In [None]:
# Show the dtype pandas inferred for each column (notebook display only; the
# result is not stored).
df.dtypes

```sql
-- Target table for WHO_DAILY_REPORT.csv.
-- NOTE(review): the CSV is written in column_map order, i.e. DATE before
-- ISO3166_1, while this definition lists ISO3166_1 before DATE. Confirm the
-- load maps columns by name, or align one side with the other.
-- NOTE(review): DATE is populated with a "%Y-%m-%d" string, so it carries no
-- time-of-day component.
CREATE TABLE WHO_DAILY_REPORT (
    COUNTRY_REGION varchar,
    CASES_TOTAL int,
    CASES_TOTAL_PER_100000 float,
    CASES int,
    DEATHS_TOTAL int,
    DEATHS_TOTAL_PER_100000 float,
    DEATHS int,
    TRANSMISSION_CLASSIFICATION varchar,
    ISO3166_1 VARCHAR(2),
    DATE timestamp_ntz
)
```

In [None]:
# Write the renamed columns, in column_map order, to WHO_DAILY_REPORT.csv.
# os.path.join is used instead of string concatenation so the path is still
# correct when output_folder is supplied without a trailing slash.
df.to_csv(
    os.path.join(output_folder, "WHO_DAILY_REPORT.csv"),
    index=False,
    columns=list(column_map.values()),
)