## WHO Coronavirus disease (COVID-2019) situation reports

This notebook converts the PDF situation reports published at https://www.who.int/emergencies/diseases/novel-coronavirus-2019/situation-reports into tabular format.


In [None]:
from datetime import datetime, timezone

import pandas as pd
import pycountry
import tabula

In [None]:
# papermill parameters: which situation report to process, the date it
# covers, and the folder the resulting CSV is written to.
situation_report = "20200321-sitrep-61-covid-19"
date = "2020-03-21"
output_folder = "../output/"

In [None]:
# Build the URL of the report PDF from its name, then let tabula extract the
# tables from every page. header=None is forwarded to pandas, so the columns
# of each extracted table are labelled by position (0, 1, 2, ...).
report_url = f"https://www.who.int/docs/default-source/coronaviruse/situation-reports/{situation_report}.pdf"
all_tables = tabula.read_pdf(
    report_url,
    pages='all',
    pandas_options={'header': None},
)


In [None]:
# Keep only the extracted tables that have exactly 7 columns; every other
# table found in the PDF is ignored.
seven_column_tables = [table for table in all_tables if len(table.columns) == 7]
if not seven_column_tables:
    # Without this check the column lookups below fail with a bare
    # "KeyError: 0", which says nothing about the actual cause.
    raise ValueError("No 7-column tables were found in the situation report PDF")

# Concatenate once: growing a DataFrame inside the loop re-copies all
# accumulated rows on every iteration, and seeding it with an empty frame
# triggers pandas' deprecation warning for empty entries in concat.
country_data = pd.concat(seven_column_tables, ignore_index=True, sort=True)

# Remove rows with a null country (column 0) or null total cases (column 1),
# and the header row, recognised by the 'reported case' text in column 6.
has_country = country_data[0].notnull()
has_cases = country_data[1].notnull()
is_header_row = country_data[6] == 'reported case'
country_data = country_data[has_country & has_cases & ~is_header_row]

# Rename the positional columns to descriptive names.
country_data = country_data.rename(
    columns={
        0: 'Country',
        1: 'Total_Cases',
        2: 'Cases_New',
        3: 'Deaths',
        4: 'Deaths_New',
        5: 'Transmission_Classification',
        6: 'Days_Since_Last_Reported_Case',
    }
)


In [None]:
# Replacement table for country labels as they appear in the report
# (long official forms, footnote markers, typographic apostrophes).
changed_names = {
    "The United Kingdom": "United Kingdom",
    "Serbia††": "Serbia",
    "Iran (Islamic Republic of)": "Iran",
    "occupied Palestinian territory": "Palestine",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "Bolivia (Plurinational State of)": "Bolivia",
    "Côte d’Ivoire": "Côte d'Ivoire",
}

# First turn carriage returns inside a name into spaces, then apply the
# whole-value replacements from the table above.
country_data["Country"] = (
    country_data["Country"]
    .str.replace('\r', ' ')
    .replace(changed_names)
)

def resolve_iso3166_1_row(row):
    """Return a copy of ``row`` extended with ISO 3166-1 country information.

    The value in ``row["Country"]`` is looked up with pycountry's fuzzy
    search and the first result is used.

    Args:
        row: A table row with a "Country" entry (a pandas Series when called
            through ``DataFrame.apply``).

    Returns:
        A copy of ``row`` with two extra entries: "ISO3166-1" (the alpha-2
        code of the matched country) and "Country/Region" (its pycountry
        name).

    Raises:
        LookupError: If the fuzzy search yields no match; the message names
            the country that could not be resolved.
    """
    name = row["Country"]
    try:
        country = pycountry.countries.search_fuzzy(name)[0]
    except LookupError as err:
        # Same exception type as before, but say which row caused it.
        raise LookupError(f"No ISO 3166-1 match for country {name!r}") from err

    # Work on a copy: pandas does not support functions that mutate the
    # object passed to them by apply().
    resolved = row.copy()
    resolved["ISO3166-1"] = country.alpha_2
    resolved['Country/Region'] = country.name
    return resolved
    

# Run the ISO 3166-1 lookup on every row (axis=1 passes one row per call).
data = country_data.apply(resolve_iso3166_1_row, axis=1)
        


In [None]:
# Bare expression: shows the table as the notebook cell's output for inspection.
data

## Adding Metadata

Before we save the file locally, we add the `Last_Update_Date` in `UTC` time zone.

In addition, we add the report date, the report name, and the report URL.

In [None]:
# Metadata columns: the report date, the report's name and source URL, and
# the time at which this table was generated.
data["Date"] = date
data["Situation_Report_name"] = situation_report
data["Situation_Report_URL"] = report_url
# datetime.utcnow() is deprecated since Python 3.12. Take the current time in
# UTC explicitly and drop tzinfo so the stored value remains a naive UTC
# timestamp, exactly as before.
data["Last_Update_Date"] = datetime.now(timezone.utc).replace(tzinfo=None)


In [None]:
# Write the final table to the output folder, without the row index.
data.to_csv(f"{output_folder}WHO_SITUATION_REPORTS.csv", index=False)