Skip to content

Commit

Permalink
added sql WHO_TIMESERIES from WHO_DAILY
Browse files Browse the repository at this point in the history
  • Loading branch information
piglerp committed Feb 26, 2021
1 parent 8c15304 commit b147f74
Show file tree
Hide file tree
Showing 2 changed files with 262 additions and 0 deletions.
222 changes: 222 additions & 0 deletions notebooks/WHO_DAILY_REPORT.ipynb
@@ -0,0 +1,222 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "senior-ceramic",
"metadata": {},
"source": [
"## WHO Coronavirus disease (COVID-2019) 24 hour reports\n",
"\n",
"24 hour report from https://covid19.who.int/table\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "proud-florist",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pycountry\n",
"import requests\n",
"import os\n",
"import re\n",
"import numpy\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "enormous-mount",
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# papermill parameters\n",
"output_folder = \"../output/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "nominated-ecology",
"metadata": {},
"outputs": [],
"source": [
"url = \"https://covid19.who.int/WHO-COVID-19-global-table-data.csv\"\n",
"df = pd.read_csv(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "brave-class",
"metadata": {},
"outputs": [],
"source": [
"df[\"Date\"] = datetime.utcnow().strftime(\"%Y-%m-%d\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "conservative-albania",
"metadata": {},
"outputs": [],
"source": [
"df[\"Name\"] = df[\"Name\"].str.replace(\"\\[1\\]\", \"\")\n",
"df[\"Name\"] = df[\"Name\"].replace(r\"(.*)\\s+\\(.*\\)\", r\"\\1\", regex=True)\n",
"df[\"ISO3166_1\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "historic-clear",
"metadata": {},
"outputs": [],
"source": [
"countries = list(df[\"Name\"].unique())\n",
"for name in countries:\n",
" search_name = name\n",
" if name == \"Global\":\n",
" continue\n",
" elif name == \"The United Kingdom\":\n",
" search_name = \"United Kingdom\"\n",
" elif name == \"United States of America\":\n",
" search_name = \"United States\"\n",
" elif name == \"occupied Palestinian territory, including east Jerusalem\":\n",
" search_name = \"Jerusalem\"\n",
" elif name == \"Pitcairn Islands\":\n",
" search_name = \"Pitcairn\"\n",
" elif name == \"Côte d’Ivoire\":\n",
" search_name = \"Côte d'Ivoire\"\n",
" elif name == \"Democratic Republic of the Congo\":\n",
" search_name = \"Congo, The Democratic Republic of the\"\n",
" elif name == \"United States Virgin Islands\":\n",
" search_name = \"Virgin Islands, U.S.\"\n",
" \n",
" \n",
" try:\n",
" pyc = pycountry.countries.get(name=search_name)\n",
" \n",
" if pyc:\n",
" df[\"ISO3166_1\"].loc[name == df[\"Name\"]] = pyc.alpha_2\n",
" df[\"Name\"].loc[name == df[\"Name\"]] = pyc.name\n",
" continue\n",
" try:\n",
" pyc_list = pycountry.countries.search_fuzzy(search_name)\n",
" if len(pyc_list):\n",
"\n",
" df[\"ISO3166_1\"].loc[name == df[\"Name\"]] = pyc_list[0].alpha_2\n",
" df[\"Name\"].loc[name == df[\"Name\"]] = pyc_list[0].name\n",
" continue\n",
" except:\n",
" pass\n",
" pass\n",
" except LookupError:\n",
" try:\n",
" pyc_list = pycountry.countries.search_fuzzy(search_name)\n",
" if len(pyc_list):\n",
" df[\"ISO3166_1\"].loc[name == df[\"Name\"]] = pyc_list[0].alpha_2\n",
" df[\"Name\"].loc[name == df[\"Name\"]] = pyc_list[0].name\n",
" continue\n",
" except:\n",
" pass\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "exact-breath",
"metadata": {},
"outputs": [],
"source": [
"column_map = {\n",
" \"Name\": \"COUNTRY_REGION\",\n",
" \"Cases - cumulative total\": \"CASES_TOTAL\",\n",
" \"Cases - cumulative total per 100000 population\": \"CASES_TOTAL_PER_100000\",\n",
" \"Cases - newly reported in last 24 hours\": \"CASES\",\n",
" \"Deaths - cumulative total\": \"DEATHS_TOTAL\",\n",
" \"Deaths - cumulative total per 100000 population\": \"DEATHS_TOTAL_PER_100000\",\n",
" \"Deaths - newly reported in last 24 hours\": \"DEATHS\",\n",
" \"Transmission Classification\": \"TRANSMISSION_CLASSIFICATION\",\n",
" \"Date\": \"DATE\",\n",
" \"ISO3166_1\": \"ISO3166_1\"\n",
"}\n",
"df = df.rename(columns=column_map)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "hired-mainland",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df.dtypes"
]
},
{
"cell_type": "markdown",
"id": "mysterious-storm",
"metadata": {},
"source": [
"```sql\n",
"CREATE TABLE WHO_DAILY_REPORT (\n",
" COUNTRY_REGION varchar,\n",
" CASES_TOTAL int,\n",
" CASES_TOTAL_PER_100000 float,\n",
" CASES int,\n",
" DEATHS_TOTAL int,\n",
" DEATHS_TOTAL_PER_100000 float,\n",
" DEATHS int,\n",
" TRANSMISSION_CLASSIFICATION varchar,\n",
" ISO3166_1 VARCHAR(2),\n",
" DATE timestamp_ntz\n",
")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "lightweight-partnership",
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_folder + \"WHO_DAILY_REPORT.csv\", index=False, columns=column_map.values())"
]
}
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
40 changes: 40 additions & 0 deletions snowflake/sql/WHO_DAILY_REPORT_timeseries.sql
@@ -0,0 +1,40 @@
CREATE TABLE IF NOT EXISTS WHO_TIMESERIES (
COUNTRY_REGION VARCHAR,
CASES_TOTAL int,
CASES_TOTAL_PER_100000 float,
CASES int,
DEATHS_TOTAL int,
DEATHS_TOTAL_PER_100000 float,
DEATHS int,
TRANSMISSION_CLASSIFICATION varchar,
ISO3166_1 VARCHAR(2),
DATE timestamp_ntz
);

INSERT INTO WHO_TIMESERIES (
COUNTRY_REGION,
CASES_TOTAL,
CASES_TOTAL_PER_100000,
CASES,
DEATHS_TOTAL,
DEATHS_TOTAL_PER_100000,
DEATHS,
TRANSMISSION_CLASSIFICATION,
ISO3166_1,
DATE
)
SELECT COUNTRY_REGION,
CASES_TOTAL,
CASES_TOTAL_PER_100000,
CASES,
DEATHS_TOTAL,
DEATHS_TOTAL_PER_100000,
DEATHS,
TRANSMISSION_CLASSIFICATION,
ISO3166_1,
DATE
FROM WHO_DAILY_REPORT
WHERE WHO_DAILY_REPORT.DATE NOT IN (
SELECT MAX(DATE) FROM WHO_TIMESERIES.DATE
)
;

0 comments on commit b147f74

Please sign in to comment.