Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added sql WHO_TIMESERIES from WHO_DAILY
- Loading branch information
Showing
2 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "senior-ceramic", | ||
"metadata": {}, | ||
"source": [ | ||
"## WHO Coronavirus disease (COVID-19) 24-hour reports\n", | ||
"\n", | ||
"24-hour report sourced from https://covid19.who.int/table\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "proud-florist", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import pycountry\n", | ||
"import requests\n", | ||
"import os\n", | ||
"import re\n", | ||
"import numpy\n", | ||
"from datetime import datetime" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "enormous-mount", | ||
"metadata": { | ||
"tags": [ | ||
"parameters" | ||
] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# papermill parameters\n", | ||
"output_folder = \"../output/\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "nominated-ecology", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"url = \"https://covid19.who.int/WHO-COVID-19-global-table-data.csv\"\n", | ||
"df = pd.read_csv(url)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "brave-class", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df[\"Date\"] = datetime.utcnow().strftime(\"%Y-%m-%d\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "conservative-albania", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Strip the literal \"[1]\" footnote marker. regex=False is explicit because the default of\n", | ||
"# Series.str.replace changed from regex=True to regex=False in pandas 2.0.\n", | ||
"df[\"Name\"] = df[\"Name\"].str.replace(\"[1]\", \"\", regex=False)\n", | ||
"# Drop a trailing parenthesised qualifier, keeping only the text in front of it.\n", | ||
"df[\"Name\"] = df[\"Name\"].replace(r\"(.*)\\s+\\(.*\\)\", r\"\\1\", regex=True)\n", | ||
"# Empty placeholder; populated by the pycountry lookup that follows.\n", | ||
"df[\"ISO3166_1\"] = \"\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "historic-clear", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# WHO display names that pycountry does not match directly, mapped to a searchable name.\n", | ||
"SEARCH_NAME_OVERRIDES = {\n", | ||
"    \"The United Kingdom\": \"United Kingdom\",\n", | ||
"    \"United States of America\": \"United States\",\n", | ||
"    # NOTE(review): relies on fuzzy search resolving \"Jerusalem\"; confirm it yields PS.\n", | ||
"    \"occupied Palestinian territory, including east Jerusalem\": \"Jerusalem\",\n", | ||
"    \"Pitcairn Islands\": \"Pitcairn\",\n", | ||
"    \"Côte d’Ivoire\": \"Côte d'Ivoire\",\n", | ||
"    \"Democratic Republic of the Congo\": \"Congo, The Democratic Republic of the\",\n", | ||
"    \"United States Virgin Islands\": \"Virgin Islands, U.S.\",\n", | ||
"}\n", | ||
"\n", | ||
"\n", | ||
"def resolve_country(search_name):\n", | ||
"    \"\"\"Return the pycountry record for ``search_name``, or None when nothing matches.\n", | ||
"\n", | ||
"    An exact name lookup is tried first; a fuzzy search is the fallback.\n", | ||
"    \"\"\"\n", | ||
"    try:\n", | ||
"        # Depending on the pycountry version, an unknown name either returns None\n", | ||
"        # or raises a LookupError subclass; both cases fall through to fuzzy search.\n", | ||
"        exact = pycountry.countries.get(name=search_name)\n", | ||
"    except LookupError:\n", | ||
"        exact = None\n", | ||
"    if exact:\n", | ||
"        return exact\n", | ||
"    try:\n", | ||
"        candidates = pycountry.countries.search_fuzzy(search_name)\n", | ||
"    except LookupError:\n", | ||
"        # search_fuzzy signals \"no match\" with LookupError; leave the row unresolved.\n", | ||
"        return None\n", | ||
"    return candidates[0] if candidates else None\n", | ||
"\n", | ||
"\n", | ||
"countries = list(df[\"Name\"].unique())\n", | ||
"for name in countries:\n", | ||
"    # Skip the aggregate row and any missing (non-string) names.\n", | ||
"    if not isinstance(name, str) or name == \"Global\":\n", | ||
"        continue\n", | ||
"    country = resolve_country(SEARCH_NAME_OVERRIDES.get(name, name))\n", | ||
"    if country is None:\n", | ||
"        continue\n", | ||
"    rows = df[\"Name\"] == name\n", | ||
"    # Single .loc call on the frame: the chained df[col].loc[mask] = ... form writes\n", | ||
"    # to a temporary and is a no-op under pandas Copy-on-Write.\n", | ||
"    df.loc[rows, \"ISO3166_1\"] = country.alpha_2\n", | ||
"    df.loc[rows, \"Name\"] = country.name" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "exact-breath", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"column_map = {\n", | ||
" \"Name\": \"COUNTRY_REGION\",\n", | ||
" \"Cases - cumulative total\": \"CASES_TOTAL\",\n", | ||
" \"Cases - cumulative total per 100000 population\": \"CASES_TOTAL_PER_100000\",\n", | ||
" \"Cases - newly reported in last 24 hours\": \"CASES\",\n", | ||
" \"Deaths - cumulative total\": \"DEATHS_TOTAL\",\n", | ||
" \"Deaths - cumulative total per 100000 population\": \"DEATHS_TOTAL_PER_100000\",\n", | ||
" \"Deaths - newly reported in last 24 hours\": \"DEATHS\",\n", | ||
" \"Transmission Classification\": \"TRANSMISSION_CLASSIFICATION\",\n", | ||
" \"Date\": \"DATE\",\n", | ||
" \"ISO3166_1\": \"ISO3166_1\"\n", | ||
"}\n", | ||
"df = df.rename(columns=column_map)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "hired-mainland", | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df.dtypes" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "mysterious-storm", | ||
"metadata": {}, | ||
"source": [ | ||
"```sql\n", | ||
"CREATE TABLE WHO_DAILY_REPORT (\n", | ||
" COUNTRY_REGION varchar,\n", | ||
" CASES_TOTAL int,\n", | ||
" CASES_TOTAL_PER_100000 float,\n", | ||
" CASES int,\n", | ||
" DEATHS_TOTAL int,\n", | ||
" DEATHS_TOTAL_PER_100000 float,\n", | ||
" DEATHS int,\n", | ||
" TRANSMISSION_CLASSIFICATION varchar,\n", | ||
" ISO3166_1 VARCHAR(2),\n", | ||
" DATE timestamp_ntz\n", | ||
")\n", | ||
"```" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "lightweight-partnership", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# os.path.join keeps the path valid even if output_folder is supplied without a trailing slash.\n", | ||
"output_path = os.path.join(output_folder, \"WHO_DAILY_REPORT.csv\")\n", | ||
"# Write only the renamed columns, in column_map order.\n", | ||
"df.to_csv(output_path, index=False, columns=list(column_map.values()))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"celltoolbar": "Tags", | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
-- Accumulating history table; its columns mirror WHO_DAILY_REPORT one-to-one. | ||
CREATE TABLE IF NOT EXISTS WHO_TIMESERIES ( | ||
COUNTRY_REGION VARCHAR, | ||
CASES_TOTAL INT, | ||
CASES_TOTAL_PER_100000 FLOAT, | ||
CASES INT, | ||
DEATHS_TOTAL INT, | ||
DEATHS_TOTAL_PER_100000 FLOAT, | ||
DEATHS INT, | ||
TRANSMISSION_CLASSIFICATION VARCHAR, | ||
ISO3166_1 VARCHAR(2), | ||
DATE TIMESTAMP_NTZ | ||
); | ||
|
||
-- Append the current WHO_DAILY_REPORT snapshot to WHO_TIMESERIES. | ||
-- Rows whose DATE is already present in WHO_TIMESERIES are skipped, so | ||
-- re-running the statement for the same day does not duplicate data. | ||
INSERT INTO WHO_TIMESERIES ( | ||
COUNTRY_REGION, | ||
CASES_TOTAL, | ||
CASES_TOTAL_PER_100000, | ||
CASES, | ||
DEATHS_TOTAL, | ||
DEATHS_TOTAL_PER_100000, | ||
DEATHS, | ||
TRANSMISSION_CLASSIFICATION, | ||
ISO3166_1, | ||
DATE | ||
) | ||
SELECT COUNTRY_REGION, | ||
CASES_TOTAL, | ||
CASES_TOTAL_PER_100000, | ||
CASES, | ||
DEATHS_TOTAL, | ||
DEATHS_TOTAL_PER_100000, | ||
DEATHS, | ||
TRANSMISSION_CLASSIFICATION, | ||
ISO3166_1, | ||
DATE | ||
FROM WHO_DAILY_REPORT | ||
-- NOT EXISTS instead of NOT IN (SELECT MAX(DATE) ...): on an empty target MAX() | ||
-- returns NULL and "x NOT IN (NULL)" is never true, so the first load would | ||
-- insert nothing. The subquery also reads the table WHO_TIMESERIES itself, | ||
-- not the invalid object name WHO_TIMESERIES.DATE. | ||
WHERE NOT EXISTS ( | ||
    SELECT 1 | ||
    FROM WHO_TIMESERIES | ||
    WHERE WHO_TIMESERIES.DATE = WHO_DAILY_REPORT.DATE | ||
) | ||
; | ||