# Healthsites.io bulk data import

This notebook imports a bulk data export from [Healthsites.io](https://healthsites.io).

In [None]:
import pandas as pd
import csv

In [None]:
# papermill parameters
# Folder the exported CSV is written to. The export cell builds the file path by
# plain string concatenation (output_folder + file name), so keep the trailing slash.
# NOTE(review): presumably overridden by papermill at run time; that requires this
# cell to carry the `parameters` tag — confirm in the notebook metadata.
output_folder = "../output/"

In [None]:
# Download the Healthsites.io world export (the file name carries the date 2020-03-19)
# and decode it as ISO-8859-1.
df = pd.read_csv(
    filepath_or_buffer="https://s3-us-west-1.amazonaws.com/starschema.covid/Healthsites_io_World_2020-03-19.csv",
    encoding="ISO-8859-1",
)

## Filter to columns and rename columns

We are only interested in the coordinates (`HealthX` -> `Long`, `HealthY` -> `Lat`), the provider type (`healthcare` -> `Healthcare_Provider_Type`), `name` (-> `Name`), `operator` (-> `Operator`), the bed count (`beds` -> `Beds`), and staffing (`staff_doct` -> `Staff_Medical` and `staff_nurs` -> `Staff_Nursing`).

We are also excluding 

* indirect medical specialities (e.g. `diagnostics`), 
* chronic care (e.g. dialysis), 
* mental health provision incl. drug rehabilitation, 
* labour and delivery,
* specialities that do not play a role in COVID-19 response (dentistry, optometry, physiotherapy &c.), and 
* alternative medicine and paramedicine.

Finally, we map various local names and different spellings into the following categories:

* `primary_care` (GPs, PCPs, paediatrics),
* `secondary_care`,
* `hospital`s incl. polyclinics,
* `emergency_and_urgent_care`,
* `health_center`s, incl. multispeciality care,
* `pharmacy` (incl. dispensaries).

In [None]:
# Source columns retained from the Healthsites.io export, in output order.
cols_to_keep = ["HealthX", "HealthY", "healthcare", "name", "operator", "beds", "staff_doct", "staff_nurs"]

# Source column name -> output column name.
cols_name_mapping = {
    "HealthX": "Long",
    "HealthY": "Lat",
    "healthcare": "Healthcare_Provider_Type",
    "name": "Name",
    "operator": "Operator",
    "beds": "Beds",
    "staff_doct": "Staff_Medical",
    "staff_nurs": "Staff_Nursing",
}

# The only categories a raw `healthcare` value may be mapped to.
valid_provider_categories = {
    "primary_care",
    "secondary_care",
    "hospital",
    "emergency_and_urgent_care",
    "health_center",
    "pharmacy",
}

# Raw `healthcare` value (local names, spelling variants, multi-valued tags)
# -> canonical provider category. Keys are matched exactly, so case, spacing
# and separator variants each need their own entry.
healthcare_provision_mapping = {
    "hospital": "hospital",
    "pharmacy": "pharmacy",
    "centre": "health_center",
    "clinic": "secondary_care",
    "doctor": "primary_care",
    "yes": "primary_care",
    "general_practitioner": "primary_care",
    "several_doctors": "primary_care",
    "urgent_care": "emergency_and_urgent_care",
    "health_center": "health_center",
    "Polyclinic": "hospital",
    "Health Centre": "health_center",
    "centre_de_santé": "health_center",
    "general_medicine": "primary_care",
    "Hospital": "hospital",
    "group": "health_center",
    "doctors": "health_center",
    "ambulatorio": "health_center",
    "primary hospital": "hospital",
    "pediatrics": "primary_care",
    "general;paediatrics": "primary_care",
    "doctor; pharmacy": "primary_care",
    "polyclinic": "hospital",
    # The key reproduces a misspelling found in the data; the target must be the real category.
    "heath center": "health_center",
    "paramedic": "emergency_and_urgent_care",
    "doctor;midwife": "primary_care",
    "hospital;pharmacy": "hospital",
    "family_practice": "primary_care",
    "nurse": "primary_care",
    "neurologist": "secondary_care",
    "doctor;dentist;physiotherapist;psychotherapist": "health_center",
    "geriatric": "primary_care",
    "laboratory;doctor": "health_center",
    "surgery": "primary_care",
    "doctor;dentist;physiotherapist": "health_center",
    "internist": "primary_care",
    "physician_assistant": "emergency_and_urgent_care",
    "general": "primary_care",
    "doctor;nurse": "primary_care",
    "generalist": "primary_care",
    "clinik": "hospital",
    "clinic;dentist;doctor;pharmacy": "health_center",
    "pharmacy;laboratory;doctor": "health_center",
    "nursing": "primary_care",
    "pediatrician": "primary_care",
    "General Physician": "primary_care",
    "centre;doctor": "health_center",
    "polyambulatory": "health_center",
    "paediatrics": "primary_care",
    "clinic;doctors;dialysis": "health_center",
    "dispensary": "pharmacy",
    "doctor;hospital": "hospital",
    "center": "health_center",
    "otolaryngologist": "secondary_care",
    "first_aid": "emergency_and_urgent_care",
    "clinic;laboratory;physiotherapist;occupational_therapist": "health_center",
    "doctor;pharmacy": "primary_care",
    "pharmacy;doctor": "primary_care",
    "doctor;pharmacy;hospital;dentist;laboratory;birthing_center": "health_center",
    "doctor;pharmacy;dentist;laboratory": "health_center",
    "doctor;pharmacy;hospital;dentist;physiotherapist;alternative;laboratory;optometrist;birthing_center": "health_center",
    "clinic;doctor": "health_center",
    "Calindagan Health Center": "health_center",
    "doctor+pharmacy": "health_center",
    "health_post": "primary_care",
    "emergency_ward": "emergency_and_urgent_care",
    "laboratory, pharmacy, clinic": "health_center",
    "dentist;doctors": "health_center",
    "nephrology": "secondary_care",
}

# Guard against misspelled targets, which would silently create bogus categories in the output.
_unknown_categories = set(healthcare_provision_mapping.values()) - valid_provider_categories
assert not _unknown_categories, "unknown provider categories in mapping: {}".format(sorted(_unknown_categories))

In [None]:
# Keep only the columns of interest, then switch them to their output names.
# `filter` ignores requested labels that are absent from the frame, so re-running
# this cell after the rename would silently produce a frame with no columns.
df = (
    df
    .filter(items=cols_to_keep, axis="columns")
    .rename(columns=cols_name_mapping)
)

In [None]:
# Collapse the raw provider types into the canonical categories.
# Values without an entry in the mapping pass through `replace` unchanged.
# NOTE(review): the notebook text says some specialities are excluded; nothing is
# dropped here — confirm whether that filtering happens downstream.
df["Healthcare_Provider_Type"] = (
    df["Healthcare_Provider_Type"]
    .replace(to_replace=healthcare_provision_mapping)
)

In [None]:
# Write the cleaned table to HS_BULK_DATA.csv inside the output folder, without the
# index column; QUOTE_NONNUMERIC wraps every non-numeric field in quotes.
df.to_csv(
    path_or_buf=output_folder + "HS_BULK_DATA.csv",
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
)