# API Requests and Data Preprocessing
### Citation
Junchuan Fan, Yao Li, Kathleen Stewart, Anil R. Kommareddy, Adrianne Bradford, Samantha Chiu, Frauke Kreuter, Neta Barkay, Alyssa Bilinski, Brian Kim, Roee Eliat, Tal Galili, Daniel Haimovich, Sarah LaRocca, Stanley Presser, Katherine Morris, Joshua A Salomon, Elizabeth A. Stuart, Ryan Tibshirani, Tali Alterman Barash, Curtiss Cobb, Andres Garcia, Andi Gros, Ahmed Isa, Alex Kaess, Faisal Karim, Ofir Eretz Kedosha, Shelly Matskel, Roee Melamed, Amey Patankar, Irit Rutenberg, Tal Salmona, David Vannette (2020). COVID-19 World Symptom Survey Data API. https://covidmap.umd.edu/api.html

In [2]:
import requests
import json
import time
from collections import defaultdict

In [2]:
# Fetch the country listing and keep just the value of each entry's
# 'country' field, in the order the API returns them.
response = requests.get('https://covidmap.umd.edu/api/country')
country_payload = json.loads(response.text)
countries = []
for entry in country_payload['data']:
    countries.append(entry['country'])

In [3]:
# Build a lookup of country -> list of its regions from the region endpoint.
response = requests.get('https://covidmap.umd.edu/api/region')
regions = defaultdict(list)
region_rows = json.loads(response.text)['data']
for row in region_rows:
    # defaultdict creates the empty list the first time a country is seen
    regions[row['country']].append(row['region'])

## Get all indicator data for the months of October, November, and December 2020 and January 2021

In [24]:
# Download every indicator for every monthly window and fold the results
# into one record per (iso_code, survey_date) pair.

# One "start-end" window per month, written as YYYYMMDD-YYYYMMDD.
date_ranges = [
    '20201001-20201031',  # Oct
    '20201101-20201130',  # Nov
    '20201201-20201231',  # Dec
    '20210101-20210131',  # Jan
]

# Indicator names passed to the API's `indicator` query parameter.
indicators = [
    'covid',
    'flu',
    'finance',
    'contact',
    'anosmia',
    'vaccine_acpt',
    'covid_vaccine',
    'trust_fam',
    'trust_healthcare',
    'trust_who',
    'trust_govt',
    'trust_politicians',
    'twodoses',
    'concerned_sideeffects',
    'hesitant_sideeffects',
    'modified_acceptance',
    'access_wash',
    'wash_hands_24h_3to6',
    'wash_hands_24h_7orMore',
    'cmty_covid',
]

# Merged records, keyed by "<iso_code>_<survey_date>".
data = {}

for date_range in date_ranges:
    for indicator in indicators:
        response = requests.get(f'https://covidmap.umd.edu/api/resources?indicator={indicator}&type=daily&country=all&daterange={date_range}')
        # Decode the body once; it is needed for both the rows and the status.
        payload = json.loads(response.text)

        for record in payload['data']:
            merge_key = f"{record['iso_code']}_{record['survey_date']}"
            # Fields from later indicators overwrite same-named earlier ones.
            data.setdefault(merge_key, {}).update(record)

        # Short pause between consecutive requests.
        time.sleep(0.1)
        print(f"{payload['status']}: Indicator: {indicator}, Date Range: {date_range}")


print('Done')

success: Indicator: covid, Date Range: 20201001-20201031
success: Indicator: flu, Date Range: 20201001-20201031
success: Indicator: finance, Date Range: 20201001-20201031
success: Indicator: contact, Date Range: 20201001-20201031
success: Indicator: anosmia, Date Range: 20201001-20201031
success: Indicator: vaccine_acpt, Date Range: 20201001-20201031
success: Indicator: covid_vaccine, Date Range: 20201001-20201031
success: Indicator: trust_fam, Date Range: 20201001-20201031
success: Indicator: trust_healthcare, Date Range: 20201001-20201031
success: Indicator: trust_who, Date Range: 20201001-20201031
success: Indicator: trust_govt, Date Range: 20201001-20201031
success: Indicator: trust_politicians, Date Range: 20201001-20201031
success: Indicator: twodoses, Date Range: 20201001-20201031
success: Indicator: concerned_sideeffects, Date Range: 20201001-20201031
success: Indicator: hesitant_sideeffects, Date Range: 20201001-20201031
success: Indicator: modified_acceptance, Date Range: 202

In [25]:
import pprint

# Pull the country list and the summary from covid19api.com, then merge each
# country's summary fields into the matching survey record in `data`.

# Decode the body of the request just made. (`countries` is rebound here; the
# previous code read `.text` from the old `countries` value instead of from
# `response`.)
response = requests.get('https://api.covid19api.com/countries')
countries = json.loads(response.text)

response = requests.get('https://api.covid19api.com/summary')
response = json.loads(response.text)

pp = pprint.PrettyPrinter(indent=2)
for e in response['Countries']:
    # Reformat the date and drop the time part.
    # Assumes e['Date'] begins with "YYYY-MM-DD" and that the survey_date half
    # of the `data` keys is YYYYMMDD (same layout as the daterange strings
    # used for the survey requests) -- TODO confirm against both APIs.
    date = e['Date']
    date = date[0:4] + date[5:7] + date[8:10]  # year + month + day

    # NOTE(review): `data` keys are built from the survey's 'iso_code' while
    # this uses 'CountryCode'; confirm both services use the same code format,
    # otherwise no key will ever match.
    k = f"{e['CountryCode']}_{date}"
    if k in data:
        # Summary fields overwrite same-named survey fields.
        data[k] = {**data[k], **e}

## Some preprocessing

In [32]:
import csv

# Write every merged record in `data` to compiled.csv, one row per record.

# Manually determined column order for the known fields.
keys = [
    'gid_0', 'iso_code', 'country', 'sample_size', 'survey_date',
    'access_wash_se', 'access_wash_se_unw', 'anos_se',
    'anos_se_unw', 'cli_se', 'cli_se_unw', 'community_cli_se',
    'community_cli_se_unw', 'dc_se_unw', 'hf_se', 'hf_se_unw', 'ili_se', 'ili_se_unw',
    'mc_se', 'pct_access_wash', 'pct_access_wash_unw', 'pct_community_cli', 'pct_community_cli_unw', 'pct_trust_fam',
    'pct_trust_fam_unw', 'pct_trust_govt', 'pct_trust_govt_unw', 'pct_trust_healthcare', 'pct_trust_healthcare_unw',
    'pct_trust_politicians', 'pct_trust_politicians_unw', 'pct_trust_who', 'pct_trust_who_unw', 'pct_wash_hands_24h_3to6',
    'pct_wash_hands_24h_3to6_unw', 'pct_wash_hands_24h_7ormore', 'pct_wash_hands_24h_7ormore_unw', 'percent_anos',
    'percent_anos_unw', 'percent_cli', 'percent_cli_unw', 'percent_dc', 'percent_dc_unw', 'percent_hf', 'percent_hf_unw',
    'percent_ili', 'percent_ili_unw', 'percent_vu', 'percent_vu_unw', 'trust_fam_se',
    'trust_fam_se_unw', 'trust_govt_se', 'trust_govt_se_unw', 'trust_healthcare_se', 'trust_healthcare_se_unw',
    'trust_politicians_se', 'trust_politicians_se_unw', 'trust_who_se', 'trust_who_se_unw', 'vu_se', 'vu_se_unw',
    'wash_hands_24h_3to6_se', 'wash_hands_24h_3to6_se_unw', 'wash_hands_24h_7ormore_se', 'wash_hands_24h_7ormore_se_unw',
    'hesitant_sideeffects_se_unw', 'pct_hesitant_sideeffects', 'pct_concerned_sideeffects_unw', 'concerned_sideeffects_se',
    'pct_concerned_sideeffects', 'concerned_sideeffects_se_unw', 'hesitant_sideeffects_se', 'pct_hesitant_sideeffects_unw',
    'covid_vaccine_se', 'pct_covid_vaccine_unw', 'pct_covid_vaccine', 'covid_vaccine_se_unw',
    'modified_acceptance_se', 'pct_modified_acceptance', 'pct_modified_acceptance_unw', 'modified_acceptance_se_unw',
    'twodoses_se', 'pct_twodoses_unw', 'pct_twodoses', 'twodoses_se_unw',
]

# csv.DictWriter raises ValueError when a row contains a key that is not in
# `fieldnames`. Rows may carry fields that are not in the curated list above,
# so collect those and append them (sorted, for a stable header) after the
# curated columns. When there are none, the output is unchanged.
known_keys = set(keys)
extra_keys = sorted(
    {field for row in data.values() for field in row} - known_keys
)
fieldnames = keys + extra_keys

with open('compiled.csv', 'w', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    # Fields missing from a row are written as empty cells (DictWriter default).
    writer.writerows(data.values())