# Using the Bing API to Collect Coronavirus Statistics

This is the backend for "Bend the Curve" - https://github.com/shomilj/Bend-the-Curve-iOS.

In [1]:
import requests, json
import numpy as np

To be honest, I'm not sure if these headers are necessary! Regardless, I'm not sure if these are official API's – I pulled them from https://www.bing.com/covid.

In [4]:
headers = {
    'authority': 'www.bing.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'sec-fetch-dest': 'empty',
    'accept': '*/*',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'referer': 'https://www.bing.com/covid',
    'accept-language': 'en-US,en;q=0.9',
}

params = (
    ('IG', '140FBA510971427188AF4078EE911038'),
)

metadata = requests.get('https://www.bing.com/covid/data', headers=headers, params=params).json()
headers = {
    'authority': 'www.bing.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'sec-fetch-dest': 'empty',
    'accept': '*/*',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'referer': 'https://www.bing.com/covid',
    'accept-language': 'en-US,en;q=0.9',
}

graphdata = requests.get('https://www.bing.com/covid/graphdata', headers=headers).json()
targets = list(graphdata.keys())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

*graphdata* contains data needed to generate the graphs.

In [3]:
graphdata['newyork_unitedstates'][-1]

NameError: name 'graphdata' is not defined

In [30]:
graphdata['unitedstates'][:3]

[{'confirmed': 1, 'fatal': 0, 'recovered': 0, 'date': '2020-01-24'},
 {'confirmed': 2, 'fatal': 0, 'recovered': 0, 'date': '2020-01-25'},
 {'confirmed': 2, 'fatal': 0, 'recovered': 0, 'date': '2020-01-26'}]

*metadata* is a tree-like structure containing regions and subregions, with data associated with each node.

In [31]:
metadata.keys()

dict_keys(['id', 'displayName', 'areas', 'totalConfirmed', 'totalDeaths', 'totalRecovered', 'lastUpdated'])

Let's parse the tree-like structure into something more usable. *Regions* contains a list of key-metadata pairs (metadata = numConfirmed, numDeaths, etc...)

In [32]:
regions = {}
def parse(d, parent='Global'):
    try:
        areas = d.pop('areas')
    except:
        areas = []
    regions[d.get('id')] = d
    [parse(a, d.get('displayName')) for a in areas]
parse(metadata)

Here's an example.

In [33]:
regions['unitedstates']

{'id': 'unitedstates',
 'displayName': 'United States',
 'totalConfirmed': 83545,
 'totalDeaths': 1201,
 'totalRecovered': 1864,
 'lastUpdated': '2020-03-27T03:36:43.163Z',
 'lat': 39.495914459228516,
 'long': -98.98998260498047,
 'parentId': 'world'}

We use a safediv function to avoid division by zero errors.

In [34]:
def safediv(a, b):
    try:
        return round(100 * ((a - b) / b))
    except:
        return 0

For each target region, extract all relevant statistics and graph data.

In [40]:
uploads = {}

for target in targets:
    meta = regions.get(target)
    if meta == None:
        continue
    
    td = regions.get(target).get("totalDeaths")
    tr = regions.get(target).get("totalRecovered")
    tc = regions.get(target).get("totalConfirmed")
    
    if td == None: td = graphdata[target][-1].get('fatal')
    if tr == None: tr = graphdata[target][-1].get('recovered')
    if tc == None: tc = graphdata[target][-1].get('confirmed')
    dates = [r.get('date') for r in graphdata[target]] + ["Today"]
    deaths = [r.get('fatal') for r in graphdata[target]] + [td]
    confirmed = [r.get('confirmed') for r in graphdata[target]] + [tc]
    recovered = [r.get('recovered') for r in graphdata[target]] + [tr]
    
    deaths_log = list(map(int, (100 * np.log1p(deaths))))
    confirmed_log = list(map(int, (100 * np.log1p(confirmed))))
    recovered_log = list(map(int, (100 * np.log1p(recovered))))
    # deaths = list((np.log1p([r.get('fatal') for r in graphdata[target]] + [td]) * 100))
    # confirmed = list((np.rint(np.log1p([r.get('confirmed') for r in graphdata[target]] + [tc]) * 100)))
    # recovered = list((np.rint(np.log1p([r.get('recovered') for r in graphdata[target]] + [tr]) * 100)))

    
    if len(dates) > 12:
        stats = {
            'oneDayPercent': safediv(confirmed[-1], confirmed[-3]),
            'fiveDayPercent': safediv(confirmed[-1], confirmed[-8]),
            'tenDayPercent':  safediv(confirmed[-1], confirmed[-13]),
            'oneDayNum': tc - confirmed[-3],
            'fiveDayNum': tc - confirmed[-8],
            'tenDayNum': tc - confirmed[-13],
            'confirmedCount': tc
        }
    else:
        stats = {'oneDayPercent': 0,
                'fiveDayPercent': 0,
                'tenDayPercent': 0,
                'oneDayNum': 0,
                'fiveDayNum': 0,
                'tenDayNum': 0,
                'confirmedCount': 0}
    
    try:
        parent = regions.get(meta.get('parentId')).get('displayName')
    except:
        parent = 'Global'
    
    if parent == "Italy": continue
        
    upload = {
        'country': parent,
        'lat': meta.get('lat'),
        'lon': meta.get('long'),
        'region': meta.get('displayName'),
        'timeSeriesKeys': ['Confirmed', 'Deaths', 'Recovered'],
        'timeSeriesX': dates,
        'timeSeriesY': {
            'Confirmed': confirmed,
            'Deaths': deaths,
            'Recovered': recovered
        },
        'oneDayPercent': []
    }
    upload.update(stats)
    if upload['country'] == 'Global' and upload.get('region') != None:
        upload['country'] = upload['region']
    uploads[target] = upload

Now, *uploads* contains the data needed for the app to work!

In [41]:
uploads['madrid_spain']

{'country': 'Spain',
 'lat': 40.49482727050781,
 'lon': -3.715894937515259,
 'region': 'Madrid',
 'timeSeriesKeys': ['Confirmed', 'Deaths', 'Recovered'],
 'timeSeriesX': ['2020-02-28',
  '2020-02-29',
  '2020-03-01',
  '2020-03-02',
  '2020-03-04',
  '2020-03-05',
  '2020-03-06',
  '2020-03-07',
  '2020-03-08',
  '2020-03-09',
  '2020-03-11',
  '2020-03-12',
  '2020-03-13',
  '2020-03-14',
  '2020-03-15',
  '2020-03-16',
  '2020-03-17',
  '2020-03-18',
  '2020-03-19',
  '2020-03-20',
  '2020-03-21',
  '2020-03-22',
  '2020-03-23',
  '2020-03-24',
  '2020-03-25',
  'Today'],
 'timeSeriesY': {'Confirmed': [5,
   29,
   53,
   77,
   70,
   90,
   137,
   469,
   849,
   1181,
   1024,
   1388,
   1990,
   2940,
   3544,
   4875,
   5641,
   6781,
   7169,
   8925,
   9706,
   10579,
   12356,
   14597,
   17166,
   17166],
  'Deaths': [0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1535,
   1825,
   2090

# Upload to Firebase/Firestore

The app uses a Firebase/Firestore backend. Let's upload our data to that backend.

In [42]:
from firebase_admin import firestore, credentials
import firebase_admin
try:
    cred = credentials.Certificate('/Users/shomil/Documents/ServerKeys/ncov19.json')
    firebase_admin.initialize_app(cred)
except Exception as e:
    try:
        firebase_admin.initialize_app()
    except Exception as f:
        print("Already initialized!")
        print(e, f)
db = firestore.client()

Already initialized!
The default Firebase app already exists. This means you called initialize_app() more than once without providing an app name as the second argument. In most cases you only need to call initialize_app() once. But if you do want to initialize multiple apps, pass a second argument to initialize_app() to give each app a unique name. The default Firebase app already exists. This means you called initialize_app() more than once without providing an app name as the second argument. In most cases you only need to call initialize_app() once. But if you do want to initialize multiple apps, pass a second argument to initialize_app() to give each app a unique name.


In [43]:
batch = db.batch()
count = 0
for k, v in uploads.items():
    batch.set(db.collection('statistics').document(k), v)
    count += 1
    if count == 200:
        batch.commit()
        count = 0
batch.commit()
print("Completed upload!")

Completed upload!


In [44]:
# def delete_all():
#     batch = db.batch()
#     def delete_collection(coll_ref, batch_size):
#         docs = coll_ref.limit(batch_size).stream()
#         deleted = 0

#         for doc in docs:
#             print(u'Deleting doc {} => {}'.format(doc.id, doc.to_dict()))
#             batch.delete(doc.reference)
#             deleted = deleted + 1

#         if deleted >= batch_size:
#             batch.commit()
#             return delete_collection(coll_ref, batch_size)
#     delete_collection(db.collection('statistics'), 100)
#     batch.commit()
#delete_all()