In [1]:
LINK_RECOVERED = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"
LINK_DEATHS = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
LINK_CONFIRMED = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"

In [2]:
import pandas as pd
import numpy as np
import requests, io
from collections import defaultdict

In [3]:
df_recovered_orig = pd.read_csv(io.StringIO(requests.get(LINK_RECOVERED).content.decode('utf-8')))
df_deaths_orig = pd.read_csv(io.StringIO(requests.get(LINK_DEATHS).content.decode('utf-8')))
df_confirmed_orig = pd.read_csv(io.StringIO(requests.get(LINK_CONFIRMED).content.decode('utf-8')))

In [4]:
df_recovered_orig = df_recovered_orig.append(df_recovered_orig.sum(numeric_only=True), ignore_index=True)
df_recovered_orig.at[df_recovered_orig.index[-1], 'Country/Region'] = 'Global'
df_recovered_orig.at[df_recovered_orig.index[-1], 'Province/State'] = 'Global'

df_deaths_orig = df_deaths_orig.append(df_deaths_orig.sum(numeric_only=True), ignore_index=True)
df_deaths_orig.at[df_deaths_orig.index[-1], 'Country/Region'] = 'Global'
df_deaths_orig.at[df_deaths_orig.index[-1], 'Province/State'] = 'Global'

df_confirmed_orig = df_confirmed_orig.append(df_confirmed_orig.sum(numeric_only=True), ignore_index=True)
df_confirmed_orig.at[df_confirmed_orig.index[-1], 'Country/Region'] = 'Global'
df_confirmed_orig.at[df_confirmed_orig.index[-1], 'Province/State'] = 'Global'

In [5]:
today = df_recovered_orig.columns[-1]
yesterday = df_recovered_orig.columns[-2]
daybefore = df_recovered_orig.columns[-3]

In [6]:
confirmed_groups = df_confirmed_orig[(~pd.isnull(df_confirmed_orig['Province/State']))].groupby('Country/Region').sum().reset_index()
recovered_groups = df_recovered_orig[(~pd.isnull(df_recovered_orig['Province/State']))].groupby('Country/Region').sum().reset_index()
deaths_groups = df_deaths_orig[(~pd.isnull(df_deaths_orig['Province/State']))].groupby('Country/Region').sum().reset_index()

df_confirmed = pd.concat([df_confirmed_orig, confirmed_groups], sort=False)
df_deaths = pd.concat([df_deaths_orig, deaths_groups], sort=False)
df_recovered = pd.concat([df_recovered_orig, recovered_groups], sort=False)

In [7]:
for df in [df_confirmed, df_deaths, df_recovered]:
    for i, row in list(df.iterrows()):
        if row['Country/Region'] == row['Province/State']:
            df.drop(i, inplace=True)

In [8]:
latest = df_confirmed.columns[-2]
oneday = df_confirmed.columns[-3]
fiveday = df_confirmed.columns[-8]
tenday = df_confirmed.columns[-13]

groups = [("oneDay", oneday), ("fiveDay", fiveday), ("tenDay", tenday)]
for g in groups:
    label = g[0]
    col = g[1]
    for df in [df_recovered, df_deaths, df_confirmed]:
        df[label + 'Num'] = df[today] - df[col]
        df[label + 'Percent'] = 100 * ((df[today] - df[col]) / df[col])

In [9]:
def replacena(x):
    if pd.isnull(x) or np.isnan(x):
        return 0
    else:
        return x

In [10]:
upload = []
upload_data = defaultdict(dict)

for label, df in [('Confirmed', df_confirmed), ('Dead', df_deaths), ('Recovered', df_recovered)]:
    for _, row in df.iterrows():
        row = row.to_dict()
        region = row.pop('Province/State')
        country = row.pop('Country/Region')
        key = str(region) + '__' + country
        upload_data[key][label] = {
            'lat': row.pop('Lat'),
            'lon': row.pop('Long'),
            'region': str(region),
            'country': country,
            'oneDayNum': replacena(row.pop('oneDayNum')),
            'fiveDayNum': replacena(row.pop('fiveDayNum')),
            'tenDayNum': replacena(row.pop('tenDayNum')),
            'oneDayPercent': replacena(row.pop('oneDayPercent')),
            'fiveDayPercent': replacena(row.pop('fiveDayPercent')),
            'tenDayPercent': replacena(row.pop('tenDayPercent')),
            'key': key
        }
        X_axis = list(row.keys())
        y_axis = list(row.values())
        upload_data[key][label]['X'] = X_axis
        upload_data[key][label]['Y'] = y_axis
        
uploads = {}
for key, dataset in upload_data.items():
    confirmed_X = dataset['Confirmed'].pop('X')
    confirmed_Y = dataset['Confirmed'].pop('Y')
    deaths_Y = dataset['Dead'].pop('Y')    
    recovered_Y = dataset['Recovered'].pop('Y')
    
    timeSeriesY = {'Confirmed': confirmed_Y,
                  'Deaths': deaths_Y,
                  'Recovered': recovered_Y}
    
    upload = dataset['Confirmed']
    upload['timeSeriesX'] = confirmed_X
    upload['timeSeriesY'] = timeSeriesY
    upload['timeSeriesKeys'] = ['Confirmed', 'Deaths', 'Recovered']
    uploads[key] = upload

In [11]:
from firebase_admin import firestore, credentials
import firebase_admin
try:
    cred = credentials.Certificate('/Users/shomil/Documents/ServerKeys/ncov19.json')
    firebase_admin.initialize_app(cred)
except Exception as e:
    try:
        firebase_admin.initialize_app()
    except Exception as f:
        print("Already initialized!")
        print(e, f)
db = firestore.client()

In [12]:
batch = db.batch()
count = 0
for k, v in uploads.items():
    batch.set(db.collection('statistics').document(k), v)
    count += 1
    if count == 200:
        batch.commit()
        count = 0
batch.commit()

[update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   seconds: 1585030757
   nanos: 454110000
 }, update_time {
   second

In [13]:
print("SUCCESSFULLY PUSHED DATA!")

SUCCESSFULLY PUSHED DATA!


In [14]:
# delete_collection(db.collection('statistics'), 100)

In [38]:
# def delete_collection(coll_ref, batch_size):
#     docs = coll_ref.limit(batch_size).stream()
#     deleted = 0

#     for doc in docs:
#         print(u'Deleting doc {} => {}'.format(doc.id, doc.to_dict()))
#         doc.reference.delete()
#         deleted = deleted + 1

#     if deleted >= batch_size:
#         return delete_collection(coll_ref, batch_size)