In [64]:
import pandas as pd
from datetime import date, timedelta
import io
import requests

In [65]:
def request_file(date):
    """Download the CSSE COVID-19 daily report CSV for one day from GitHub.

    Parameters
    ----------
    date : datetime.date
        Day of the report. It is formatted as ``MM-DD-YYYY`` to build the
        file name. Note that, inside this function only, the parameter
        shadows the ``date`` class imported from ``datetime``.

    Returns
    -------
    tuple
        ``(req, error)``. When the HTTP exchange completes, ``req`` is the
        response object (whatever its status code, e.g. 404 for a report
        that does not exist) and ``error`` is ``False``. When the request
        itself fails (connection problem, timeout, ...), ``req`` is ``None``
        and ``error`` is ``True``.
    """
    URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"
    try:
        # A timeout keeps the notebook from hanging forever on a stalled connection.
        req = requests.get(URL + date.strftime("%m-%d-%Y") + ".csv", timeout=30)
    except requests.RequestException:
        # No response object exists on this path, so hand back None rather
        # than referencing an unbound local.
        print("Error retrieving file from GitHub")
        return None, True
    print("Request status code:", req.status_code)
    return req, False

In [66]:
# Try today's daily report first; `req` is the HTTP response and `error`
# is the failure flag returned by request_file.
req, error = request_file(date.today())

Request status code: 200


In [67]:
# A 404 means the report for today has not been found; fall back to
# yesterday's file and keep that response for the cells below.
if req.status_code == 404:
    req, error = request_file(date.today() - timedelta(days=1))

In [79]:
# Parse the downloaded CSV, shorten the column names, index rows by the
# combined key, normalise County/State to lowercase-hyphenated text, and
# keep only the US rows.
df = (
    pd.read_csv(io.StringIO(req.content.decode('utf-8')))
    .rename(columns={
        'Admin2': 'County',
        'Province_State': 'State',
        'Country_Region': 'Country',
        'Last_Update': 'Update',
        'Long_': 'Long',
        'Combined_Key': 'Key',
    })
    .set_index('Key')
    .assign(
        County=lambda frame: frame["County"].str.replace(' ', '-').str.lower(),
        State=lambda frame: frame["State"].str.replace(' ', '-').str.lower(),
    )
    .loc[lambda frame: frame["Country"] == "US"]
)

In [83]:
# Preview the first rows of the cleaned, US-only frame.
df.head()

Unnamed: 0_level_0,FIPS,County,State,Country,Update,Lat,Long,Confirmed,Deaths,Recovered,Active
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Abbeville, South Carolina, US",45001.0,abbeville,south-carolina,US,2020-04-04 23:34:21,34.223334,-82.461707,6,0,0,0
"Acadia, Louisiana, US",22001.0,acadia,louisiana,US,2020-04-04 23:34:21,30.295065,-92.414197,65,2,0,0
"Accomack, Virginia, US",51001.0,accomack,virginia,US,2020-04-04 23:34:21,37.767072,-75.632346,8,0,0,0
"Ada, Idaho, US",16001.0,ada,idaho,US,2020-04-04 23:34:21,43.452658,-116.241552,360,3,0,0
"Adair, Iowa, US",19001.0,adair,iowa,US,2020-04-04 23:34:21,41.330756,-94.471059,1,0,0,0


In [84]:
# df[df.duplicated(['County'])]
# County names are not unique on their own (the same name occurs in several states), so a row must be identified by the combination of State and County.

In [85]:
# Total of the Confirmed column over all US rows in the loaded report.
df['Confirmed'].sum()

308850

In [96]:
# Case counts for every row whose normalised State is "new-york".
df.loc[df["State"] == "new-york", ["Confirmed", "Deaths", "Recovered", "Active"]]

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Albany, New York, US",293,6,0,0
"Allegany, New York, US",16,1,0,0
"Broome, New York, US",65,4,0,0
"Cattaraugus, New York, US",9,0,0,0
"Cayuga, New York, US",7,0,0,0
"Chautauqua, New York, US",10,1,0,0
"Chemung, New York, US",36,0,0,0
"Chenango, New York, US",39,0,0,0
"Clinton, New York, US",31,0,0,0
"Columbia, New York, US",57,2,0,0


In [86]:
# Lookup parameters. They are compared against the State/County columns,
# which were lowercased with spaces replaced by hyphens, so the values here
# must use that same form (e.g. "new-york").
query_state = "california"
query_county = "alameda"

In [87]:
# Select the count columns for the rows matching both the requested state
# and the requested county.
is_requested_county = (df["State"] == query_state) & (df["County"] == query_county)
query = df.loc[is_requested_county, ["Confirmed", "Deaths", "Recovered", "Active"]]

In [92]:
# Convert the selection into a list with one dict per row.
# NOTE: this rebinds `query` from a DataFrame to a list, so re-running this
# cell on its own fails (a list has no `to_dict`); re-run the selection first.
query = query.to_dict(orient = "records")

In [93]:
# Tag the first record with a last-updated field.
# NOTE(review): "XXX" looks like a placeholder value - confirm what belongs here.
query[0]["last_updated"] = "XXX"

In [94]:
# Display the final list of record dicts.
query

[{'Confirmed': 443,
  'Deaths': 12,
  'Recovered': 0,
  'Active': 0,
  'last_updated': 'XXX'}]