In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import requests
import us

from datetime import date, timedelta
from pprint import pprint

In [2]:
#Endpoint of the reports API that every request below is sent to
url = 'https://covid-api.com/api/reports'

#The API is queried one date at a time, so build the list of ISO date strings up front:
#every day from March 1, 2020 up to (but not including) today
start_date = date(2020, 3, 1)
end_date = date.today()
day_count = (end_date - start_date).days
dates = [(start_date + timedelta(days=offset)).isoformat() for offset in range(day_count)]

#Empty data frame with the output columns in their final order
column_names = [
    'Active', 'Confirmed', 'Date', 'Deaths', 'Fatality Rate', 'Recovered',
    'State', 'State Abbr',
    'Active Diff', 'Confirmed Diff', 'Deaths Diff', 'Recovered Diff',
    'Lat', 'Lon',
]
cases_df = pd.DataFrame(columns=column_names)

In [3]:
#Loop through all dates established above, collecting one record per state per day

#Name -> abbreviation lookup for the states we keep
#Built once here instead of rebuilding the list of state names for every record
state_abbr_by_name = {state.name : state.abbr for state in us.states.STATES}

#Records are accumulated in a plain list and turned into a data frame once at the end
#Appending to the frame row by row copies it every time, and DataFrame.append
#was removed in pandas 2.0
case_rows = []

#The loop variable is deliberately not named 'date': that would rebind the
#datetime.date import and break the setup cell if it is run again in this kernel
for report_date in dates:

    #Set up the params for USA and the current date, and request
    query_params = {'date' : report_date,
                    'iso' : 'USA'}
    try:
        #The timeout (seconds) keeps one stalled request from hanging the whole run
        response = requests.get(url, params=query_params, timeout=30)
        print(f"Retreiving {response.url}...")
        payload = response.json()
    except (requests.RequestException, ValueError) as error:
        #Connection problem, timeout, or a body that is not valid JSON:
        #report it and move on to the next date rather than aborting
        print(f"Request for {report_date} failed ({error}); skipping...")
        continue

    try:
        data = payload['data']
    except KeyError: #If a particular date result doesn't have an expected field
        print("No data returned; skipping...")
        continue

    #In this API, the 'data' element is a list whose elements represent states
    #Loop through the list to extract by-state information
    for datum in data:

        try:
            #Get the state name from the current 'datum'
            region = datum['region']
            state = region['province']

            #Only proceed with reading if the state is an actual US state
            #Other information is not joinable to income and population data
            #and should be ignored for this project
            if state not in state_abbr_by_name:
                continue

            #Create a dictionary with the contents for the current 'state' row
            case_rows.append({
                'Active' : datum['active'],
                'Confirmed' : datum['confirmed'],
                'Date' : datum['date'],
                'Deaths' : datum['deaths'],
                'Fatality Rate' : datum['fatality_rate'],
                'Recovered' : datum['recovered'],
                'State' : state,
                'State Abbr' : state_abbr_by_name[state],
                'Active Diff' : datum['active_diff'],
                'Confirmed Diff' : datum['confirmed_diff'],
                'Deaths Diff' : datum['deaths_diff'],
                'Recovered Diff' : datum['recovered_diff'],
                'Lat' : region['lat'],
                'Lon' : region['long']
            })

        except KeyError: #If a particular data element doesn't have an expected field
            print("A requested field was not found; skipping...")

#Build the data frame in one step, keeping the column order declared during setup
#Assigning a fresh frame (rather than appending) makes this cell safe to re-run
cases_df = pd.DataFrame(case_rows, columns=list(cases_df.columns))

#Show a sample from the data frame to show it is populated
cases_df.head()

Retreiving https://covid-api.com/api/reports?date=2020-03-01&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-02&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-03&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-04&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-05&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-06&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-07&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-08&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-09&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-10&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-11&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-12&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-13&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-03-14&iso

Retreiving https://covid-api.com/api/reports?date=2020-06-23&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-24&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-25&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-26&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-27&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-28&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-29&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-06-30&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-01&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-02&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-03&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-04&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-05&iso=USA...
Retreiving https://covid-api.com/api/reports?date=2020-07-06&iso

Unnamed: 0,Active,Confirmed,Date,Deaths,Fatality Rate,Recovered,State,State Abbr,Active Diff,Confirmed Diff,Deaths Diff,Recovered Diff,Lat,Lon
0,0,0,2020-03-10,0,0.0,0,Alaska,AK,0,0,0,0,61.3707,-152.4044
1,5,6,2020-03-10,0,0.0,1,Arizona,AZ,4,5,0,1,33.7298,-111.4312
2,0,0,2020-03-10,0,0.0,0,Arkansas,AR,0,0,0,0,34.9697,-92.3731
3,140,144,2020-03-10,2,0.0139,2,California,CA,138,142,2,2,36.1162,-119.6816
4,15,15,2020-03-10,0,0.0,0,Colorado,CO,0,0,0,0,39.0598,-105.3111


In [4]:
#Count the non-null entries in every column; a column with gaps shows a lower count
#This is particularly important for the measure columns (Cases, Deaths, etc.)
non_null_counts = cases_df.count()
non_null_counts

Active            6795
Confirmed         6795
Date              6795
Deaths            6795
Fatality Rate     6795
Recovered         6795
State             6795
State Abbr        6795
Active Diff       6795
Confirmed Diff    6795
Deaths Diff       6795
Recovered Diff    6795
Lat               6795
Lon               6795
dtype: int64

In [5]:
#Sanity check on the State column: any noise would show up as extra distinct values
#Specifically, we should only have 50 unique values
unique_states = cases_df['State'].unique()
print(len(unique_states))

50


In [7]:
#Write the collected rows to CSV (without the index column) for separate plotting
output_csv_path = "Resources/CasesByState-Daily.csv"
cases_df.to_csv(output_csv_path, index=False)