In [3]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import requests
import us

from datetime import date, timedelta
from pprint import pprint

In [4]:
def GetDailyCasesAndDeathsData():
    """Download daily per-state COVID-19 reports for the USA and save them to CSV.

    Queries https://covid-api.com/api/reports once per calendar day, starting at
    2020-03-01 and ending the day before today. Only entries whose province
    matches the name of an entry in ``us.states.STATES`` are kept. The collected
    rows are written to ``CasesByState-Daily.csv`` in the current working
    directory.

    Before saving, two sanity checks are printed: the non-null count of every
    column and the number of distinct states collected.

    Returns:
        None. The results are only printed and written to disk.
    """

    #Initialize data source URL
    url = 'https://covid-api.com/api/reports'

    #Initialize list of dates: every day from March 1, 2020 up to (not including) today
    #API requires queries by date, so we will need to request one date at a time
    start_date = date(2020,3,1)
    end_date = date.today()
    dates = [str(start_date + timedelta(n)) for n in range((end_date - start_date).days)]

    #Column order of the resulting data frame / CSV file
    columns = [
        'Active',
        'Confirmed',
        'Date',
        'Deaths',
        'Fatality Rate',
        'Recovered',
        'State',
        'State Abbr',
        'Active Diff',
        'Confirmed Diff',
        'Deaths Diff',
        'Recovered Diff',
        'Lat',
        'Lon'
    ]

    #Map each state name to its abbreviation once, instead of rebuilding the
    #list of names for every record returned by the API
    state_abbrs = {state.name : state.abbr for state in us.states.STATES}

    #Rows are collected in a plain list and turned into a data frame once at the end;
    #growing a data frame row by row is quadratic, and DataFrame.append no longer
    #exists in pandas 2.x
    records = []

    #Loop through all dates established above
    #NOTE: the loop variable must not be called 'date'; that would make 'date' a local
    #name for the whole function and break the date(...) calls above
    for query_date in dates:

        #Set up the params for USA and the current date, and request
        query_params = {'date' : query_date,
                        'iso' : 'USA'}
        response = requests.get(url, params=query_params)
        print(f"Retreiving {response.url}...")
        payload = response.json()

        try:
            data = payload['data']
        except KeyError: #If a particular date result doesn't have an expected field
            print("No data returned; skipping...")
            continue

        #In this API, the 'data' element is a list whose elements represent states
        #Loop through the list to extract by-state information
        for datum in data:

            try:

                #Get the state name from the current 'datum'
                region = datum['region']
                state = region['province']

                #Only proceed with reading if the state is an actual US state
                #Other information is not joinable to income and population data
                #and should be ignored for this project
                if state not in state_abbrs:
                    continue

                #Build the row for the current state; a missing field raises
                #KeyError before anything is stored, so partial rows are never kept
                records.append({
                    'Active' : datum['active'],
                    'Confirmed' : datum['confirmed'],
                    'Date' : datum['date'],
                    'Deaths' : datum['deaths'],
                    'Fatality Rate' : datum['fatality_rate'],
                    'Recovered' : datum['recovered'],
                    'State' : state,
                    'State Abbr' : state_abbrs[state],
                    'Active Diff' : datum['active_diff'],
                    'Confirmed Diff' : datum['confirmed_diff'],
                    'Deaths Diff' : datum['deaths_diff'],
                    'Recovered Diff' : datum['recovered_diff'],
                    'Lat' : region['lat'],
                    'Lon' : region['long']
                })

            except KeyError: #If a particular data element doesn't have an expected field
                print("A requested field was not found; skipping...")

    #Build the data frame in one step; 'columns' fixes the column order and keeps
    #the expected header even when no rows were collected
    cases_df = pd.DataFrame(records, columns=columns)

    #Check that we have data in all rows of all columns
    #This is particularly important for the measure columns (Cases, Deaths, etc.)
    print(cases_df.count())

    #Check to ensure there is no noise in the State Column
    #Specifically, we should only have 50 unique values
    print(len(cases_df['State'].unique()))

    #Save results to CSV for separate plotting
    cases_df.to_csv("CasesByState-Daily.csv", index=False)

    return

In [5]:
def GetBimonthlyCasesAndDeathsData():
    """Download twice-monthly per-state COVID-19 snapshots for the USA and save them to CSV.

    Queries https://covid-api.com/api/reports for a fixed list of dates (the 1st
    and 15th of each month from March through July 2020). Only entries whose
    province matches the name of an entry in ``us.states.STATES`` are kept. The
    collected rows are written to ``CasesByState.csv`` in the current working
    directory.

    Before saving, two sanity checks are printed: the non-null count of every
    column and the number of distinct states collected.

    Returns:
        None. The results are only printed and written to disk.
    """

    #Initialize data source URL
    url = 'https://covid-api.com/api/reports'

    #Initialize list of dates: the 1st and 15th of each month, March through July 2020
    #API requires queries by date, so we will need to request one date at a time
    dates = ['2020-03-01', '2020-03-15', '2020-04-01', '2020-04-15', '2020-05-01',
             '2020-05-15', '2020-06-01', '2020-06-15', '2020-07-01', '2020-07-15']

    #Column order of the resulting data frame / CSV file
    columns = [
        'Active',
        'Confirmed',
        'Date',
        'Deaths',
        'Fatality Rate',
        'Recovered',
        'State',
        'State Abbr',
        'Lat',
        'Lon'
    ]

    #Map each state name to its abbreviation once, instead of rebuilding the
    #list of names for every record returned by the API
    state_abbrs = {state.name : state.abbr for state in us.states.STATES}

    #Rows are collected in a plain list and turned into a data frame once at the end;
    #growing a data frame row by row is quadratic, and DataFrame.append no longer
    #exists in pandas 2.x
    records = []

    #Loop through all dates established above
    #(the loop variable is not called 'date' so it cannot shadow datetime.date)
    for query_date in dates:

        #Set up the params for USA and the current date, and request
        query_params = {'date' : query_date,
                        'iso' : 'USA'}
        response = requests.get(url, params=query_params)
        print(f"Retreiving {response.url}...")
        payload = response.json()

        try:
            data = payload['data']
        except KeyError: #If a particular date result doesn't have an expected field
            print("No data returned; skipping...")
            continue

        #In this API, the 'data' element is a list whose elements represent states
        #Loop through the list to extract by-state information
        for datum in data:

            try:

                #Get the state name from the current 'datum'
                region = datum['region']
                state = region['province']

                #Only proceed with reading if the state is an actual US state
                #Other information is not joinable to income and population data
                #and should be ignored for this project
                if state not in state_abbrs:
                    continue

                #Build the row for the current state; a missing field raises
                #KeyError before anything is stored, so partial rows are never kept
                records.append({
                    'Active' : datum['active'],
                    'Confirmed' : datum['confirmed'],
                    'Date' : datum['date'],
                    'Deaths' : datum['deaths'],
                    'Fatality Rate' : datum['fatality_rate'],
                    'Recovered' : datum['recovered'],
                    'State' : state,
                    'State Abbr' : state_abbrs[state],
                    'Lat' : region['lat'],
                    'Lon' : region['long']
                })

            except KeyError: #If a particular data element doesn't have an expected field
                print("A requested field was not found; skipping...")

    #Build the data frame in one step; 'columns' fixes the column order and keeps
    #the expected header even when no rows were collected
    cases_df = pd.DataFrame(records, columns=columns)

    #Check that we have data in all rows of all columns
    #This is particularly important for the measure columns (Cases, Deaths, etc.)
    print(cases_df.count())

    #Check to ensure there is no noise in the State Column
    #Specifically, we should only have 50 unique values
    print(len(cases_df['State'].unique()))

    #Save results to CSV for separate plotting
    cases_df.to_csv("CasesByState.csv", index=False)

    return