The source data can be obtained [here](https://github.com/CSSEGISandData/COVID-19); it is provided by Johns Hopkins University.

In [1]:
from datetime import date
from datetime import timedelta
from typing import TypeVar
import pandas as pd
# Allow up to 1000 rows to be rendered when a DataFrame is displayed.
pd.options.display.max_rows = 1000

# Alias used in type annotations throughout the notebook. It points at the real
# pandas class (not a TypeVar), so annotations such as `-> DataFrame` mean
# "a pandas DataFrame" to both readers and type checkers.
DataFrame = pd.DataFrame

In [2]:
def getCoronaVirusDataUSA(iso_date: str) -> DataFrame:
    """Obtain counts of confirmed SARS-CoV-2 / Covid-19 cases in the U.S. for a single, specific date.

    The daily report for the requested date is read from the JHU CSSE GitHub
    repository and restricted to rows whose 'Country/Region' is 'US' and whose
    'Confirmed' count is greater than zero.

    Parameters
    ----------
    iso_date : str
        Date in 'YYYY-MM-DD' format for which you want the counts.

    Returns
    -------
    pandas.DataFrame
        One of two layouts, depending on how the report labels its rows:

        * If the first US row's 'Province/State' value contains a comma, a
          two-column frame ('State', 'Confirmed') where 'State' is the text
          following the first comma of 'Province/State' and 'Confirmed' is
          summed per 'State'.
        * Otherwise, the first six columns of the US rows as found in the
          report. This layout is also returned (with zero rows) when no US
          row has a positive confirmed count.

        In both layouts rows are sorted by 'Confirmed' in descending order
        and the index is renumbered from 0.

    Raises
    ------
    ValueError
        If `iso_date` is not a valid ISO-format date (from `date.fromisoformat`).

    Any error raised by `pd.read_csv` while fetching the report is not
    caught here and propagates to the caller.
    """

    # The report files are named MM-DD-YYYY.csv.
    data_date: str = date.fromisoformat(iso_date).strftime('%m-%d-%Y')
    data_url: str = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{data_date}.csv"

    df: DataFrame = pd.read_csv(data_url)
    df_us: DataFrame = df[df['Country/Region'] == 'US'].query("Confirmed > 0")

    # Unfortunately, JHU has been inconsistent in how they capture Province/State.
    # For example, the "Province/State" column includes ", US" (like "Washington, US") in certain dates' data,
    # while in other dates' data the column does not include ", US" and holds just "Washington".
    # Furthermore, data from early dates also included the city name along with the state code.

    # Probe the first US row to decide which format this report uses. An empty
    # result or a missing (non-string) label cannot contain a comma, so both
    # fall through to the "state name only" branch instead of raising.
    first_label = None if df_us.empty else df_us['Province/State'].iloc[0]

    if isinstance(first_label, str) and ',' in first_label:
        # Keep only the part after the first comma; labels without a comma are kept as-is.
        with_state: DataFrame = df_us.assign(
            State=df_us['Province/State'].map(
                lambda x: x.split(",")[1].strip() if x.find(",") != -1 else x
            )
        )
        grouped: DataFrame = (
            with_state.groupby(['State'])[['Confirmed']]
            .agg('sum')
            .sort_values(by='Confirmed', ascending=False)
            .reset_index()
        )
        return grouped

    # Otherwise the Province/State column contains only the state name.
    # A slice (rather than explicit positions) keeps the first six columns and
    # does not raise when the frame happens to have fewer.
    return df_us.sort_values(by='Confirmed', ascending=False).iloc[:, :6].reset_index(drop=True)

In [3]:
# Yesterday's date (relative to the local clock) as 'YYYY-MM-DD'.
latest_date = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")
latest_date

'2020-03-21'

In [4]:
# Fetch the US confirmed-case table for the date held in `latest_date`.
df = getCoronaVirusDataUSA(iso_date=latest_date)
df

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,New York,US,2020-03-21T22:43:04,11710,60,0
1,Washington,US,2020-03-21T22:43:04,1793,94,0
2,California,US,2020-03-21T22:43:04,1364,24,0
3,New Jersey,US,2020-03-21T19:43:03,1327,16,0
4,Michigan,US,2020-03-21T22:43:04,788,5,0
5,Illinois,US,2020-03-21T23:13:18,753,6,0
6,Florida,US,2020-03-21T23:13:18,659,13,0
7,Louisiana,US,2020-03-21T15:43:05,585,16,0
8,Texas,US,2020-03-21T23:13:17,581,5,0
9,Massachusetts,US,2020-03-21T23:13:18,525,1,0


In [5]:
# Sum every numeric column of `df`, then present the totals as a
# two-column table: 'Category' (the column name) and 'Counts' (its sum).
(
    df.select_dtypes(include='number')
    .sum()
    .to_frame(name='Counts')
    .reset_index()
    .rename(columns={'index': 'Category'})
)

Unnamed: 0,Category,Counts
0,Confirmed,25493
1,Deaths,307
2,Recovered,171
