# Coronavirus (Covid-19) Data in the United States

Data from The New York Times, based on reports from state and local health agencies.

In [None]:
import pandas as pd
import datetime
import pycountry

In [None]:
# papermill parameters
# Output location for the generated CSV. The value is concatenated directly
# with the file name when the data is written, so keep the trailing slash.
output_folder = "../output/"

In [None]:
# Load the New York Times county-level dataset. The `fips` column is read as
# text rather than being parsed as a number, and every row is tagged with the
# constant country code "US" in the ISO3166_1 column.
df = pd.read_csv(
    "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
    dtype={"fips": str},
).assign(ISO3166_1="US")

In [None]:
# Lookup table: US subdivision name -> its code with the "US-" prefix removed.
subdivisions = {
    subdivision.name: subdivision.code.replace("US-", "")
    for subdivision in pycountry.subdivisions.get(country_code="US")
}

In [None]:
# Translate each row's state name through the `subdivisions` lookup table;
# names without an exact match in the table get None.
df["ISO3166_2"] = df["state"].map(subdivisions.get)

In [None]:
# Sort by county, then date, so that the rows of each (county, state) group
# are in date order before consecutive rows are differenced.
df = df.sort_values(by=["county", "date", "ISO3166_1", "ISO3166_2"])

# Row-to-row difference of `cases` and `deaths` within each (county, state)
# group. Missing differences (for example the first row of a group, which has
# no predecessor) are stored as 0, and the result is cast to integers.
daily_change = (
    df.groupby(["county", "state"])[["cases", "deaths"]]
    .diff()
    .fillna(0)
    .astype(int)
)
df["cases_since_prev_day"] = daily_change["cases"]
df["deaths_since_prev_day"] = daily_change["deaths"]


In [None]:
# Stamp every row with the time of this run as a naive UTC timestamp.
# datetime.utcnow() is deprecated since Python 3.12; taking a timezone-aware
# UTC "now" and dropping tzinfo yields the same naive value, so the format of
# the column written to the CSV is unchanged.
df["Last_Update_Date"] = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
# Flag the rows whose date equals the maximum date present in the dataset.
df['Last_Reported_Flag'] = df['date'] == df['date'].max()

# Write the enriched dataset as CSV, omitting the DataFrame index.
# `output_folder` is concatenated as-is, so it must end with a path separator.
df.to_csv(output_folder + "NYT_US_COVID19.csv", index=False)