# In [223]:
import pandas as pd
import pymmwr as pm
import datetime

def get_epi_data(date):
    """Convert a date string into its MMWR year, week and day.

    Args:
        date: Date text in ``%m/%d/%y`` form (month/day/two-digit year),
            e.g. ``"4/11/20"``.

    Returns:
        A ``(year, week, day)`` tuple taken from the mapping returned by
        ``pm.date_to_mmwr_week``.

    Raises:
        ValueError: If ``date`` does not match the ``%m/%d/%y`` format.
    """
    parsed = datetime.datetime.strptime(date, '%m/%d/%y')
    mmwr = pm.date_to_mmwr_week(parsed.date())
    return tuple(mmwr[part] for part in ("year", "week", "day"))

# Load the JHU CSSE US deaths time series: the per-date columns are headed by
# dates in month/day/2-digit-year form, the remaining columns are identifiers.
df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
# NOTE(review): this file is expected to provide `state_name` and `state`
# columns (both are used in the merge/rename below) - confirm against template.
fips_codes = pd.read_csv('../template/state_fips_codes.csv')

# Pick the per-date columns by parsing the headers instead of dropping the
# first N columns by position: which identifier columns survive a
# groupby().sum() differs between pandas versions, the date headers do not.
is_date_col = pd.to_datetime(df.columns, format='%m/%d/%y', errors='coerce').notna()
date_cols = df.columns[is_date_col].tolist()

# aggregate by state and nationally
state_agg = df.groupby('Province_State')[date_cols].sum()
us_nat = df.groupby('Country_Region')[date_cols].sum()
# DataFrame.append was removed in pandas 2.0; concat stacks the same rows.
df_state_nat = pd.concat([state_agg, us_nat])

# convert matrix to repeating row format: one row per (date, location) pair.
# Naming both axes first gives the reshaped frame explicit column names
# rather than the auto-generated level_0 / level_1 / 0.
df_truth = (
    df_state_nat
    .rename_axis(index='location_long', columns='date')
    .unstack()
    .rename('value')
    .reset_index()
)

# get epi data from date
df_truth['year'], df_truth['week'], df_truth['day'] = \
    zip(*df_truth['date'].map(get_epi_data))

# Observed data on the seventh day
# (presumably the last day of the MMWR week - confirm against pymmwr docs).
# .copy() so the column assignments below act on an independent frame and
# not on a slice of the unfiltered one.
df_truth = df_truth[df_truth['day'] == 7].copy()

# add leading zeros to epi week
df_truth['week'] = df_truth['week'].astype(str).str.zfill(2)

# define epiweek as year followed by the two-digit week, e.g. "202015"
df_truth['epiweek'] = df_truth['year'].astype(str) + df_truth['week']

# Only visualize certain states
states = ['US', 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut',
          'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
          'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
          'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
          'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
          'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
          'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia']
df_truth = df_truth[df_truth["location_long"].isin(states)]

# Get state IDs; a left join keeps the national "US" row even though it has
# no match in the FIPS table, and that row is then labelled "nat" by hand.
df_truth = df_truth.merge(fips_codes, left_on='location_long', right_on='state_name', how='left')
df_truth.loc[df_truth["location_long"] == "US", "state"] = "nat"

# rename location
df_truth = df_truth.rename(columns={"state": "location"})

# only output "location", "epiweek", "value"
df_truth = df_truth[["location", "epiweek", "value"]]

# write to json (one object per row)
df_truth.to_json('flusight-master/covid-csv-tools/dist/state_actual/2019.json', orient='records')