In [3]:
import pandas as pd
import pymmwr as pm
import datetime


def get_epi_data(date):
    format_str = '%m/%d/%y'  # The format
    dt = datetime.datetime.strptime(date, format_str).date()
    epi = pm.date_to_epiweek(dt)
    return epi.year, epi.week, epi.day



In [9]:
df_state_nat

Unnamed: 0,UID,code3,FIPS,Lat,Long_,Population,1/22/20,1/23/20,1/24/20,1/25/20,...,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20
Alabama,5796241491,57960,241491.0,2203.246784,-5809.578199,4903185,0,0,0,0,...,80,92,93,99,114,118,133,148,153,157
Alaska,2604232344,26040,232344.0,1747.579877,-4229.319334,731545,0,0,0,0,...,7,8,8,8,9,9,9,9,9,9
American Samoa,16,16,60.0,-14.271,-170.132,55641,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,1428230216,14280,230216.0,505.138555,-1671.948482,7278717,0,0,0,0,...,97,108,115,122,131,142,150,169,180,184
Arkansas,6468550635,64680,550635.0,2618.391704,-6932.54837,3017804,0,0,0,0,...,21,25,27,29,32,33,37,37,38,39
California,5040521376,50400,521376.0,2194.949775,-7002.258461,39512223,0,0,0,0,...,583,604,640,714,767,860,956,1037,1140,1177
Colorado,5544685999,55440,685999.0,2491.870366,-6751.119357,5758736,0,0,0,0,...,226,250,289,306,327,328,355,372,389,420
Connecticut,840242082,8400,242082.0,332.909441,-581.254637,3565287,0,0,0,0,...,448,494,554,602,671,868,971,1036,1086,1127
Delaware,420200029,4200,200029.0,117.32783,-226.599712,973764,0,0,0,0,...,32,33,35,41,43,46,55,61,67,67
Diamond Princess,84088888,840,88888.0,0.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
def create_truth_data(df, fips_codes):
    # drop unnecessary columns
    cols = list(range(0, 6))
    df_truth = df.drop(df_state_nat.columns[cols], axis=1)

    # convert matrix to repeating row format
    df_truth = df_truth.unstack()
    df_truth = df_truth.reset_index()

    # get epi data from date
    df_truth['year'], df_truth['week'], df_truth['day'] = \
        zip(*df_truth['level_0'].map(get_epi_data))

    # rename columns
    df_truth = df_truth.rename(columns={0: "value",
                                        "level_1": "location_long"})
    # Only visualize certain states
    states = ['US', 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut',
              'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
              'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
              'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
              'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
              'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
              'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia']
    df_truth = df_truth[df_truth["location_long"].isin(states)]

    # Get state IDs
    df_truth = df_truth.merge(fips_codes, left_on='location_long', right_on='state_name', how='left')
    df_truth.loc[df_truth["location_long"] == "US", "state_code"] = "US"
    df_truth["state_code"].replace({"US": 1000}, inplace=True)  # so that can be converted to int

    # convert FIPS code to int
    df_truth["state_code"] = df_truth["state_code"].astype(int)

    # add leading zeros to state code
    df_truth['state_code'] = df_truth['state_code'].apply(lambda x: '{0:0>2}'.format(x))

    # convert 1000 back to US
    df_truth["state_code"].replace({"1000": "US"}, inplace=True)
    df_truth.loc[df_truth["location_long"] == "US", "state"] = "nat"


    '''
    ####################################
    # Truth data output for visualization
    ####################################
    '''
    # Observed data on the seventh day
    df_truth = df_truth[df_truth['day'] == 7]
    df_truth['week'] = df_truth['week'] + 1  # shift epiweek on axis

    print(df_truth)
    
    # add leading zeros to epi week
    df_truth['week'] = df_truth['week'].apply(lambda x: '{0:0>2}'.format(x))

    # define epiweek
    df_truth['epiweek'] = df_truth['year'].astype(str) + df_truth['week']

    # only output "location", "epiweek", "value"
    df_truth = df_truth.rename(columns={"state": "location"})
    df_truth_short = df_truth[["location", "epiweek", "value"]]

    df_truth_short["value"].replace({0: 0.1}, inplace=True)

    return df_truth_short
  

df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
fips_codes = pd.read_csv('../template/state_fips_codes.csv')

# aggregate by state and nationally
state_agg = df.groupby(['Province_State']).sum()
us_nat = df.groupby(['Country_Region']).sum()
df_state_nat = state_agg.append(us_nat)

df_out = create_truth_data(df_state_nat, fips_codes)
df_out["target"] = "Cumulative Death"

df_out = create_truth_data(df_state_nat, fips_codes)
df_out["target"] = "Incident Death"

# # write to json
# with open('flusight-master/covid-csv-tools/dist/state_actual/2019.json', 'w') as f:
#     f.write(df_truth_short.to_json(orient='records'))

      level_0  location_long  value  year  week  day state state_code  \
156   1/25/20        Alabama      0  2020     5    7    AL         01   
157   1/25/20         Alaska      0  2020     5    7    AK         02   
158   1/25/20        Arizona      0  2020     5    7    AZ         04   
159   1/25/20       Arkansas      0  2020     5    7    AR         05   
160   1/25/20     California      0  2020     5    7    CA         06   
...       ...            ...    ...   ...   ...  ...   ...        ...   
4571  4/18/20     Washington    613  2020    17    7    WA         53   
4572  4/18/20  West Virginia     16  2020    17    7    WV         54   
4573  4/18/20      Wisconsin    212  2020    17    7    WI         55   
4574  4/18/20        Wyoming      2  2020    17    7    WY         56   
4575  4/18/20             US  38664  2020    17    7   nat         US   

         state_name  
156         Alabama  
157          Alaska  
158         Arizona  
159        Arkansas  
160      Cali

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [None]:


'''
####################################
# Daily truth data output for reference
####################################
'''

# only output "location", "epiweek", "value"
df_byday = df_truth.rename(columns={"level_0": "date", "state_code": "location", "location_long": "location_name"})

# select columns
df_byday = df_byday[["date", "location", "location_name", "value"]]

# change to yyyy/mm/dd format
df_byday['date'] = pd.to_datetime(df_byday['date'])

df_byday.to_csv('../data-processed/truth-cum-death.csv', index=False)



'''
####################################
# Truth data output for Zoltar & Scoring
####################################
'''
# rename location
df_truth_long = df_truth.rename(columns={"week": "epiweek",
                                         "state_code": "unit",
                                         "level_0": "date"})
# get timezero
df_truth_long['date'] = pd.to_datetime(df_truth_long['date'])

# find week-ahead targets
for i in range(4):
    weeks_ahead = i + 1
    days_back = 5 + (weeks_ahead * 7)  # timezero is on Mondays

    df_calc = df_truth_long  # initialize df

    # find timezero and target
    df_calc['timezero'] = df_calc['date'] - datetime.timedelta(days=days_back)
    df_calc['target'] = "%i_week_ahead_cum" % weeks_ahead

    # select columns
    df_calc = df_calc[["timezero", "unit", "target", "value"]]

    # concatenate truth
    if i == 0:
        df_out = df_calc
    else:
        df_out = pd.concat([df_out, df_calc])

# write truth to csv
df_out.to_csv('../data-processed/zoltar-truth-cum-death.csv', index=False)