### Formatting reported cases and deaths and calculating Future Death Case Estimations (p)
Output: Pickle File (dictionary of DataFrames)


In [1]:
import pandas as pd, numpy as np
from datetime import datetime, timedelta
import pickle


In [2]:
# Load the raw inputs: the cumulative confirmed-case and death time series
# and the county feature table.
cases = pd.read_csv('time_series_covid19_confirmed_US.csv')
deaths = pd.read_csv('time_series_covid19_deaths_US.csv')
cfeatures = pd.read_excel('county_features.xlsx')
# Lower-case alias of the FIPS column; the age-adjustment cell selects 'fips'.
cfeatures['fips'] = cfeatures['FIPS']


# Every column of the confirmed-cases file from position 20 onward is treated
# as a daily column. The deaths table is sliced with the same labels so that
# both frames cover exactly the same days.
case_dates = list(cases)[20:]

# Datetime labels for the daily columns, so later cells can do date
# arithmetic on them. The calendar is sized from the number of selected
# columns instead of a fixed 91, so a CSV with more or fewer days no longer
# fails with a length mismatch when the labels are assigned below.
# NOTE(review): assumes the column at position 20 is 2020-01-31 and that the
# daily columns are consecutive -- confirm against the CSV header.
all_dates = [datetime(2020,1,31)+timedelta(days=i) for i in range(len(case_dates))]

# Cumulative counts indexed by FIPS, one datetime-labelled column per day.
cases_full = cases[case_dates+['FIPS']].set_index('FIPS')
cases_full.columns = all_dates

deaths_full = deaths[case_dates+['FIPS']].set_index('FIPS')
deaths_full.columns = all_dates


In [3]:
# Daily new counts: each day's cumulative total minus the previous day's.
# The first date has no predecessor, so it is dropped from both frames.
one_day = timedelta(days=1)
later_days = all_dates[1:]
cases_added = cases_full.drop(columns=all_dates[0]).copy()
deaths_added = deaths_full.drop(columns=all_dates[0]).copy()
for day in later_days:
    day_before = day - one_day
    # Subtract raw arrays (not Series) so rows are paired by position and no
    # index alignment takes place.
    cases_added[day] = cases_full[day].values - cases_full[day_before].values
    deaths_added[day] = deaths_full[day].values - deaths_full[day_before].values
cases_added.columns = later_days
deaths_added.columns = later_days

def _trailing_week_mean(daily):
    """Return the 7-day trailing mean of every row of *daily*.

    *daily* is a frame whose column labels are dates. The result has one
    column per date starting with the 7th column of *daily*; each value is
    the row-wise mean of that day and the six days before it.
    """
    averaged = pd.DataFrame()
    for day in list(daily)[6:]:
        window = [day - timedelta(days=back) for back in range(7)]
        averaged[day] = daily[window].mean(axis=1)
    return averaged


deaths_rolling = _trailing_week_mean(deaths_added)
cases_rolling = _trailing_week_mean(cases_added)


#### Adjusting deaths by age demographics (compared to Australia)

In [4]:
# collecting and generating age statistics
age_df = cfeatures[['fips','Demographics: % 65 and over','population']].copy()
# Population-weighted national share of residents aged 65 and over. The two
# sums are taken on just the columns needed instead of summing the whole
# frame twice.
popage = age_df['Demographics: % 65 and over']*age_df['population']
national_percent_over_65 = popage.sum()/age_df['population'].sum()
# Not referenced by any other visible cell; kept so the name stays defined.
over_65_mortality = 100/(national_percent_over_65*2)*.0125
age_df.columns = ['FIPS','over65','population']

# adjusting deaths based on demographics
# Left merge: every row of deaths_added is kept; rows whose FIPS has no match
# in the feature table get NaN for 'over65'.
estimated_cases_by_deaths = pd.merge(deaths_added.reset_index(),age_df,how='left',on='FIPS')

age_vect = estimated_cases_by_deaths['over65']
estimated_cases_by_deaths = estimated_cases_by_deaths.drop(['over65','population'],axis=1)
fips_index = estimated_cases_by_deaths.pop('FIPS')

# Only the daily columns remain at this point.
est_matrix = np.array(estimated_cases_by_deaths)

# generating comparison with Australia study
# NOTE(review): 15.7 is presumably Australia's % aged 65 and over -- confirm.
# NOTE(review): a county with over65 == 0 yields inf here -- confirm none exist.
# The per-row factor is repeated across the daily columns with numpy rather
# than a Python loop; this keeps the (rows, days) shape and also works when
# the frame has no rows.
age_factor = (15.7/age_vect).to_numpy()
age_matrix = np.repeat(age_factor[:, np.newaxis], est_matrix.shape[1], axis=1)

# estimated Cases By Deaths
ECBD = pd.DataFrame(age_matrix * est_matrix)
# Each column is relabelled 14 days earlier than the death date it came from.
# The labels are taken from the same frame est_matrix was built from, so they
# follow the data's column order rather than an independent sort.
# NOTE(review): the 14-day shift is presumably an infection-to-death lag --
# confirm.
ECBD.columns = [x - timedelta(days=14) for x in estimated_cases_by_deaths.columns]

ECBD['index'] = fips_index
# Rows without an 'over65' value are NaN after the multiplication; they are
# set to 0 here.
ECBD = ECBD.set_index('index').fillna(0)


In [5]:
# Bundle every table under a string key, then normalise each one: name the
# index 'FIPS', drop every row that has any missing value (including a
# missing FIPS), and index by FIPS again.
time_series = {
    'cases_full': cases_full,
    'cases_added': cases_added,
    'deaths_full': deaths_full,
    'deaths_added': deaths_added,
    'ECBD': ECBD,
}
for key, frame in list(time_series.items()):
    flat = frame.copy().rename_axis('FIPS').reset_index()
    time_series[key] = flat.dropna().set_index('FIPS')
    print(type(time_series[key]))


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [6]:
# Write the dictionary of DataFrames to disk. The context manager closes the
# file handle (and flushes the data) even if pickling raises.
with open('time_series.pickle','wb') as pickle_file:
    pickle.dump(time_series,pickle_file)
