In [None]:
import covidcast
import time
import json
from datetime import date, datetime
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# https://gist.github.com/rogerallen/1583593
us_state_abbrev={'Alabama':'AL','Alaska':'AK','American Samoa':'AS','Arizona':'AZ','Arkansas':'AR','California':'CA','Colorado':'CO','Connecticut':'CT','Delaware':'DE','District of Columbia':'DC','Florida':'FL','Georgia':'GA','Guam':'GU','Hawaii':'HI','Idaho':'ID','Illinois':'IL','Indiana':'IN','Iowa':'IA','Kansas':'KS','Kentucky':'KY','Louisiana':'LA','Maine':'ME','Maryland':'MD','Massachusetts':'MA','Michigan':'MI','Minnesota':'MN','Mississippi':'MS','Missouri':'MO','Montana':'MT','Nebraska':'NE','Nevada':'NV','New Hampshire':'NH','New Jersey':'NJ','New Mexico':'NM','New York':'NY','North Carolina':'NC','North Dakota':'ND','Northern Mariana Islands':'MP','Ohio':'OH','Oklahoma':'OK','Oregon':'OR','Pennsylvania':'PA','Puerto Rico':'PR','Rhode Island':'RI','South Carolina':'SC','South Dakota':'SD','Tennessee':'TN','Texas':'TX','Utah':'UT','Vermont':'VT','Virgin Islands':'VI','Virginia':'VA','Washington':'WA','West Virginia':'WV','Wisconsin':'WI','Wyoming':'WY'}

abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

## Download fb-survey data

In [169]:
indicators = {
    'smoothed_wcli': None,
    'smoothed_hh_cmnty_cli': None,
    'smoothed_wothers_masked': None,
    'smoothed_wwearing_mask': None,
    'smoothed_wtravel_outside_state_5d': None,
    'smoothed_wwork_outside_home_1d': None,
    'smoothed_wspent_time_1d': None,
    'smoothed_wlarge_event_1d': None,
    'smoothed_wpublic_transit_1d': None,
    'smoothed_wanxious_5d': None,
    'smoothed_wdepressed_5d': None,
    'smoothed_wfelt_isolated_5d': None,
    'smoothed_wworried_become_ill': None,
    'smoothed_wworried_finances': None,
}

for ind in indicators.keys():
    temp = covidcast.signal(
        'fb-survey', ind,
        date(2020, 4, 4), date(2021, 2, 19),
        geo_type="state"
    )

    temp['index'] = temp['geo_value'] + temp['time_value'].astype(str)
    temp.rename(columns={
        'value': f'{ind}_value',
        'stderr': f'{ind}_stderr',
        'sample_size': f'{ind}_sample_size'
    }, inplace=True)
    temp.set_index('index', inplace=True)

    indicators[ind] = temp
    print(f'Downloaded {ind}. Shape: {temp.shape}')

Downloaded smoothed_wcli. Shape: (16390, 10)
Downloaded smoothed_hh_cmnty_cli. Shape: (15931, 10)
Downloaded smoothed_wothers_masked. Shape: (4482, 10)
Downloaded smoothed_wwearing_mask. Shape: (8339, 10)
Downloaded smoothed_wtravel_outside_state_5d. Shape: (16360, 10)
Downloaded smoothed_wwork_outside_home_1d. Shape: (8408, 10)
Downloaded smoothed_wspent_time_1d. Shape: (8408, 10)
Downloaded smoothed_wlarge_event_1d. Shape: (8408, 10)
Downloaded smoothed_wpublic_transit_1d. Shape: (8408, 10)
Downloaded smoothed_wanxious_5d. Shape: (8406, 10)
Downloaded smoothed_wdepressed_5d. Shape: (8405, 10)
Downloaded smoothed_wfelt_isolated_5d. Shape: (8405, 10)
Downloaded smoothed_wworried_become_ill. Shape: (8407, 10)
Downloaded smoothed_wworried_finances. Shape: (8407, 10)


## Merge fb-survey data

In [171]:
from functools import reduce

# data_frames = [
#     smoothed_wcli,
#     smoothed_hh_cmnty_cli,
#     smoothed_wothers_masked,
#     smoothed_wwearing_mask,
#     smoothed_wtravel_outside_state_5d,
#     smoothed_wwork_outside_home_1d,
#     smoothed_wspent_time_1d,
#     smoothed_wlarge_event_1d,
#     smoothed_wpublic_transit_1d,
#     smoothed_wanxious_5d,
#     smoothed_wdepressed_5d,
#     smoothed_wfelt_isolated_5d,
#     smoothed_wworried_become_ill,
#     smoothed_wworried_finances
# ]

merged = reduce(lambda left, right: pd.merge(left, right, how='outer',
                                             left_index=True, right_index=True,
                                             suffixes=('', '_delme')), 
                indicators.values())
merged = merged[[c for c in merged.columns if not c.endswith('_delme')]]
print(merged.shape)
merged.head()

(16396, 49)


Unnamed: 0_level_0,geo_value,signal,time_value,issue,lag,smoothed_wcli_value,smoothed_wcli_stderr,smoothed_wcli_sample_size,geo_type,data_source,...,smoothed_wdepressed_5d_sample_size,smoothed_wfelt_isolated_5d_value,smoothed_wfelt_isolated_5d_stderr,smoothed_wfelt_isolated_5d_sample_size,smoothed_wworried_become_ill_value,smoothed_wworried_become_ill_stderr,smoothed_wworried_become_ill_sample_size,smoothed_wworried_finances_value,smoothed_wworried_finances_stderr,smoothed_wworried_finances_sample_size
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ak2020-04-06,ak,smoothed_wcli,2020-04-06,2020-09-03,150.0,1.502092,0.700182,412.0,state,fb-survey,...,,,,,,,,,,
ak2020-04-07,ak,smoothed_wcli,2020-04-07,2020-09-03,149.0,1.209285,0.385696,1258.0,state,fb-survey,...,,,,,,,,,,
ak2020-04-08,ak,smoothed_wcli,2020-04-08,2020-09-03,148.0,1.130105,0.281751,1934.0,state,fb-survey,...,,,,,,,,,,
ak2020-04-09,ak,smoothed_wcli,2020-04-09,2020-09-03,147.0,0.935932,0.21379,2447.0,state,fb-survey,...,,,,,,,,,,
ak2020-04-10,ak,smoothed_wcli,2020-04-10,2020-09-03,146.0,0.829453,0.187829,2685.0,state,fb-survey,...,,,,,,,,,,


# Get JHU cases and deaths data

In [188]:
indicators2 = {
    'confirmed_7dav_cumulative_prop': None,
    'confirmed_7dav_incidence_prop': None,
    'deaths_7dav_incidence_prop': None,
    'confirmed_incidence_prop': None,
    'deaths_incidence_prop': None
}

for ind in indicators2.keys():
    temp = covidcast.signal(
        'jhu-csse', ind,
        date(2020, 4, 4), date(2021, 2, 19),
        geo_type="state"
    )

    temp['index'] = temp['geo_value'] + temp['time_value'].astype(str)
    temp.rename(columns={
        'value': f'{ind}_value',
        'stderr': f'_delme',
        'sample_size': f'_delme'
    }, inplace=True)
    temp.set_index('index', inplace=True)

    indicators2[ind] = temp
    print(f'Downloaded {ind}. Shape: {temp.shape}')

Downloaded confirmed_7dav_cumulative_prop. Shape: (18032, 10)
Downloaded confirmed_7dav_incidence_prop. Shape: (18032, 10)
Downloaded deaths_7dav_incidence_prop. Shape: (18032, 10)
Downloaded confirmed_incidence_prop. Shape: (18032, 10)
Downloaded deaths_incidence_prop. Shape: (18032, 10)


In [186]:
merged = reduce(lambda left, right: pd.merge(left, right, how='inner',
                                             left_index=True, right_index=True,
                                             suffixes=('', '_delme')), 
                [merged] + list(indicators2.values()))
merged = merged[[c for c in merged.columns if not c.endswith('_delme')]]
merged.drop(['signal', 'data_source'], axis=1, inplace=True)

print(merged.shape)
merged.head()

(16396, 51)


Unnamed: 0_level_0,geo_value,time_value,issue,lag,smoothed_wcli_value,smoothed_wcli_stderr,smoothed_wcli_sample_size,geo_type,smoothed_hh_cmnty_cli_value,smoothed_hh_cmnty_cli_stderr,...,smoothed_wworried_become_ill_value,smoothed_wworried_become_ill_stderr,smoothed_wworried_become_ill_sample_size,smoothed_wworried_finances_value,smoothed_wworried_finances_stderr,smoothed_wworried_finances_sample_size,confirmed_7dav_cumulative_prop_value,confirmed_7dav_incidence_prop_value,deaths_7dav_incidence_prop_value,confirmed_incidence_prop_value
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ak2020-04-06,ak,2020-04-06,2020-09-03,150.0,1.502092,0.700182,412.0,state,,,...,,,,,,,22.399482,1.461695,0.059258,0.82961
ak2020-04-07,ak,2020-04-07,2020-09-03,149.0,1.209285,0.385696,1258.0,state,,,...,,,,,,,24.098208,1.698726,0.059258,3.041905
ak2020-04-08,ak,2020-04-08,2020-09-03,148.0,1.130105,0.281751,1934.0,state,,,...,,,,,,,25.796935,1.698726,0.079011,1.797489
ak2020-04-09,ak,2020-04-09,2020-09-03,147.0,0.935932,0.21379,2447.0,state,,,...,,,,,,,27.535166,1.738231,0.079011,1.382684
ak2020-04-10,ak,2020-04-10,2020-09-03,146.0,0.829453,0.187829,2685.0,state,,,...,,,,,,,29.273398,1.738231,0.079011,1.520953


In [187]:
with open('output2.xlsx', 'wb') as out:
    writer = pd.ExcelWriter(out, engine='openpyxl')
    merged.to_excel(writer)
    writer.save()

In [183]:
data = covidcast.signal(
        'jhu-csse', 'confirmed_7dav_incidence_prop',
        date(2020, 9, 8), date(2021, 2, 19),
        geo_type="state"
)

In [184]:
covidcast.animate(data, 'confirmed_7dav_incidence_prop_sep8_feb19.mp4')

100%|██████████| 322/322 [18:12<00:00,  3.39s/it]
