In [83]:
from datetime import date
from functools import reduce
import pandas as pd
import covidcast

How does the percentage of people wearing masks over one month compare with the number of cases recorded in each state?

In [45]:
# Request the state-level "smoothed_wearing_mask" signal from the "fb-survey"
# source for the range 2020-11-02 to 2020-12-02.
maskdf = covidcast.signal(
    "fb-survey",
    "smoothed_wearing_mask",
    start_day=date(2020, 11, 2),
    end_day=date(2020, 12, 2),
    geo_type="state",
)

In [52]:
# Preview the first five rows of the mask-wearing data.
maskdf.head(n=5)

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
0,ak,smoothed_wearing_mask,2020-11-02,2020-11-07,5,83.953901,1.54686,563.0,state,fb-survey
1,al,smoothed_wearing_mask,2020-11-02,2020-11-07,5,82.856552,0.816425,2131.0443,state,fb-survey
2,ar,smoothed_wearing_mask,2020-11-02,2020-11-07,5,86.770515,0.819072,1711.084,state,fb-survey
3,az,smoothed_wearing_mask,2020-11-02,2020-11-07,5,87.352293,0.560127,3521.3777,state,fb-survey
4,ca,smoothed_wearing_mask,2020-11-02,2020-11-07,5,91.936424,0.23867,13014.3176,state,fb-survey


Example: the percentage of people in New York State (`ny`) who wore a mask each day from 11/02/20 to 12/02/20.

In [75]:
# Keep only the rows whose geo_value is "ny".
maskdf.loc[maskdf["geo_value"].eq("ny")]

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
34,ny,smoothed_wearing_mask,2020-11-02,2020-11-07,5,94.023539,0.262483,8156.0007,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-03,2020-11-08,5,93.993903,0.262401,8199.0002,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-04,2020-11-09,5,94.019738,0.272014,7599.0001,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-05,2020-11-10,5,94.084839,0.276869,7259.9998,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-06,2020-11-11,5,94.265025,0.268068,7522.9992,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-07,2020-11-12,5,94.229344,0.284587,6713.9988,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-08,2020-11-13,5,94.28108,0.303645,5847.999,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-09,2020-11-14,5,94.155734,0.312438,5636.9996,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-10,2020-11-15,5,94.221473,0.296266,6203.0,state,fb-survey
34,ny,smoothed_wearing_mask,2020-11-11,2020-11-16,5,94.219856,0.274607,7221.9997,state,fb-survey


Since we are given a value for each day in our time frame (one month), I believe we should average the daily mask-wearing percentages over the month to get, for each state, the average percentage of the population that wore a mask.

In [73]:
# Distinct geo_value codes, in order of first appearance in maskdf.
states = maskdf["geo_value"].drop_duplicates().tolist()

In [89]:
# Empty mapping; filled in later with one entry per state.
statesMaskForNov = dict()

In [96]:
# Average each state's daily mask-wearing percentages over the rows actually
# present for that state. Dividing the sum by a hard-coded 31 silently
# deflates the average for any state that is missing one or more days.
for state in states:
    # Select the "value" column for this state's rows in a single .loc call
    # (avoids chained indexing).
    daily_values = maskdf.loc[maskdf["geo_value"] == state, "value"]
    # Series.mean() divides by the number of non-missing observations.
    # The result is kept as a one-element list, the same shape of dict value
    # the original loop stored.
    statesMaskForNov[state] = [daily_values.mean()]

In [101]:
# Build a one-column table: dict keys become the row index and each
# one-element list becomes that row's value.
df = pd.DataFrame.from_dict(
    data=statesMaskForNov,
    orient="index",
    columns=["Avg % of Pop. Wearing Masks"],
)

Now we can see the average % of the population wearing masks for each state:

In [102]:
# Display the table of per-state average mask-wearing percentages.
df

Unnamed: 0,Avg % of Pop. Wearing Masks
ak,87.089642
al,84.171078
ar,87.175086
az,89.309515
ca,92.950907
co,92.289583
ct,96.136208
dc,98.154282
de,94.552745
fl,86.544892


In [46]:
# Request the state-level "confirmed_cumulative_num" signal from the
# "jhu-csse" source for the range 2020-11-02 to 2020-12-02.
casesdf = covidcast.signal(
    "jhu-csse",
    "confirmed_cumulative_num",
    start_day=date(2020, 11, 2),
    end_day=date(2020, 12, 2),
    geo_type="state",
)

In [48]:
# Keep only the rows whose geo_value is "ny".
casesdf.loc[casesdf["geo_value"].eq("ny")]

Unnamed: 0,geo_value,signal,time_value,issue,lag,value,stderr,sample_size,geo_type,data_source
34,ny,confirmed_cumulative_num,2020-11-02,2020-11-03,1,511368,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-03,2020-11-04,1,513689,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-04,2020-11-05,1,515815,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-05,2020-11-06,1,518812,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-06,2020-11-07,1,522021,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-07,2020-11-08,1,525608,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-08,2020-11-09,1,529036,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-09,2020-11-10,1,532180,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-10,2020-11-13,3,536145,,,state,jhu-csse
34,ny,confirmed_cumulative_num,2020-11-11,2020-11-13,2,540965,,,state,jhu-csse


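Again, I think we should take the average number of cases over the month for each state. Note, however, that `confirmed_cumulative_num` is a running total (the values above only increase from day to day), so this average reflects the cumulative count during the month rather than the number of new cases; the difference between the last and first day's values would give the cases recorded within the month.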