In [None]:
import pandas as pd
import numpy as np
import s3fs
import os
import time

In [None]:
from geo import stco,sub
from safegraph_py_functions import safegraph_py_functions as sgpy

In [None]:
# Load the dotenv IPython extension and invoke its %dotenv magic
# (presumably this reads a local .env file into the process environment — confirm).
%load_ext dotenv
%dotenv
# Storage credentials are taken from environment variables rather than hard-coded;
# os.getenv returns None when a variable is not set.
myAccessKey = os.getenv('myAccessKey')
mySecretKey = os.getenv('mySecretKey')

# Wall-clock timestamp taken at setup time; it is not read again in the cells visible here.
start = time.time()

In [None]:
# Remove pandas' column and row display limits so previews are never truncated.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [None]:
# NYC block-group table; its orig_cbg column is the join key used to filter the daily files.
# The path is relative to the notebook's working directory.
nyc_cbg_csv = '../data/nyc_cbg.csv'
cbg_nyc = pd.read_csv(nyc_cbg_csv)

In [None]:
# Open an S3-compatible filesystem handle against the Wasabi endpoint,
# authenticating with the key/secret read from the environment.
fs = s3fs.S3FileSystem(
    profile="safegraphws",
    key=myAccessKey,
    secret=mySecretKey,
    client_kwargs={
        'endpoint_url': 'https://s3.wasabisys.com',
        'region_name': 'us-east-1',
    },
)

### Prep county-level table for 2019

Each record is one census block group (CBG) on one day; the rows are aggregated to county level further below.

In [None]:
# Zero-padded month and day-of-month labels used to build the dated S3 paths,
# plus the number of days in each month of a non-leap year.
monthList = [f"{month:02d}" for month in range(1, 13)]
dayNumList = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
dayList = [f"{day:02d}" for day in range(1, 32)]

In [None]:
## Check candidate device count for 2019
# Read every daily 2019 social-distancing file and keep only the rows whose
# origin block group appears in cbg_nyc.
#
# The calendar is generated with pd.date_range instead of the notebook-level
# monthList/dayNumList: a later cell overwrites those with the 8-month,
# leap-year 2020 values, so re-running this cell afterwards would request a
# non-existent 2019-02-29 file and index past the shortened lists.
frames = []
for day in pd.date_range('2019-01-01', '2019-12-31', freq='D'):
    date_str = day.strftime('%Y-%m-%d')   # e.g. '2019-01-31'
    date_dir = day.strftime('%Y/%m/%d')   # e.g. '2019/01/31'
    with fs.open(f'sg-c19-response/social-distancing/v2/{date_dir}/{date_str}-social-distancing.csv.gz','rb') as f:
        print(date_str)
        # read SG's file
        df = pd.read_csv(f, escapechar='\\', compression='gzip')
        # filter NYC's Origin CBGs (inner join drops every non-NYC origin)
        dff = pd.merge(cbg_nyc, df, left_on="orig_cbg", right_on="origin_census_block_group", how="inner")
        # first 10 characters of the start timestamp string, used as the date key
        dff['date_y-m-d'] = dff['date_range_start'].str[:10]
        frames.append(dff)

In [None]:
# Stack the daily 2019 frames and keep only the date, orig_stco and device-count columns.
df_19 = pd.concat(frames)[[
    'date_y-m-d',
    'orig_stco',
    'completely_home_device_count',
    'candidate_device_count',
    'device_count',
]]

### Prep county-level table for 2020

Each record is one census block group (CBG) on one day; the rows are aggregated to county level further below.

In [None]:
# updated february day for leap year
monthList =["01","02","03","04","05","06", "07","08"]
dayNumList =[31, 29, 31, 30, 31, 30, 31, 31] 

In [None]:
## Check candidate device count for 2020
# Read every daily 2020 social-distancing file from January 1 through
# August 31 and keep only the rows whose origin block group appears in cbg_nyc.
#
# The calendar is generated with pd.date_range, which accounts for the leap
# day itself, so this cell no longer depends on the hand-maintained
# monthList/dayNumList globals being in their 2020 state.
frames = []
for day in pd.date_range('2020-01-01', '2020-08-31', freq='D'):
    date_str = day.strftime('%Y-%m-%d')   # e.g. '2020-02-29'
    date_dir = day.strftime('%Y/%m/%d')   # e.g. '2020/02/29'
    with fs.open(f'sg-c19-response/social-distancing/v2/{date_dir}/{date_str}-social-distancing.csv.gz','rb') as f:
        print(date_str)
        # read SG's file
        df = pd.read_csv(f, escapechar='\\', compression='gzip')
        # filter NYC's Origin CBGs (inner join drops every non-NYC origin)
        dff = pd.merge(cbg_nyc, df, left_on="orig_cbg", right_on="origin_census_block_group", how="inner")
        # first 10 characters of the start timestamp string, used as the date key
        dff['date_y-m-d'] = dff['date_range_start'].str[:10]
        frames.append(dff)

In [None]:
# Stack the daily 2020 frames and keep only the date, orig_stco and device-count columns.
df_20 = pd.concat(frames)[[
    'date_y-m-d',
    'orig_stco',
    'completely_home_device_count',
    'candidate_device_count',
    'device_count',
]]

In [None]:
# Preview the first 20 rows of the 2020 table.
df_20.head(n=20)

### Add 2019 and 2020 tables together

In [None]:
# Append the 2020 rows beneath the 2019 rows (row-wise concat is the default axis) and preview.
table = pd.concat([df_19, df_20])
table.head(n=20)

In [None]:
# Aggregate all of the block-level rows to one row per date and orig_stco.
# The reductions are named 'sum' instead of passing np.sum: pandas >= 2.1
# emits a FutureWarning for np.sum in groupby.agg and will stop mapping it to
# the built-in groupby sum in a future release.
table_co = table.groupby(['date_y-m-d', 'orig_stco']).agg({
    'device_count': 'sum',
    'completely_home_device_count': 'sum',
    'candidate_device_count': 'sum',
})

In [None]:
# Preview the first 20 rows of the per-date, per-orig_stco totals.
table_co.head(n=20)

In [None]:
# Aggregate to a single NYC-wide total per date.
# The reductions are named 'sum' instead of passing np.sum: pandas >= 2.1
# emits a FutureWarning for np.sum in groupby.agg and will stop mapping it to
# the built-in groupby sum in a future release.
table_nyc = table.groupby(['date_y-m-d']).agg({
    'device_count': 'sum',
    'completely_home_device_count': 'sum',
    'candidate_device_count': 'sum',
})
table_nyc.head(20)

In [None]:
# Write the per-orig_stco and NYC-wide tables to Excel.
# The output folder is created first so that a missing directory cannot fail
# the export after the long download loops have already completed.
os.makedirs("output/dara", exist_ok=True)
table_co.to_excel("output/dara/full_candidate_device_check_borough.xlsx")
table_nyc.to_excel("output/dara/full_candidate_device_check_nyc.xlsx")