## NYC NTA table for bucketed time away from home by day
##### Output is large Excel table with all NTAs by day, total devices counted for bucketed_time_away, devices away for more than >1321 mins (i.e. 22 hours), devices away for < 20 min, and calculated share of devices away from home for >1321 min of devices counted in bucketed time away

In [1]:
import pandas as pd
import numpy as np
import s3fs
import os
import time

In [2]:
from geo import stco,sub
from safegraph_py_functions import safegraph_py_functions as sgpy

In [3]:
%load_ext dotenv
%dotenv
myAccessKey = os.getenv('myAccessKey')
mySecretKey = os.getenv('mySecretKey')

start = time.time()

In [4]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
# read nyc origin cbgs & geoxwalk doc
cbg_nyc = pd.read_csv(f'../data/nyc_cbg.csv')
nta_nyc = pd.read_excel(f'../data/nyc_geo_xwalk.xlsx')

In [6]:
# specify the SG key and secret
fs = s3fs.S3FileSystem(profile="safegraphws", key=myAccessKey, secret=mySecretKey, client_kwargs={'endpoint_url': 'https://s3.wasabisys.com', 'region_name':'us-east-1'})

## RUNNING FOR 2019 ONLY

In [7]:
y = "2020"
#years = ["2019","2020"]

monthList =["01","02","03","04","05","06","07","08","09","10","11","12"]
dayNumList =[31, 28, 31, 30, 31, 30, 31,31,30,31,30,31] 
dayList =["01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31"]

In [8]:
#Run an iteration to unpack the bucketed time away from home

In [9]:
frames = [] 
for m in range(8,9):
    for d in range(0,dayNumList[m]):
        with fs.open(f'sg-c19-response/social-distancing/v2/{y}/{monthList[m]}/{dayList[d]}/{y}-{monthList[m]}-{dayList[d]}-social-distancing.csv.gz','rb') as f:
            print(f'{y}-{monthList[m]}-{dayList[d]}')
            # read SG's file
            df = pd.read_csv(f, escapechar='\\', compression='gzip')
            # filter NYC's Origin CBGs
            df = pd.merge(nta_nyc, df, left_on="orig_cbg", right_on="origin_census_block_group", how="inner")

            #unpack json bucketed time away from home
            df = sgpy.unpack_json_and_merge(df, json_column='bucketed_away_from_home_time', key_col_name='away_from_home_time', value_col_name='away_count')
            
            ##Make new columns
            df['date_y-m-d'] = df['date_range_start'].str[:10]
            #Make new table with select columns
            dff = df[['date_y-m-d','orig_nta','away_from_home_time','away_count']]
            frames.append(dff) 

2020-09-01
2020-09-02
2020-09-03
2020-09-04
2020-09-05
2020-09-06
2020-09-07
2020-09-08
2020-09-09
2020-09-10
2020-09-11
2020-09-12
2020-09-13
2020-09-14
2020-09-15
2020-09-16
2020-09-17
2020-09-18
2020-09-19
2020-09-20
2020-09-21
2020-09-22
2020-09-23
2020-09-24
2020-09-25
2020-09-26
2020-09-27
2020-09-28
2020-09-29
2020-09-30


In [10]:
df_away = pd.concat(frames)
df_away = pd.pivot_table(df_away,values=['away_count'],index=['date_y-m-d','orig_nta'],columns=['away_from_home_time'],aggfunc=np.sum,fill_value=0,margins=True)

In [11]:
df_away.columns = df_away.columns.get_level_values(1)
df_away = df_away.reset_index()

In [12]:
df_away_reduced = df_away[['date_y-m-d', 'orig_nta','All','1321-1440','<20']]
df_away_reduced['pct_away_13211440'] = df_away_reduced.loc[:,'1321-1440']/df_away_reduced.loc[:,'All']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away_reduced['pct_away_13211440'] = df_away_reduced.loc[:,'1321-1440']/df_away_reduced.loc[:,'All']


In [13]:
df_away_reduced.to_excel(f'output/Time Away/2020_09_time-away-reduced.xlsx')

In [14]:
end = time.time()
elapsed = end - start
print(f'Run time - {elapsed} seconds')

Run time - 554.9079279899597 seconds


In [15]:
df_away_reduced.head(20)

away_from_home_time,date_y-m-d,orig_nta,All,1321-1440,<20,pct_away_13211440
0,2020-09-01,BK09,283,8,122,0.028269
1,2020-09-01,BK17,4187,68,1619,0.016241
2,2020-09-01,BK19,1816,20,751,0.011013
3,2020-09-01,BK21,1646,42,648,0.025516
4,2020-09-01,BK23,781,20,285,0.025608
5,2020-09-01,BK25,1815,18,684,0.009917
6,2020-09-01,BK26,1500,32,644,0.021333
7,2020-09-01,BK27,2141,35,890,0.016348
8,2020-09-01,BK28,4454,72,2079,0.016165
9,2020-09-01,BK29,3120,69,1422,0.022115
