In [3]:
    import importlib
import json
import os
from pathlib import Path
import shutil
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcgis.gis import GIS
import boto3
from dotenv import load_dotenv, find_dotenv
import pandas as pd
from tqdm import tqdm
import zipfile

In [4]:
# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
dir_prj = Path('./').absolute().parent

dir_data = dir_prj/'data'

dir_raw = dir_data/'raw'
dir_ext = dir_data/'external'
dir_int = dir_data/'interim'
dir_out = dir_data/'processed'

gdb_raw = dir_raw/'raw.gdb'
gdb_int = dir_int/'interim.gdb'
gdb_out = dir_out/'processed.gdb'

# import the project package from the project package path
sys.path.append(str(dir_prj/'src'))
import pdx_parks

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

# load environment variables from .env
load_dotenv(find_dotenv())

True

In [7]:
# read the patterns parquet file stored under the interim 'sg' directory
ptrns_df = pd.read_parquet(dir_int.joinpath('sg', 'patterns.parquet'))

# preview the place identifier, the two metrics, and the year and month columns
ptrns_df.loc[:, ['safegraph_place_id', 'raw_visit_counts', 'distance_from_home', 'year', 'month']].head()

Unnamed: 0,safegraph_place_id,raw_visit_counts,distance_from_home,year,month
0,sg:465da3a640834193b3167cb8214a7325,204,1198783.0,2019,6
1,sg:c3ab3119ff924f5f8438f8075d61a1dd,418,10173.0,2019,6
2,sg:a2b62948ca5f4395b86678a216a51190,33,10871.0,2019,6
3,sg:06a21e9e0fc144a988d669990c3f01f5,344,16883.0,2019,6
4,sg:f160ad07acb54f7db8e052b025a4a241,3,,2019,6


In [8]:
import itertools

# keep only the July rows and the columns needed for the year-over-year comparison
ptrns_jul_df = ptrns_df.loc[
    ptrns_df.month == 7,
    ['safegraph_place_id', 'raw_visit_counts', 'distance_from_home', 'year', 'month'],
]

# start from one row per distinct place id, in order of first appearance
comp_df = pd.DataFrame({'safegraph_place_id': ptrns_jul_df.safegraph_place_id.unique()})

# add one column per (metric, year) pair, named '<metric>_<year>', matched on the place id
for metric, year in itertools.product(['raw_visit_counts', 'distance_from_home'], ptrns_jul_df.year.unique()):
    yr_vals = (
        ptrns_jul_df.loc[ptrns_jul_df.year == year, ['safegraph_place_id', metric]]
        .set_index('safegraph_place_id')
        .rename(columns={metric: f'{metric}_{year}'})
    )
    comp_df = comp_df.join(yr_vals, on='safegraph_place_id')

comp_df

Unnamed: 0,safegraph_place_id,raw_visit_counts_2019,raw_visit_counts_2020,distance_from_home_2019,distance_from_home_2020
0,sg:465da3a640834193b3167cb8214a7325,253.0,130.0,1370819.0,1125234.0
1,sg:c3ab3119ff924f5f8438f8075d61a1dd,419.0,274.0,7399.0,8234.0
2,sg:a2b62948ca5f4395b86678a216a51190,52.0,16.0,12051.0,6742.0
3,sg:06a21e9e0fc144a988d669990c3f01f5,420.0,542.0,18807.0,18996.0
4,sg:170d6d8a39d44f22874af1f35799b89a,2168.0,737.0,10088.0,5540.0
...,...,...,...,...,...
357,sg:077d2286767c473fa7c1d8574556a575,,39.0,,7720.0
358,sg:3e6650089ebb4c50abe2c49d43b1f976,,28.0,,5246.0
359,sg:5899cdce314f4355ba4cf32ac5ae746e,,67.0,,10495.0
360,sg:f9fab565f35946cb972dcad4f2c4f15f,,1330.0,,15177.0


In [257]:
# year-over-year change per metric: absolute deltas (2020 minus 2019) are added first,
# then the same deltas expressed as a fraction of the 2019 value
cmp_metrics = ('raw_visit_counts', 'distance_from_home')

for mtrc in cmp_metrics:
    comp_df[f'{mtrc}_delta'] = comp_df[f'{mtrc}_2020'] - comp_df[f'{mtrc}_2019']

for mtrc in cmp_metrics:
    comp_df[f'{mtrc}_delta_pct'] = comp_df[f'{mtrc}_delta'] / comp_df[f'{mtrc}_2019']

comp_df.head()

Unnamed: 0,safegraph_place_id,raw_visit_counts_2019,raw_visit_counts_2020,distance_from_home_2019,distance_from_home_2020,raw_visit_counts_delta,distance_from_home_delta,raw_visit_counts_delta_pct,distance_from_home_delta_pct
0,sg:465da3a640834193b3167cb8214a7325,253.0,130.0,1370819.0,1125234.0,-123.0,-245585.0,-0.486166,-0.179152
1,sg:c3ab3119ff924f5f8438f8075d61a1dd,419.0,274.0,7399.0,8234.0,-145.0,835.0,-0.346062,0.112853
2,sg:a2b62948ca5f4395b86678a216a51190,52.0,16.0,12051.0,6742.0,-36.0,-5309.0,-0.692308,-0.440544
3,sg:06a21e9e0fc144a988d669990c3f01f5,420.0,542.0,18807.0,18996.0,122.0,189.0,0.290476,0.010049
4,sg:170d6d8a39d44f22874af1f35799b89a,2168.0,737.0,10088.0,5540.0,-1431.0,-4548.0,-0.660055,-0.450833


In [261]:
# index the comparison table by place id so it can be matched against the sgpid column of poi_df
comp_by_place = comp_df.set_index('safegraph_place_id')

# every poi_df row is kept; comparison columns are empty where no place id matches
comb_df = poi_df.join(comp_by_place, on='sgpid')

# register SHAPE as the geometry column for the spatial accessor
comb_df.spatial.set_geometry('SHAPE')

comb_df.head()

Unnamed: 0,sgpid,psgpid,sgbid,poiName,brands,topCat,latitude,longitude,address,city,...,zip_code,SHAPE,raw_visit_counts_2019,raw_visit_counts_2020,distance_from_home_2019,distance_from_home_2020,raw_visit_counts_delta,distance_from_home_delta,raw_visit_counts_delta_pct,distance_from_home_delta_pct
0,sg:608f9918c8e6432780dba619c4a88c3d,,,Healy Heights Park,,"Museums, Historical Sites, and Similar Institu...",45.492833,-122.698466,Healy Heights Park,Portland,...,97239,"{""x"": -13658730.7562, ""y"": 5699444.5308, ""spat...",140.0,104.0,5384.0,7302.0,-36.0,1918.0,-0.257143,0.356241
1,sg:94d78ec671b4472dbef6fe4b7945d2b1,,,Harney Park,,"Museums, Historical Sites, and Similar Institu...",45.46254,-122.593244,Harney Park,Portland,...,97206,"{""x"": -13647017.4968, ""y"": 5694635.255199999, ...",271.0,255.0,6768.0,5928.0,-16.0,-840.0,-0.059041,-0.124113
2,sg:a16d01fc59234220839ab23c5ae4fdf6,,,Peninsula Crossing Trail,,"Museums, Historical Sites, and Similar Institu...",45.593004,-122.726733,Peninsula Crossing Trail,Portland,...,97203,"{""x"": -13661877.4243, ""y"": 5715365.990599997, ...",,432.0,,11773.0,,,,
3,sg:afe2a1f4fab740fab57ce21d51e061a9,,,Wellington Park,,"Museums, Historical Sites, and Similar Institu...",45.553219,-122.594489,Wellington Park,Portland,...,97218,"{""x"": -13647156.089499999, ""y"": 5709039.054399...",186.0,125.0,7610.0,1772.0,-61.0,-5838.0,-0.327957,-0.767148
4,sg:81184956709c489fb4e36e9048e1639e,,,Bethany Wetlands Natural Area,,"Museums, Historical Sites, and Similar Institu...",45.466553,-122.531375,Ash Creek Natural Area,Portland,...,97219,"{""x"": -13640130.271200001, ""y"": 5695272.2051, ...",,206.0,,8190.0,,,,


In [263]:
# export the combined frame as the sg_poi_pdx_parks feature class inside the interim geodatabase
poi_fc_pth = gdb_int.joinpath('sg_poi_pdx_parks')
comb_df.spatial.to_featureclass(poi_fc_pth)

'D:\\projects\\pdx-parks\\data\\interim\\interim.gdb\\sg_poi_pdx_parks'

# Experimentation Section

In [126]:
# Expand the bucketed_dwell_times JSON strings into one column per dwell-time bucket.
# Each row is parsed once and the frame is built in a single constructor call; this avoids creating a
# pd.Series per row inside apply, and still produces a DataFrame (not a Series) when fl_df has no rows.
bktd_dwell = pd.DataFrame(fl_df.bucketed_dwell_times.map(json.loads).tolist(), index=fl_df.index)

# turn the bucket labels into identifier-friendly column names,
# e.g. '<5' -> 'dwell_under5', '5-20' -> 'dwell_5_20', '>240' -> 'dwell_over240'
bktd_dwell.columns = [f'dwell_{c}'.replace('<', 'under').replace('-', '_').replace('>', 'over') for c in bktd_dwell.columns]
bktd_dwell

Unnamed: 0,dwell_under5,dwell_5_20,dwell_21_60,dwell_61_240,dwell_over240
0,6,74,47,74,25
1,1,4,3,1,0
2,2,66,38,25,10
3,0,6,6,10,6
4,9,148,137,154,30
...,...,...,...,...,...
96,18,519,315,433,45
97,0,14,15,19,9
98,3,29,17,23,9
99,17,299,126,145,115


In [132]:
# summary view of fl_df: place id and name alongside the visit count and distance-from-home metrics
sum_df = fl_df.loc[:, ['safegraph_place_id', 'location_name', 'raw_visit_counts', 'distance_from_home']]

sum_df.head()

Unnamed: 0,safegraph_place_id,location_name,raw_visit_counts,distance_from_home
0,sg:540d0f4bb20d4a5aa25f8e1c4d2e6354,Colonel Summers Park,226,3532.0
1,sg:fa34d1fd210c4d0187b7b0ce403da09f,Schweitzer Restoration Area,9,3181.0
2,sg:adf87bec514d4afab326c52cc8bb1570,Fulton Park,141,19492.0
3,sg:b80b8720977049b9990b93bb5b782e2b,Shanghai Tunnels,28,11855.0
4,sg:dc1cea9f581d41d3a3028c4bdbe04110,Irving Park,478,4016.0


In [133]:
# list places from the most raw visits to the fewest
sum_df.sort_values(by='raw_visit_counts', ascending=False)

Unnamed: 0,safegraph_place_id,location_name,raw_visit_counts,distance_from_home
15,sg:05cc35de35e0420f95981f919b48437a,Friends of Gateway Green,1452,8979.0
96,sg:f9fab565f35946cb972dcad4f2c4f15f,M James Gleason Memorial Boat Ramp,1330,15177.0
44,sg:9356f1b190a64b47b360dc6123b6acba,Laurelhurst Park,1272,10861.0
14,sg:b14fded97e3044b8988e3a525e13312b,Ed Benedict Park,1048,7295.0
87,sg:18977199399744cbb1865464a7c120fd,Council Crest Park,879,9907.0
...,...,...,...,...
49,sg:b47a3ccbcbb747298ee5c3ec6d39d50a,Washington State Horse Council,16,22999.0
59,sg:dc3b99194af7461bb05e406656f65d55,Glenn L Jackson Memorial Bridge,11,8894.0
1,sg:fa34d1fd210c4d0187b7b0ce403da09f,Schweitzer Restoration Area,9,3181.0
65,sg:95befdb52dcc4ad799d18c5db206fb19,Eastmoreland Garden,3,


In [109]:
# Explode each place's visitor_home_cbgs JSON (block group id -> visitor count) into a long table
# with one row per (place, home block group, year, month).
df_lst = []

# Iterate only the (year, month) pairs that actually occur in pth_df, in ascending order. Walking the
# cross product of the unique years and unique months fails with "No objects to concatenate" as soon
# as one pair has no files (for example a month that is present in only one of the years).
for (year, month), mth_pth_df in pth_df.groupby(['year', 'month'], sort=True):

    # stack every parquet file listed for this year and month into a single frame
    sg_mth_df = pd.concat([pd.read_parquet(pth) for pth in mth_pth_df.pth])

    # only two columns are needed, so walk them directly rather than building a Series per row with iterrows
    for sg_pid, hm_bg_json in zip(sg_mth_df.safegraph_place_id, sg_mth_df.visitor_home_cbgs):

        # keys of the parsed JSON become the rows, the values become visitor_home_count
        hc_df = pd.DataFrame.from_dict(json.loads(hm_bg_json), orient="index", columns=['visitor_home_count'])

        # move the block group ids out of the index into a regular column
        hc_df.index.name = 'block_group_id'
        hc_df = hc_df.reset_index()

        # tag every row with the place and the period it came from
        hc_df['sg_id'] = sg_pid
        hc_df['year'] = year
        hc_df['month'] = month

        df_lst.append(hc_df)

# the per-place frames keep their own 0..n index, so the combined index is not unique
hm_ct_df = pd.concat(df_lst)

hm_ct_df

Unnamed: 0,block_group_id,visitor_home_count,sg_id,year,month
0,060375433212,5,sg:465da3a640834193b3167cb8214a7325,2019,6
1,410050225001,5,sg:465da3a640834193b3167cb8214a7325,2019,6
2,320030029511,4,sg:465da3a640834193b3167cb8214a7325,2019,6
3,131530211132,4,sg:465da3a640834193b3167cb8214a7325,2019,6
4,530110417001,4,sg:465da3a640834193b3167cb8214a7325,2019,6
...,...,...,...,...,...
35,530670113001,4,sg:ff6f7c7e3e1b4ac9876261b221fe7f83,2020,8
36,530150016003,4,sg:ff6f7c7e3e1b4ac9876261b221fe7f83,2020,8
37,410510040022,4,sg:ff6f7c7e3e1b4ac9876261b221fe7f83,2020,8
38,410050229061,4,sg:ff6f7c7e3e1b4ac9876261b221fe7f83,2020,8


In [110]:
# save the home block group counts as hm_cnt.parquet in the interim data directory
hm_cnt_pth = dir_int.joinpath('hm_cnt.parquet')
hm_ct_df.to_parquet(hm_cnt_pth)