In [1]:
from pathlib import Path

import arcpy
from arcgis.features import GeoAccessor
import pandas as pd

from gtfs_tools.gtfs import GtfsDataset
from gtfs_tools.utils.gtfs import add_modality_descriptions

In [2]:
dir_prj = Path.cwd().parent

dir_data = dir_prj / 'data'
dir_raw = dir_data / 'raw'
dir_int = dir_data / 'interim'

gdb_int = dir_int / 'interim.gdb'

gtfs_parent_dir = dir_raw / 'gtfs_sf'

stops_pth = gdb_int / f'{gtfs_parent_dir.stem}_stops'
stops_id_col = 'stop_uid'

poi_pth = gdb_int / f'cbsa_sf_h3_09_cnt'
poi_id_col = 'GRID_ID'
modality_category = 'fixed' ## bus or fixed

h3_poly_pth = gdb_int / 'cbsa_sf_h3_09'

assert gtfs_parent_dir.exists()

arcpy.env.overwriteOutput = True

## Load Data

In [14]:
stops_df = GeoAccessor.from_featureclass(stops_pth)

stops_df.info()
stops_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31397 entries, 0 to 31396
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OBJECTID       31397 non-null  Int64   
 1   stop_uid       31397 non-null  string  
 2   stop_id        31397 non-null  string  
 3   modality_code  31397 non-null  string  
 4   modality_desc  31397 non-null  string  
 5   modality_cat   31397 non-null  string  
 6   SHAPE          31397 non-null  geometry
dtypes: Int64(1), geometry(1), string(5)
memory usage: 1.7 MB


Unnamed: 0,OBJECTID,stop_uid,stop_id,modality_code,modality_desc,modality_cat,SHAPE
0,35,amtrak_ACA,ACA,2,rail,fixed,"{""x"": -121.81602399999997, ""y"": 38.01770000000..."
1,100,amtrak_BKY,BKY,2,rail,fixed,"{""x"": -122.30070699999999, ""y"": 37.86725400000..."
2,278,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."
3,279,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."
4,280,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."


In [4]:
poi_df = GeoAccessor.from_featureclass(poi_pth)
h3_poly_df = GeoAccessor.from_featureclass(h3_poly_pth)

poi_df.info()
poi_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 419510 entries, 0 to 419509
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   STORE_ID  419510 non-null  string  
 1   GRID_ID   419510 non-null  string  
 2   OBJECTID  419510 non-null  Int64   
 3   SHAPE     419510 non-null  geometry
dtypes: Int64(1), geometry(1), string(2)
memory usage: 13.2 MB


Unnamed: 0,STORE_ID,GRID_ID,OBJECTID,SHAPE
0,1,8a2830100007fff,1,"{""x"": -122.18726885499996, ""y"": 38.03733354700..."
1,2,8a283010000ffff,2,"{""x"": -122.18832398399996, ""y"": 38.03816791200..."
2,3,8a2830100017fff,3,"{""x"": -122.18577712299998, ""y"": 38.03767358500..."
3,4,8a283010001ffff,4,"{""x"": -122.18683225399997, ""y"": 38.03850796400..."
4,5,8a2830100027fff,5,"{""x"": -122.18770544499995, ""y"": 38.03615912000..."


## Filter by Modality

In [11]:
stops_df = stops_df.loc[stops_df['modality_cat'] == modality_category]

stops_df.info()
stops_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 1421 entries, 0 to 31369
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OBJECTID       1421 non-null   Int64   
 1   stop_uid       1421 non-null   string  
 2   stop_id        1421 non-null   string  
 3   modality_code  1421 non-null   string  
 4   modality_desc  1421 non-null   string  
 5   modality_cat   1421 non-null   string  
 6   SHAPE          1421 non-null   geometry
dtypes: Int64(1), geometry(1), string(5)
memory usage: 90.2 KB


Unnamed: 0,OBJECTID,stop_uid,stop_id,modality_code,modality_desc,modality_cat,SHAPE
0,35,amtrak_ACA,ACA,2,rail,fixed,"{""x"": -121.81602399999997, ""y"": 38.01770000000..."
1,100,amtrak_BKY,BKY,2,rail,fixed,"{""x"": -122.30070699999999, ""y"": 37.86725400000..."
2,278,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."
3,279,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."
4,280,amtrak_EMY,EMY,2,rail,fixed,"{""x"": -122.29161499999998, ""y"": 37.84047400000..."


In [17]:
near_df = GeoAccessor.from_table(
    arcpy.analysis.GenerateNearTable(
        str(poi_pth), 
        near_features=str(stops_pth), 
        out_table='memory/near_tbl', 
        search_radius='2.5 miles', 
        method='geodesic'
    )[0]
)

near_df['eucl_dist_miles'] = near_df['NEAR_DIST'] * 0.00062137

near_df.drop(columns=['OBJECTID', 'NEAR_DIST'], inplace=True)

near_df = poi_df[['OBJECTID', 'STORE_ID', poi_id_col]].join(near_df.set_index('IN_FID'), on='OBJECTID', how='left')

near_df['NEAR_FID'] = near_df['NEAR_FID'].astype('Int64')

near_df.join(stops_df.set_index('OBJECTID')[stops_id_col], on='NEAR_FID', how='left').info()

near_df = near_df.join(stops_df.set_index('OBJECTID')[stops_id_col], on='NEAR_FID', how='left').loc[:,['GRID_ID', stops_id_col, 'eucl_dist_miles']]

near_df.info()
near_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 419510 entries, 0 to 419509
Data columns (total 6 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   OBJECTID         419510 non-null  Int64  
 1   STORE_ID         419510 non-null  string 
 2   GRID_ID          419510 non-null  string 
 3   NEAR_FID         250986 non-null  Int64  
 4   eucl_dist_miles  250986 non-null  float64
 5   stop_uid         250986 non-null  string 
dtypes: Int64(2), float64(1), string(3)
memory usage: 20.0 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 419510 entries, 0 to 419509
Data columns (total 3 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   GRID_ID          419510 non-null  string 
 1   stop_uid         250986 non-null  string 
 2   eucl_dist_miles  250986 non-null  float64
dtypes: float64(1), string(2)
memory usage: 9.6 MB


Unnamed: 0,GRID_ID,stop_uid,eucl_dist_miles
0,8a2830100007fff,soltrans_253,1.859308
1,8a283010000ffff,soltrans_253,1.851372
2,8a2830100017fff,soltrans_253,1.791282
3,8a283010001ffff,soltrans_253,1.781174
4,8a2830100027fff,soltrans_253,1.93797


In [19]:
stop_near_df = near_df.sort_values(['stop_uid', 'GRID_ID', 'eucl_dist_miles']).dropna()

stop_near_df['idx'] = stop_near_df.groupby(["stop_uid"]).cumcount() + 1

stop_near_df[stop_near_df['idx'] == 1]

Unnamed: 0,GRID_ID,stop_uid,eucl_dist_miles,idx
17687,8a28308364b7fff,actransit_10,0.217108,1
69872,8a283098d217fff,actransit_100,0.026822,1
324403,8a2834615207fff,actransit_1000,0.084069,1
324410,8a2834615247fff,actransit_1001,0.172787,1
318817,8a2834602c07fff,actransit_1002,0.099505,1
...,...,...,...,...
378,8a2830101247fff,westcatwesterncontracosta_791647,0.014332,1
48440,8a28308dc49ffff,westcatwesterncontracosta_791648,0.082698,1
6123,8a283012d40ffff,westcatwesterncontracosta_836604,0.126889,1
6026,8a283012d007fff,westcatwesterncontracosta_844542,0.160454,1


In [20]:
h3_eucl_df = h3_poly_df.drop(columns='OBJECTID').merge(stop_near_df.drop(columns='idx'), on='GRID_ID', how='left')

h3_eucl_df.info()
h3_eucl_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61125 entries, 0 to 61124
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   GRID_ID          61125 non-null  string  
 1   SHAPE            61125 non-null  geometry
 2   stop_uid         0 non-null      string  
 3   eucl_dist_miles  0 non-null      float64 
dtypes: float64(1), geometry(1), string(2)
memory usage: 1.9 MB


Unnamed: 0,GRID_ID,SHAPE,stop_uid,eucl_dist_miles
0,89283010003ffff,"{""rings"": [[[-122.18600756799998, 38.035829551...",,
1,89283010007ffff,"{""rings"": [[[-122.18960950099995, 38.037158254...",,
2,8928301000bffff,"{""rings"": [[[-122.18258743399997, 38.037683999...",,
3,8928301000fffff,"{""rings"": [[[-122.18618946599997, 38.039012780...",,
4,89283010013ffff,"{""rings"": [[[-122.18582568099998, 38.032646236...",,


In [69]:
h3_eucl_df.spatial.to_featureclass(gdb_int / 'cbsa_sf_h3_09_nearest_eucl')

'D:\\projects\\gtfs-tools\\data\\interim\\interim.gdb\\cbsa_sf_h3_09_nearest_eucl'