In [32]:
from arcgis.features import GeoAccessor

from sklearn.preprocessing import minmax_scale

In [8]:
stops_fc = r'D:\projects\gtfs-tools\data\interim\interim.gdb\la_cbsa_stops_index_factors'
h3_cnt_fc = r'D:\projects\gtfs-tools\data\external\external.gdb\cbsa_la_h3_08_centroids'
h3_fc = r'D:\projects\gtfs-tools\data\external\external.gdb\cbsa_la_h3_08'
near_tbl = r'D:\projects\gtfs-tools\data\interim\interim.gdb\cbsa_la_h3_08_near_tbl'

In [3]:
from typing import Union

import numpy as np

def get_sigmoid_distance_decay_index(
    distance: Union[float, int], steepness: Union[float, int], offset: Union[float, int]
) -> float:
    """
    Get sigmoid distance decay index.
    Args:
        distance: Distance to calculate decay for.
        steepness:
        offset:
    """
    distance_index = 1 / (1 + np.exp(steepness * (distance - offset)))

    return distance_index


def get_bus_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a bus stop.
    Args:
        distance: Walking distance in miles to the bus stop.
    """
    distance_index = get_sigmoid_distance_decay_index(distance, 5.8, 0.65)
    return distance_index


def get_light_rail_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a light rail stop.
    Args:
        distance: Walking distance in miles to the light rail stop or station.
    """
    distance_index = get_sigmoid_distance_decay_index(distance, 4.8, 1.3) * 0.98
    return distance_index

In [65]:
stops_df = GeoAccessor.from_featureclass(stops_fc).set_index('OBJECTID', drop=True)
stops_df = stops_df.loc[stops_df['trip_count'].notnull()]
stops_df['weighted_route_factor'] = stops_df['weighted_route_factor'].apply(lambda val: 1 if val == 3 else 2)
stops_df['trip_count'] = minmax_scale(stops_df['trip_count'])

stops_df.info()
stops_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 39162 entries, 1 to 40251
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   stop_id                39162 non-null  string  
 1   stop_name              39162 non-null  string  
 2   trip_count             39162 non-null  float64 
 3   late_night             39162 non-null  Int32   
 4   route_type             39162 non-null  string  
 5   weighted_route_factor  39162 non-null  int64   
 6   SHAPE                  39162 non-null  geometry
dtypes: Int32(1), float64(1), geometry(1), int64(1), string(3)
memory usage: 2.3 MB


Unnamed: 0_level_0,stop_id,stop_name,trip_count,late_night,route_type,weighted_route_factor,SHAPE
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0a40ca18-a300-492f-b261-9c7c7278d617,Katella-Auburn,0.005482,0,3,1,"{""x"": -117.89383766799995, ""y"": 33.80347291800..."
2,3013,Peacock - Candlewood,0.014619,0,33,2,"{""x"": -117.90666499999998, ""y"": 33.80492000000..."
3,2001,Harbor SB & Katella,0.022613,0,3,1,"{""x"": -117.91546699999998, ""y"": 33.80284400000..."
4,6024,Element - SunCoast,0.014847,0,33,2,"{""x"": -117.90989899899995, ""y"": 33.80837264700..."
5,2039,Anaheim Marriott Convention Center,0.022613,0,3,1,"{""x"": -117.91778720999997, ""y"": 33.79942228000..."


In [66]:
h3_df = GeoAccessor.from_featureclass(h3_cnt_fc).loc[:,['OBJECTID', 'GRID_ID']].set_index('OBJECTID')

h3_df.info()
h3_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 15405 entries, 1 to 15405
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   GRID_ID  15405 non-null  string
dtypes: string(1)
memory usage: 255.7 KB


Unnamed: 0_level_0,GRID_ID
OBJECTID,Unnamed: 1_level_1
1,8829124805fffff
2,882912480dfffff
3,8829124821fffff
4,8829124825fffff
5,8829124829fffff


In [67]:
near_df = GeoAccessor.from_table(near_tbl)
near_df['near_miles'] = near_df['NEAR_DIST'] * 0.00062137

near_df.info()
near_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7794740 entries, 0 to 7794739
Data columns (total 6 columns):
 #   Column      Dtype  
---  ------      -----  
 0   OBJECTID    int32  
 1   IN_FID      int32  
 2   NEAR_FID    int32  
 3   NEAR_DIST   float64
 4   NEAR_RANK   int32  
 5   near_miles  float64
dtypes: float64(2), int32(4)
memory usage: 237.9 MB


Unnamed: 0,OBJECTID,IN_FID,NEAR_FID,NEAR_DIST,NEAR_RANK,near_miles
0,1,13,19473,7556.25292,1,4.695229
1,2,17,19473,7382.419983,1,4.587214
2,3,18,19473,7169.686254,1,4.455028
3,4,18,26426,7502.379333,2,4.661753
4,5,18,17107,7503.865568,3,4.662677


In [68]:
idx_df = near_df[['IN_FID', 'NEAR_FID', 'near_miles']].join(h3_df, on='IN_FID').join(stops_df, on='NEAR_FID').loc[:,['GRID_ID', 'stop_id', 'stop_name', 'trip_count', 'late_night', 'weighted_route_factor', 'near_miles']]

idx_df

Unnamed: 0,GRID_ID,stop_id,stop_name,trip_count,late_night,weighted_route_factor,near_miles
0,8829124865fffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.695229
1,882912486dfffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.587214
2,8829124901fffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.455028
3,8829124901fffff,20235,Lindero Canyon / Lakeview Canyon,0.010050,0,1.0,4.661753
4,8829124901fffff,2986,Lindero Canyon / Lakeview Canyon,0.010050,0,1.0,4.662677
...,...,...,...,...,...,...,...
7794735,8829a57b37fffff,2004,BROOKHURST-ELLIS,0.015304,0,1.0,4.993533
7794736,8829a57b37fffff,7536,WESTMINSTER-MILAN,0.029466,1,1.0,4.998094
7794737,8829a57b37fffff,7535,WESTMINSTER-HAMMON,0.029466,1,1.0,4.998173
7794738,8829a57b37fffff,2697,BROOKHURST-ENT 19361 RANCHO MOBILE,0.015304,0,1.0,4.998186


In [69]:
idx_df['bus_coef'] = idx_df['near_miles'].apply(get_bus_stop_distance_decay_index)
idx_df['rail_coef'] = idx_df['near_miles'].apply(get_light_rail_stop_distance_decay_index)

idx_df

Unnamed: 0,GRID_ID,stop_id,stop_name,trip_count,late_night,weighted_route_factor,near_miles,bus_coef,rail_coef
0,8829124865fffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.695229,6.463096e-11,8.193813e-08
1,882912486dfffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.587214,1.209259e-10,1.376115e-07
2,8829124901fffff,6605,Trancas Canyon / Pacific Coast Highway,0.027867,0,1.0,4.455028,2.603063e-10,2.595446e-07
3,8829124901fffff,20235,Lindero Canyon / Lakeview Canyon,0.010050,0,1.0,4.661753,7.848057e-11,9.622089e-08
4,8829124901fffff,2986,Lindero Canyon / Lakeview Canyon,0.010050,0,1.0,4.662677,7.806133e-11,9.579531e-08
...,...,...,...,...,...,...,...,...,...
7794735,8829a57b37fffff,2004,BROOKHURST-ELLIS,0.015304,0,1.0,4.993533,1.145616e-11,1.957205e-08
7794736,8829a57b37fffff,7536,WESTMINSTER-MILAN,0.029466,1,1.0,4.998094,1.115712e-11,1.914829e-08
7794737,8829a57b37fffff,7535,WESTMINSTER-HAMMON,0.029466,1,1.0,4.998173,1.115197e-11,1.914097e-08
7794738,8829a57b37fffff,2697,BROOKHURST-ENT 19361 RANCHO MOBILE,0.015304,0,1.0,4.998186,1.115117e-11,1.913985e-08


In [31]:
idx_df.groupby('GRID_ID')[['bus_coef', 'rail_coef']].sum()

Unnamed: 0_level_0,bus_coef,rail_coef
GRID_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
8829124865fffff,6.463096e-11,8.193813e-08
882912486dfffff,1.209259e-10,1.376115e-07
8829124901fffff,5.567180e-10,6.616257e-07
8829124903fffff,5.090579e-10,7.359435e-07
8829124905fffff,8.153062e-09,7.780939e-06
...,...,...
8829a57b25fffff,2.248774e+01,5.999840e+01
8829a57b27fffff,1.026916e+01,4.329151e+01
8829a57b2dfffff,1.931063e+01,4.538123e+01
8829a57b35fffff,2.050275e+00,7.115400e+00
