In [337]:
from pathlib import Path

import arcpy
from arcgis.features import GeoAccessor
import pandas as pd
from sklearn.preprocessing import minmax_scale

from gtfs_tools.gtfs import GtfsDataset
from gtfs_tools.utils.gtfs import add_modality_descriptions, calculate_headway

In [338]:
dir_prj = Path.cwd().parent
dir_data = dir_prj / 'data'

dir_raw = dir_data / 'raw'
dir_int = dir_data / 'interim'
dir_ext = dir_data / 'external'

gdb_int = dir_int / 'interim.gdb'
gdb_ext = dir_ext / 'external.gdb'

gtfs_parent_dir = dir_raw / 'gtfs_la_cbsa'

gtfs_factor_dir = dir_int / f'{gtfs_parent_dir.name}_factors_dist_decay.parquet'
gtfs_stops_dir = dir_int / f'{gtfs_parent_dir.name}_stops_dist_decay.parquet'

stops_fc = gdb_int / f'{gtfs_parent_dir.name}_stops'
poi_fc = gdb_ext / f'cbsa_la_h3_08_centroids'
poi_poly_fc = gdb_ext / f'cbsa_la_h3_08'

arcpy.env.overwriteOutput = True

## Assemble Trip Factor and Stops Data

In [339]:
gtfs_pth_lst = [pth.parent for pth in gtfs_parent_dir.glob('**/agency.txt')]

In [340]:
for gtfs_cache_dir in [gtfs_factor_dir, gtfs_stops_dir]:
    if not gtfs_cache_dir.exists():
        gtfs_cache_dir.mkdir(parents=True)

In [341]:
for gtfs_pth in gtfs_pth_lst:

    # create a gtfs dataset 
    gtfs = GtfsDataset(gtfs_pth, standardize_route_types=True)

    # build the parquet part output path
    factor_pth = gtfs_factor_dir / ("factors_mdb_" + gtfs.gtfs_folder.name.split("=")[1] + ".part")
    stops_pth = gtfs_stops_dir / ("stops_mdb_" + gtfs.gtfs_folder.name.split("=")[1] + ".part")

    #### LATE BY STOP ####

    # get the needed columns for calculating overnight service
    tm_df = gtfs.stop_times.data.loc[:,['stop_id', 'arrival_time']]

    # calculate hour of the day by getting the timedelta hours and retaining the remainder when dividing by 24
    tm_df['hours'] = tm_df['arrival_time'].dt.components.hours % 24

    # calculate late night by evaluating if the stop time is after an hour in the evening or before an hour in the morning
    tm_df['late_night'] = (tm_df['hours'] <= 3) | (tm_df['hours'] >= 23)

    # get just the overnight boolean
    late_df = tm_df[['stop_id', 'late_night']].drop_duplicates()

    ### HEADWAY ###
    headway_df = gtfs.stop_times.headway[['stop_id', 'headway']].groupby('stop_id').mean().rename(columns={'headway': 'stop_headway_mean'})

    ### FIXED ROUTE TYPE AND AGENCY ###

    # get data frame of stops, routes and route types
    rt_df = gtfs.routes.data.loc[:,['route_id', 'route_type', 'agency_id']].drop_duplicates()

    # add modality descriptions
    rt_df = add_modality_descriptions(rt_df)

    # add the agency name
    rt_df = rt_df.merge(gtfs.agency.data[['agency_id', 'agency_name']], on='agency_id')

    # flag fixed types (not bus or school bus)
    rt_df['fixed_modality'] = ~rt_df['route_type'].isin(['3', '31'])

    # add factor for each route; fixed routes are weighted 3x more than bus routes
    rt_df['modality_factor'] = rt_df['fixed_modality'].apply(lambda val: 1 if val else 1/3)

    # create the combined factor dataframe
    factor_df = (gtfs._crosstab_stop_trip
                 .merge(gtfs._crosstab_stop_route, on='stop_id')
                 .merge(rt_df, on='route_id')
                 .merge(late_df, on='stop_id')
                 .merge(headway_df, on='stop_id')
                 .drop(columns=['agency_id'])
                )

    # create stop, trip and route uid column using the agency name
    agency_root = factor_df['agency_name'].str.lower().str.findall(r'\w+').str.join('') + '_'
    factor_df['stop_uid'] = agency_root + factor_df['stop_id']
    factor_df['trip_uid'] = agency_root + factor_df['trip_id']
    factor_df['route_uid'] = agency_root + factor_df['route_id']

    #### SAVE OUTPUT ####
    factor_df.to_parquet(factor_pth)

    # create stops with uid
    stops_df = (gtfs.stops.sedf[['stop_id', 'stop_name', 'SHAPE']]
                .merge(gtfs._crosstab_stop_agency, on='stop_id')
                .merge(gtfs.agency.data[['agency_id', 'agency_name']], on='agency_id')
                .drop(columns='agency_id')
               )

    stops_df['stop_uid'] = stops_df['agency_name'].str.lower().str.findall(r'\w+').str.join('') + '_' + stops_df['stop_id']
    # save stops
    stops_df.spatial.to_parquet(stops_pth)

In [342]:
# bulk load stops, set geometry and write to a feature class for geoprocessing
stops_df = pd.concat(GeoAccessor.from_parquet(pqt_pth) for pqt_pth in gtfs_stops_dir.glob('*.part'))
stops_df.spatial.set_geometry('SHAPE')

stops_df.spatial.to_featureclass(stops_fc)

'D:\\projects\\gtfs-tools\\data\\interim\\interim.gdb\\gtfs_la_cbsa_stops'

## Proximity - Near Table

In [343]:
stops_df = GeoAccessor.from_featureclass(stops_fc)

stops_df.info()
stops_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39281 entries, 0 to 39280
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   OBJECTID     39281 non-null  Int64   
 1   stop_id      39281 non-null  string  
 2   stop_name    39281 non-null  string  
 3   agency_name  39281 non-null  string  
 4   stop_uid     39281 non-null  string  
 5   SHAPE        39281 non-null  geometry
dtypes: Int64(1), geometry(1), string(4)
memory usage: 1.8 MB


Unnamed: 0,OBJECTID,stop_id,stop_name,agency_name,stop_uid,SHAPE
0,1,0a40ca18-a300-492f-b261-9c7c7278d617,Katella-Auburn,ART,art_0a40ca18-a300-492f-b261-9c7c7278d617,"{""x"": -117.89383766799995, ""y"": 33.80347291800..."
1,2,3013,Peacock - Candlewood,ART,art_3013,"{""x"": -117.90666499999998, ""y"": 33.80492000000..."
2,3,2001,Harbor SB & Katella,ART,art_2001,"{""x"": -117.91546699999998, ""y"": 33.80284400000..."
3,4,6024,Element - SunCoast,ART,art_6024,"{""x"": -117.90989899899995, ""y"": 33.80837264700..."
4,5,2039,Anaheim Marriott Convention Center,ART,art_2039,"{""x"": -117.91778720999997, ""y"": 33.79942228000..."


In [344]:
poi_df = GeoAccessor.from_featureclass(poi_fc)

poi_df.info()
poi_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15405 entries, 0 to 15404
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   STORE_ID  15405 non-null  string  
 1   GRID_ID   15405 non-null  string  
 2   OBJECTID  15405 non-null  Int64   
 3   SHAPE     15405 non-null  geometry
dtypes: Int64(1), geometry(1), string(2)
memory usage: 496.6 KB


Unnamed: 0,STORE_ID,GRID_ID,OBJECTID,SHAPE
0,1,8829124805fffff,1,"{""x"": -118.9374176328347, ""y"": 34.078006631385..."
1,2,882912480dfffff,2,"{""x"": -118.9406671569935, ""y"": 34.069604494762..."
2,3,8829124821fffff,3,"{""x"": -118.923847051033, ""y"": 34.0884719843598..."
3,4,8829124825fffff,4,"{""x"": -118.913525619574, ""y"": 34.0905347683213..."
4,5,8829124829fffff,5,"{""x"": -118.9270981257108, ""y"": 34.080070330812..."


In [345]:
near_df = GeoAccessor.from_table(
    arcpy.analysis.GenerateNearTable(
        str(poi_fc), 
        near_features=str(stops_fc), 
        out_table='memory/near_tbl', 
        search_radius='5 miles', 
        method='geodesic'
    )[0]
)

near_df['dist_miles'] = near_df['NEAR_DIST'] * 0.00062137

near_df.drop(columns=['OBJECTID', 'NEAR_DIST'], inplace=True)

near_df = poi_df[['OBJECTID', 'STORE_ID', 'GRID_ID']].join(near_df.set_index('IN_FID'), on='OBJECTID', how='left')

near_df['NEAR_FID'] = near_df['NEAR_FID'].astype('Int64')

near_df = near_df.join(stops_df.set_index('OBJECTID')['stop_uid'], on='NEAR_FID', how='left').loc[:,['GRID_ID', 'stop_uid', 'dist_miles']]

near_df.info()
near_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15405 entries, 0 to 15404
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   GRID_ID     15405 non-null  string 
 1   stop_uid    10217 non-null  string 
 2   dist_miles  10217 non-null  float64
dtypes: float64(1), string(2)
memory usage: 361.2 KB


Unnamed: 0,GRID_ID,stop_uid,dist_miles
0,8829124805fffff,,
1,882912480dfffff,,
2,8829124821fffff,,
3,8829124825fffff,,
4,8829124829fffff,,


### Calculate Distance Decay Factor

In [346]:
from typing import Union

import numpy as np

def get_sigmoid_distance_decay_index(
    distance: Union[float, int], steepness: Union[float, int], offset: Union[float, int]
) -> float:
    """
    Get sigmoid distance decay index.
    
    Args:
        distance: Distance to calculate decay for.
        steepness:
        offset:
    """
    distance_index = 1 / (1 + np.exp(steepness * (distance - offset)))

    return distance_index


def get_bus_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a bus stop.
    
    Args:
        distance: Walking distance in miles to the bus stop.
    """
    distance_index = get_sigmoid_distance_decay_index(distance, 5.8, 0.65)
    return distance_index


def get_light_rail_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a light rail stop.
    
    Args:
        distance: Walking distance in miles to the light rail stop or station.
    """
    distance_index = get_sigmoid_distance_decay_index(distance, 4.8, 1.3) * 0.98
    return distance_index

def get_distance_decay(distance: Union[float, int], modality: Union[str, int]):
    """
    Get correct distance decay based on modality code.
    
    Args:
        distance: Walking distance in miles to transit station.
        modality: Modality code for transit type being accessed.
    """
    # cast modality for consistency
    if isinstance(modality, int):
        modality = str(modality)
        
    # based on modality apply correct distance decay
    if modality in ['3', '31']:
        idx = get_bus_stop_distance_decay_index(distance)
    else:
        idx = get_light_rail_stop_distance_decay_index(distance)
        
    return idx    

In [347]:
factor_raw_df = pd.concat((pd.read_parquet(prt_pth) for prt_pth in gtfs_factor_dir.glob('*.part'))).drop(columns=['agency_name', 'stop_id', 'trip_id', 'route_id']).drop_duplicates()

factor_raw_df.info()
factor_raw_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 15303547 entries, 0 to 6945
Data columns (total 9 columns):
 #   Column             Dtype  
---  ------             -----  
 0   route_type         object 
 1   route_type_desc    object 
 2   fixed_modality     bool   
 3   modality_factor    float64
 4   late_night         bool   
 5   stop_headway_mean  float64
 6   stop_uid           object 
 7   trip_uid           object 
 8   route_uid          object 
dtypes: bool(2), float64(2), object(5)
memory usage: 963.2+ MB


Unnamed: 0,route_type,route_type_desc,fixed_modality,modality_factor,late_night,stop_headway_mean,stop_uid,trip_uid,route_uid
0,3,bus,False,0.333333,False,19.027778,art_9,art_05999c78-fd76-4ae7-8eb0-3b2893d3b035:1,art_fe28ed1e-274e-402a-b0c1-3f8d1bf9b7c5
1,3,bus,False,0.333333,False,19.027778,art_9,art_05999c78-fd76-4ae7-8eb0-3b2893d3b035:10,art_fe28ed1e-274e-402a-b0c1-3f8d1bf9b7c5
2,3,bus,False,0.333333,False,19.027778,art_9,art_05999c78-fd76-4ae7-8eb0-3b2893d3b035:11,art_fe28ed1e-274e-402a-b0c1-3f8d1bf9b7c5
3,3,bus,False,0.333333,False,19.027778,art_9,art_05999c78-fd76-4ae7-8eb0-3b2893d3b035:12,art_fe28ed1e-274e-402a-b0c1-3f8d1bf9b7c5
4,3,bus,False,0.333333,False,19.027778,art_9,art_05999c78-fd76-4ae7-8eb0-3b2893d3b035:13,art_fe28ed1e-274e-402a-b0c1-3f8d1bf9b7c5


In [348]:
factor_df = (
    near_df.merge(factor_raw_df, on='stop_uid', how='inner')
    .rename(columns={
        'GRID_ID': 'h3_idx',
        'route_type': 'modality',
        'route_type_desc': 'modality_desc'
    })
    # .drop(columns=['IN_FID', 'NEAR_FID', 'STORE_ID'])
)

factor_df = factor_df[[
    'h3_idx', 'stop_uid', 'trip_uid', 'route_uid', 'modality', 'modality_factor', 'late_night', 'stop_headway_mean', 'dist_miles'
]]

factor_df['distance_decay_coeff'] = factor_df[['dist_miles', 'modality']].apply(lambda r: get_distance_decay(*r), axis=1)
factor_df['headway_factor'] = 1 - minmax_scale(factor_df['stop_headway_mean']) 

factor_df.info()
factor_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2245147 entries, 0 to 2245146
Data columns (total 11 columns):
 #   Column                Dtype  
---  ------                -----  
 0   h3_idx                string 
 1   stop_uid              object 
 2   trip_uid              object 
 3   route_uid             object 
 4   modality              object 
 5   modality_factor       float64
 6   late_night            bool   
 7   stop_headway_mean     float64
 8   dist_miles            float64
 9   distance_decay_coeff  float64
 10  headway_factor        float64
dtypes: bool(1), float64(5), object(4), string(1)
memory usage: 173.4+ MB


Unnamed: 0,h3_idx,stop_uid,trip_uid,route_uid,modality,modality_factor,late_night,stop_headway_mean,dist_miles,distance_decay_coeff,headway_factor
0,8829124865fffff,metrolosangeles_6605,metrolosangeles_10134000010531-JUNE24,metrolosangeles_134-13183,3,0.333333,False,10.722892,4.695229,6.463096e-11,0.988484
1,8829124865fffff,metrolosangeles_6605,metrolosangeles_10134000010600-JUNE24,metrolosangeles_134-13183,3,0.333333,False,10.722892,4.695229,6.463096e-11,0.988484
2,8829124865fffff,metrolosangeles_6605,metrolosangeles_10134000010624-JUNE24,metrolosangeles_134-13183,3,0.333333,False,10.722892,4.695229,6.463096e-11,0.988484
3,8829124865fffff,metrolosangeles_6605,metrolosangeles_10134000010643-JUNE24,metrolosangeles_134-13183,3,0.333333,False,10.722892,4.695229,6.463096e-11,0.988484
4,8829124865fffff,metrolosangeles_6605,metrolosangeles_10134000010701-JUNE24,metrolosangeles_134-13183,3,0.333333,False,10.722892,4.695229,6.463096e-11,0.988484


### Bus Factors (less than 1.5 miles)

In [349]:
factor_bus_df = (
    factor_df[
        (
            (factor_df['modality'] == '3') 
            | (factor_df['modality'] == '31')
        )
        & (factor_df['dist_miles'] <= 1.5)
    ].sort_values(['h3_idx', 'trip_uid', 'dist_miles'], ascending=False)
)

factor_bus_df.info()
factor_bus_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 1887974 entries, 2230758 to 4428
Data columns (total 11 columns):
 #   Column                Dtype  
---  ------                -----  
 0   h3_idx                string 
 1   stop_uid              object 
 2   trip_uid              object 
 3   route_uid             object 
 4   modality              object 
 5   modality_factor       float64
 6   late_night            bool   
 7   stop_headway_mean     float64
 8   dist_miles            float64
 9   distance_decay_coeff  float64
 10  headway_factor        float64
dtypes: bool(1), float64(5), object(4), string(1)
memory usage: 160.2+ MB


Unnamed: 0,h3_idx,stop_uid,trip_uid,route_uid,modality,modality_factor,late_night,stop_headway_mean,dist_miles,distance_decay_coeff,headway_factor
2230758,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553270,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230757,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553269,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230756,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553268,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230755,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553267,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230754,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553266,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527


### Fixed (non-bus) Factors (less than 2.5 miles)

In [350]:
factor_fixed_df = (
    factor_df[
        ~(
            (factor_df['modality'] == '3') 
            | (factor_df['modality'] == '31')
        )
        & (factor_df['dist_miles'] <= 2.5)
    ].sort_values(['h3_idx', 'trip_uid', 'dist_miles'], ascending=False)
)

factor_fixed_df.info()
factor_fixed_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 37814 entries, 1927630 to 94464
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   h3_idx                37814 non-null  string 
 1   stop_uid              37814 non-null  object 
 2   trip_uid              37814 non-null  object 
 3   route_uid             37814 non-null  object 
 4   modality              37814 non-null  object 
 5   modality_factor       37814 non-null  float64
 6   late_night            37814 non-null  bool   
 7   stop_headway_mean     37814 non-null  float64
 8   dist_miles            37814 non-null  float64
 9   distance_decay_coeff  37814 non-null  float64
 10  headway_factor        37814 non-null  float64
dtypes: bool(1), float64(5), object(4), string(1)
memory usage: 3.2+ MB


Unnamed: 0,h3_idx,stop_uid,trip_uid,route_uid,modality,modality_factor,late_night,stop_headway_mean,dist_miles,distance_decay_coeff,headway_factor
1927630,8829a565e7fffff,metrolosangeles_80302,metrolosangeles_60670440,metrolosangeles_803,0,1.0,False,6.385787,0.199846,0.975038,0.993915
1927631,8829a565e7fffff,metrolosangeles_80302,metrolosangeles_60670440,metrolosangeles_803,0,1.0,True,6.385787,0.199846,0.975038,0.993915
1927628,8829a565e7fffff,metrolosangeles_80302,metrolosangeles_60670436,metrolosangeles_803,0,1.0,False,6.385787,0.199846,0.975038,0.993915
1927629,8829a565e7fffff,metrolosangeles_80302,metrolosangeles_60670436,metrolosangeles_803,0,1.0,True,6.385787,0.199846,0.975038,0.993915
1927626,8829a565e7fffff,metrolosangeles_80302,metrolosangeles_60670435,metrolosangeles_803,0,1.0,False,6.385787,0.199846,0.975038,0.993915


### Reassemble Factor Table

In [351]:
factor_df = pd.concat((factor_bus_df, factor_fixed_df))

factor_df.info()
factor_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 1925788 entries, 2230758 to 94464
Data columns (total 11 columns):
 #   Column                Dtype  
---  ------                -----  
 0   h3_idx                string 
 1   stop_uid              object 
 2   trip_uid              object 
 3   route_uid             object 
 4   modality              object 
 5   modality_factor       float64
 6   late_night            bool   
 7   stop_headway_mean     float64
 8   dist_miles            float64
 9   distance_decay_coeff  float64
 10  headway_factor        float64
dtypes: bool(1), float64(5), object(4), string(1)
memory usage: 163.5+ MB


Unnamed: 0,h3_idx,stop_uid,trip_uid,route_uid,modality,modality_factor,late_night,stop_headway_mean,dist_miles,distance_decay_coeff,headway_factor
2230758,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553270,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230757,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553269,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230756,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553268,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230755,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553267,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527
2230754,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553266,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527


### Get Trip Count by Point of Interest (H3 index)

In [352]:
trip_idx_df = factor_df[['h3_idx', 'trip_uid']].groupby(['h3_idx']).nunique().rename(columns={'trip_uid': 'trip_count'})

trip_idx_df['trip_count_factor'] = minmax_scale(trip_idx_df['trip_count'])

trip_idx_df.info()
trip_idx_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 6184 entries, 8829124b2dfffff to 8829a57b37fffff
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   trip_count         6184 non-null   int64  
 1   trip_count_factor  6184 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 144.9 KB


Unnamed: 0_level_0,trip_count,trip_count_factor
h3_idx,Unnamed: 1_level_1,Unnamed: 2_level_1
8829124b2dfffff,123,0.027893
8829a020b3fffff,35,0.007607
8829a020bbfffff,35,0.007607
8829a02565fffff,35,0.007607
8829a0256dfffff,35,0.007607


In [353]:
factor_df = factor_df.merge(trip_idx_df, on='h3_idx', how='outer')

factor_df.info()
factor_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1925788 entries, 0 to 1925787
Data columns (total 13 columns):
 #   Column                Dtype  
---  ------                -----  
 0   h3_idx                string 
 1   stop_uid              object 
 2   trip_uid              object 
 3   route_uid             object 
 4   modality              object 
 5   modality_factor       float64
 6   late_night            bool   
 7   stop_headway_mean     float64
 8   dist_miles            float64
 9   distance_decay_coeff  float64
 10  headway_factor        float64
 11  trip_count            int64  
 12  trip_count_factor     float64
dtypes: bool(1), float64(6), int64(1), object(4), string(1)
memory usage: 178.1+ MB


Unnamed: 0,h3_idx,stop_uid,trip_uid,route_uid,modality,modality_factor,late_night,stop_headway_mean,dist_miles,distance_decay_coeff,headway_factor,trip_count,trip_count_factor
0,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553270,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527,45,0.009912
1,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553269,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527,45,0.009912
2,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553268,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527,45,0.009912
3,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553267,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527,45,0.009912
4,8829a57b37fffff,orangecountytransportationauthority_5190,orangecountytransportationauthority_11553266,orangecountytransportationauthority_1,3,0.333333,False,34.642857,0.671382,0.469035,0.958527,45,0.009912


In [354]:
factor_df = factor_df[['h3_idx', 'stop_uid', 'headway_factor', 'trip_count_factor', 'modality_factor', 'late_night', 'distance_decay_coeff']]

factor_df.info()
factor_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1925788 entries, 0 to 1925787
Data columns (total 7 columns):
 #   Column                Dtype  
---  ------                -----  
 0   h3_idx                string 
 1   stop_uid              object 
 2   headway_factor        float64
 3   trip_count_factor     float64
 4   modality_factor       float64
 5   late_night            bool   
 6   distance_decay_coeff  float64
dtypes: bool(1), float64(4), object(1), string(1)
memory usage: 90.0+ MB


Unnamed: 0,h3_idx,stop_uid,headway_factor,trip_count_factor,modality_factor,late_night,distance_decay_coeff
0,8829a57b37fffff,orangecountytransportationauthority_5190,0.958527,0.009912,0.333333,False,0.469035
1,8829a57b37fffff,orangecountytransportationauthority_5190,0.958527,0.009912,0.333333,False,0.469035
2,8829a57b37fffff,orangecountytransportationauthority_5190,0.958527,0.009912,0.333333,False,0.469035
3,8829a57b37fffff,orangecountytransportationauthority_5190,0.958527,0.009912,0.333333,False,0.469035
4,8829a57b37fffff,orangecountytransportationauthority_5190,0.958527,0.009912,0.333333,False,0.469035


In [355]:
dist_coeff_df = factor_df[['h3_idx', 'stop_uid', 'distance_decay_coeff']].drop_duplicates().set_index(['h3_idx', 'stop_uid'])

dist_coeff_df

Unnamed: 0_level_0,Unnamed: 1_level_0,distance_decay_coeff
h3_idx,stop_uid,Unnamed: 2_level_1
8829a57b37fffff,orangecountytransportationauthority_5190,0.469035
8829a57b35fffff,orangecountytransportationauthority_5190,0.964667
8829a57b2dfffff,orangecountytransportationauthority_2997,0.938851
8829a57b27fffff,orangecountytransportationauthority_2986,0.668239
8829a57b25fffff,orangecountytransportationauthority_2560,0.954052
...,...,...
8829a0a2d5fffff,metrolinktrains_171,0.970159
8829a0961dfffff,metrolinktrains_133,0.974513
8829a09613fffff,metrolinktrains_152,0.974076
8829a09611fffff,amtrak_SNP,0.949691


In [356]:
modality_fctr_df = factor_df[['h3_idx', 'stop_uid', 'modality_factor']].groupby(['h3_idx', 'stop_uid']).sum()
headway_fctr_df = factor_df[['h3_idx', 'stop_uid', 'headway_factor']].groupby(['h3_idx', 'stop_uid']).mean()

h3_df = headway_fctr_df.join(modality_fctr_df).join(dist_coeff).reset_index().drop(columns='stop_uid')

h3_df.head()

Unnamed: 0,h3_idx,headway_factor,modality_factor,distance_decay_coeff
0,8829124b2dfffff,0.988484,41.0,0.010253
1,8829a020b3fffff,0.937963,11.666667,0.296045
2,8829a020bbfffff,0.937963,11.666667,0.040029
3,8829a02565fffff,0.937963,11.666667,0.031046
4,8829a0256dfffff,0.937963,11.666667,0.012082


In [357]:
h3_df['transit_index'] = (h3_df['headway_factor'] + h3_df['modality_factor']) / 2 * h3_df['distance_decay_coeff']

h3_df = h3_df[['h3_idx', 'transit_index']].groupby('h3_idx').sum().reset_index()

h3_df

Unnamed: 0,h3_idx,transit_index
0,8829124b2dfffff,0.215247
1,8829a020b3fffff,1.865770
2,8829a020bbfffff,0.252273
3,8829a02565fffff,0.195663
4,8829a0256dfffff,0.076146
...,...,...
6179,8829a57b25fffff,9.047505
6180,8829a57b27fffff,5.332043
6181,8829a57b2dfffff,7.650091
6182,8829a57b35fffff,7.697331


In [358]:
h3_poly_df = GeoAccessor.from_featureclass(poi_poly_fc, fields=['GRID_ID']).rename(columns={'GRID_ID': 'h3_idx'})

h3_poly_df.info()
h3_poly_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15405 entries, 0 to 15404
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   h3_idx  15405 non-null  string  
 1   SHAPE   15405 non-null  geometry
dtypes: geometry(1), string(1)
memory usage: 240.8 KB


Unnamed: 0,h3_idx,SHAPE
0,8829124805fffff,"{""rings"": [[[-118.9361445326725, 34.0730932620..."
1,882912480dfffff,"{""rings"": [[[-118.9393940303325, 34.0646909386..."
2,8829124821fffff,"{""rings"": [[[-118.92257434308411, 34.083558632..."
3,8829124825fffff,"{""rings"": [[[-118.91225327745761, 34.085621246..."
4,8829124829fffff,"{""rings"": [[[-118.9258253911294, 34.0751567916..."


In [359]:
h3_poly_df = h3_poly_df.merge(h3_df, on='h3_idx', how='outer')
h3_poly_df.spatial.set_geometry('SHAPE')
h3_poly_df['transit_index'] = h3_poly_df['transit_index'].fillna(0)

assert h3_poly_df.spatial.validate()
h3_poly_df.info()
h3_poly_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15405 entries, 0 to 15404
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   h3_idx         15405 non-null  string  
 1   SHAPE          15405 non-null  geometry
 2   transit_index  15405 non-null  float64 
dtypes: float64(1), geometry(1), string(1)
memory usage: 361.2 KB


Unnamed: 0,h3_idx,SHAPE,transit_index
0,8829124805fffff,"{""rings"": [[[-118.9361445326725, 34.0730932620...",0.0
1,882912480dfffff,"{""rings"": [[[-118.9393940303325, 34.0646909386...",0.0
2,8829124821fffff,"{""rings"": [[[-118.92257434308411, 34.083558632...",0.0
3,8829124825fffff,"{""rings"": [[[-118.91225327745761, 34.085621246...",0.0
4,8829124829fffff,"{""rings"": [[[-118.9258253911294, 34.0751567916...",0.0


In [336]:
h3_poly_df.spatial.to_featureclass(gdb_int / f'cbsa_la_h3_08_headway_idx')

'D:\\projects\\gtfs-tools\\data\\interim\\interim.gdb\\cbsa_la_h3_08_headway_idx'