In [70]:
from pathlib import Path
from typing import Union

import pandas as pd

from gtfs_tools.gtfs import GtfsDataset
from gtfs_tools.utils.gtfs import add_modality_descriptions

In [71]:
# Project layout, resolved from the parent of the current working directory.
dir_prj = Path.cwd().parent

# Data folders: raw inputs and interim (derived) outputs.
dir_data = dir_prj.joinpath('data')
dir_raw = dir_data.joinpath('raw')
dir_int = dir_data.joinpath('interim')

# File geodatabase that receives the interim outputs.
gdb_int = dir_int.joinpath('interim.gdb')

# Folder that is searched for GTFS feeds.
gtfs_parent_dir = dir_raw.joinpath('gtfs_sf')

# Output location for the stops, named after the feed folder.
stops_pth = gdb_int.joinpath(f'{gtfs_parent_dir.stem}_stops')

# Fail early when the input folder is missing.
assert gtfs_parent_dir.exists()

In [72]:
# Every directory (at any depth) that contains a stops.txt is treated as one GTFS feed.
# Sorted because glob order is not guaranteed and a later cell selects a feed by position.
dir_gtfs_lst = sorted(pth.parent for pth in gtfs_parent_dir.glob('**/stops.txt'))

In [73]:
# Load the feed here so the cell works on a fresh kernel; before this, `gtfs` was only
# assigned further down the notebook and this cell relied on leftover kernel state.
gtfs = GtfsDataset(dir_gtfs_lst[1], standardize_route_types=True)

# Route lookup: route id, modality code/description and the agency owning the route.
route_df = (
    add_modality_descriptions(gtfs.routes.data)
    .rename(columns={'route_type': 'modality_code', 'route_type_desc': 'modality_desc'})
    .loc[:,['route_id', 'modality_code', 'modality_desc', 'agency_id']]
)

route_df

Unnamed: 0,route_id,modality_code,modality_desc,agency_id
0,ACETrain,2,rail,CE


In [74]:
# Prototype the stop table on a single feed (the second discovered directory).
gtfs = GtfsDataset(dir_gtfs_lst[1], standardize_route_types=True)

# Route lookup: route id, modality code/description and owning agency.
route_df = add_modality_descriptions(gtfs.routes.data).rename(
    columns={'route_type': 'modality_code', 'route_type_desc': 'modality_desc'}
)
route_df = route_df.loc[:, ['route_id', 'modality_code', 'modality_desc', 'agency_id']]

# Stop geometry -> stop/route crosstab -> route modality -> agency name.
stops_df = gtfs.stops.sedf.loc[:, ['stop_id', 'SHAPE']]
stops_df = stops_df.merge(gtfs._crosstab_stop_route, on='stop_id', how='left')
stops_df = stops_df.merge(route_df, on='route_id', how='left')
stops_df = stops_df.merge(gtfs.agency.data[['agency_id', 'agency_name']], on='agency_id')

stops_df = stops_df.assign(
    # Codes '3' and '31' are labelled 'bus'; every other value is labelled 'fixed'.
    modality_cat=stops_df['modality_code'].apply(
        lambda code: 'fixed' if code not in ['3', '31'] else 'bus'
    ),
    # Unique id: lower-cased agency name reduced to word characters, then '_' and the stop id.
    stop_uid=(
        stops_df['agency_name'].str.lower().str.findall(r'\w+').str.join('')
        + '_'
        + stops_df['stop_id']
    ),
)[['stop_uid', 'stop_id', 'modality_code', 'modality_desc', 'modality_cat', 'SHAPE']]

stops_df

Unnamed: 0,stop_uid,stop_id,modality_code,modality_desc,modality_cat,SHAPE
0,altamontcorridorexpress_74368,74368,2,rail,fixed,"{""x"": -122.007353, ""y"": 37.559114, ""spatialRef..."
1,altamontcorridorexpress_74422,74422,2,rail,fixed,"{""x"": -121.967047, ""y"": 37.406833, ""spatialRef..."
2,altamontcorridorexpress_74586,74586,2,rail,fixed,"{""x"": -121.264143, ""y"": 37.79894, ""spatialRefe..."
3,altamontcorridorexpress_74548,74548,2,rail,fixed,"{""x"": -121.766906, ""y"": 37.685045, ""spatialRef..."
4,altamontcorridorexpress_74757,74757,2,rail,fixed,"{""x"": -121.882229, ""y"": 37.658561, ""spatialRef..."
5,altamontcorridorexpress_74752,74752,2,rail,fixed,"{""x"": -121.902378, ""y"": 37.330686, ""spatialRef..."
6,altamontcorridorexpress_74722,74722,2,rail,fixed,"{""x"": -121.936312, ""y"": 37.353506, ""spatialRef..."
7,altamontcorridorexpress_74758,74758,2,rail,fixed,"{""x"": -121.27888, ""y"": 37.95708, ""spatialRefer..."
8,altamontcorridorexpress_74872,74872,2,rail,fixed,"{""x"": -121.432738, ""y"": 37.696419, ""spatialRef..."
9,altamontcorridorexpress_74827,74827,2,rail,fixed,"{""x"": -121.717655, ""y"": 37.697062, ""spatialRef..."


In [65]:
def get_stops_sedf(gtfs: Union[GtfsDataset, str, Path]) -> pd.DataFrame:
    """
    Build the stop table for one GTFS feed.

    Args:
        gtfs: A loaded ``GtfsDataset``, or a ``str`` / ``Path`` to a feed directory. A path
            is opened with ``GtfsDataset(gtfs, standardize_route_types=True)``.

    Returns:
        Data frame with the columns ``stop_uid``, ``stop_id``, ``modality_code``,
        ``modality_desc``, ``modality_cat`` and ``SHAPE``.

    Notes:
        The agency join is an inner join, so rows whose ``agency_id`` has no match in the
        agency table are dropped. Stops are joined to routes through the dataset's
        stop/route crosstab, so a stop presumably appears once per route serving it
        (TODO confirm against ``GtfsDataset``).
    """
    # Accept a path for convenience, so callers can map this function over feed directories.
    if isinstance(gtfs, (str, Path)):
        gtfs = GtfsDataset(gtfs, standardize_route_types=True)

    # Route lookup: route id, modality code/description and owning agency.
    route_df = (
        add_modality_descriptions(gtfs.routes.data)
        .rename(columns={'route_type': 'modality_code', 'route_type_desc': 'modality_desc'})
        .loc[:,['route_id', 'modality_code', 'modality_desc', 'agency_id']]
    )

    # Stop geometry -> stop/route crosstab -> route modality -> agency name.
    stops_df = (
        gtfs.stops.sedf.loc[:,['stop_id', 'SHAPE']]
        .merge(gtfs._crosstab_stop_route, on='stop_id', how='left')
        .merge(route_df, on='route_id', how='left')
        .merge(gtfs.agency.data[['agency_id', 'agency_name']], on='agency_id')
    )

    # Codes '3' and '31' are labelled 'bus'; everything else (including missing codes) is
    # labelled 'fixed'. Vectorised membership test instead of a per-row lambda.
    is_bus = stops_df['modality_code'].isin(['3', '31'])
    stops_df['modality_cat'] = is_bus.map({True: 'bus', False: 'fixed'})

    # Unique id: lower-cased agency name reduced to word characters, then '_' and the stop id.
    stops_df['stop_uid'] = stops_df['agency_name'].str.lower().str.findall(r'\w+').str.join('') + '_' + stops_df['stop_id']
    stops_df = stops_df.loc[:,['stop_uid', 'stop_id', 'modality_code', 'modality_desc', 'modality_cat', 'SHAPE']]

    return stops_df

In [66]:
# Build the stop table for every discovered feed and stack them. ignore_index=True gives the
# combined frame a fresh 0..n-1 index instead of repeating each feed's own row labels.
stops_df = pd.concat((get_stops_sedf(pth) for pth in dir_gtfs_lst), ignore_index=True)

In [67]:
# Point the spatial accessor at the 'SHAPE' column.
# NOTE(review): presumably required because the concatenated frame no longer carries the
# geometry registration of the per-feed frames — confirm against the accessor's docs.
stops_df.spatial.set_geometry('SHAPE')
# Stop the notebook here if the accessor's validate() check returns a falsy value.
assert stops_df.spatial.validate()

In [68]:
# Print the column / dtype summary, then preview the first rows of the combined table.
stops_df.info()
stops_df.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1231 entries, 0 to 1230
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   stop_uid       1231 non-null   object  
 1   stop_id        1231 non-null   object  
 2   modality_code  1231 non-null   object  
 3   modality_desc  1231 non-null   object  
 4   modality_cat   1231 non-null   object  
 5   SHAPE          1231 non-null   geometry
dtypes: geometry(1), object(5)
memory usage: 57.8+ KB


Unnamed: 0,stop_uid,stop_id,modality_code,modality_desc,modality_cat,SHAPE
0,coachusa_ABB,ABB,3,bus,bus,"{""x"": -90.314667, ""y"": 44.928553, ""spatialRefe..."
1,coachusa_CHP,CHP,3,bus,bus,"{""x"": -91.427794, ""y"": 44.883096, ""spatialRefe..."
2,coachusa_EUC,EUC,3,bus,bus,"{""x"": -91.506339, ""y"": 44.796558, ""spatialRefe..."
3,coachusa_GBY,GBY,3,bus,bus,"{""x"": -88.00276, ""y"": 44.517053, ""spatialRefer..."
4,coachusa_MKE,MKE,3,bus,bus,"{""x"": -87.917104, ""y"": 43.034518, ""spatialRefe..."


In [64]:
# Write the combined stop table to `stops_pth` (a path inside interim.gdb, built in the
# paths cell) through the spatial accessor; the call's return value is the cell output.
stops_df.spatial.to_featureclass(stops_pth)

'D:\\projects\\gtfs-tools\\data\\interim\\interim.gdb\\gtfs_sf_stops'