In [1]:
from pathlib import Path

from arcgis.features import GeoAccessor
import pandas as pd
import numpy as np

from gtfs_tools.gtfs import GtfsDataset
from gtfs_tools.utils.gtfs import add_agency_name_column, add_modality_descriptions, add_standarized_modality_column

In [2]:
# Working directory for this feed on the network (UNC) share; everything
# below is resolved relative to it.
gtfs_parent = Path(r'\\DevBA00007\data\gtfs_publishing\interim\Grand_County_Colorado_Bus_winterparkcousgtfszip_2024-06-19_10_43_48')

# Directory handed to GtfsDataset.
gtfs_dir = gtfs_parent.joinpath('gtfs')

# File geodatabase and the two feature classes read back with
# GeoAccessor.from_featureclass further down.
gdb_pth = gtfs_parent.joinpath('gtfs.gdb')
line_pth = gdb_pth.joinpath('lines')
stop_pth = gdb_pth.joinpath('stops')

In [3]:
# Wrap the GTFS directory in the project's dataset object; later cells read
# routes and agency tables from it via `gtfs.routes` / `gtfs.agency`.
gtfs = GtfsDataset(gtfs_dir)

# Bare expression so the notebook shows the dataset's repr.
gtfs

GtfsDataset: \\DevBA00007\data\gtfs_publishing\interim\Grand_County_Colorado_Bus_winterparkcousgtfszip_2024-06-19_10_43_48\gtfs

In [4]:
# Combine the routes spatial frame with the agency table; the resulting frame
# carries an `agency_name` column alongside the route fields and SHAPE geometry.
routes_df = add_agency_name_column(gtfs.routes.sedf, gtfs.agency.data)

# Inspect the schema / non-null counts, then preview the first rows.
routes_df.info()
routes_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   agency_id               15 non-null     object  
 1   route_id                15 non-null     object  
 2   route_short_name        15 non-null     object  
 3   route_long_name         15 non-null     object  
 4   route_desc              0 non-null      float64 
 5   route_type              15 non-null     object  
 6   route_url               0 non-null      float64 
 7   route_color             15 non-null     object  
 8   route_text_color        15 non-null     object  
 9   route_sort_order        15 non-null     int64   
 10  min_headway_minutes     15 non-null     int64   
 11  eligibility_restricted  15 non-null     int64   
 12  continuous_pickup       15 non-null     int64   
 13  continuous_drop_off     15 non-null     int64   
 14  tts_route_short_name    0 no

Unnamed: 0,agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order,min_headway_minutes,eligibility_restricted,continuous_pickup,continuous_drop_off,tts_route_short_name,tts_route_long_name,shape_id,SHAPE,agency_name
0,806,10099,FRA,Fraser/Black Line,,3,,2a2a2a,ffffff,0,30,0,1,1,,,p_177613,"{""paths"": [[[-105.76172716672248, 39.883615422...",The Lift
1,806,10099,FRA,Fraser/Black Line,,3,,2a2a2a,ffffff,0,30,0,1,1,,,p_787965,"{""paths"": [[[-105.784927, 39.917198], [-105.78...",The Lift
2,806,10099,FRA,Fraser/Black Line,,3,,2a2a2a,ffffff,0,30,0,1,1,,,p_177614,"{""paths"": [[[-105.812309, 39.945946], [-105.81...",The Lift
3,806,10096,RED,Rendezvous/Red Line,,3,,fe011f,ffffff,1,60,0,1,1,,,p_177617,"{""paths"": [[[-105.76172716672248, 39.883615422...",The Lift
4,806,10103,GR,Granby Regional Commuter,,3,,808080,ffffff,10,60,0,1,1,,,p_177627,"{""paths"": [[[-105.92525558889, 40.061079254010...",The Lift


In [6]:
# NOTE(review): the recorded run of this cell raised KeyError: 'route_type_std'.
# The return value is also not assigned here - confirm whether the helper
# mutates `routes_df` in place or returns a new frame.
# TODO: resolve the KeyError before relying on this cell in a Run All.
add_standarized_modality_column(routes_df)

KeyError: 'route_type_std'

In [4]:
# Target column -> pandas dtype mapping for the `lines` feature class.
# Keys are listed in the same order as the feature class schema (OBJECTID and
# SHAPE excluded), so the dict can also be used to check which columns a
# routes frame is still missing.
line_dtype_dict = {
    # --- fields that come from the GTFS routes / shapes tables ---
    'shape_id': 'string',
    'route_id': 'string',
    'agency_id': 'string',
    'agency_name': 'string',
    'route_short_name': 'string',
    'route_long_name': 'string',
    'route_desc': 'string',
    'route_type': 'string',
    'route_url': 'string',
    'route_color': 'string',
    # Present in the feature class schema and in the routes table; it was
    # absent from this mapping.
    'route_text_color': 'string',
    'route_type_text': 'string',
    # --- esri_* fields that are not part of the raw routes table ---
    'esri_route_type_carto': 'string',
    'esri_route_type_carto_desc': 'string',
    'esri_contributor': 'string',
    # An explicit unit is required: pandas 2.x refuses to cast to a unit-less
    # 'datetime64'. Microseconds matches what the feature class reports.
    'esri_date_received': 'datetime64[us]',
    'esri_date_processed': 'datetime64[us]',
    # Nullable integer so missing values survive the cast.
    'esri_excluded': 'Int32',
}

In [3]:
# Read the existing `lines` feature class into a spatially enabled DataFrame.
line_df = GeoAccessor.from_featureclass(line_pth)

# Keep the per-column dtypes of the feature class for reference.
line_dtypes = line_df.dtypes

line_dtypes

OBJECTID                               Int64
shape_id                      string[python]
route_id                      string[python]
agency_id                     string[python]
agency_name                   string[python]
route_short_name              string[python]
route_long_name               string[python]
route_desc                    string[python]
route_type                    string[python]
route_url                     string[python]
route_color                   string[python]
route_text_color              string[python]
route_type_text               string[python]
esri_route_type_carto         string[python]
esri_route_type_carto_desc    string[python]
esri_contributor              string[python]
esri_date_received            datetime64[us]
esri_date_processed           datetime64[us]
esri_excluded                          Int32
SHAPE                               geometry
dtype: object

In [6]:
# NOTE(review): this rebinds `routes_df` to the plain routes frame, replacing
# the frame built with add_agency_name_column in the earlier cell (so the
# `agency_name` column is no longer present on `routes_df` after this runs).
# Confirm that is intended, or use a distinct name for the raw frame.
routes_df = gtfs.routes.sedf

In [24]:
# Target-schema columns (in dict order) that the raw routes frame does not
# provide yet and therefore still have to be added.
routes_columns = gtfs.routes.sedf.columns
missing_cols = [col_name for col_name in line_dtype_dict if col_name not in routes_columns]

missing_cols

['agency_name',
 'route_type_text',
 'esri_route_type_carto',
 'esri_route_type_carto_desc',
 'esri_contributor',
 'esri_date_received',
 'esri_date_processed',
 'esri_excluded']

In [21]:
# Read the existing `stops` feature class into a spatially enabled DataFrame.
stop_df = GeoAccessor.from_featureclass(stop_pth)

# Keep the per-column dtypes of the feature class for reference.
stop_dtypes = stop_df.dtypes

stop_dtypes

OBJECTID                              Int64
stop_id                      string[python]
stop_code                    string[python]
stop_name                    string[python]
tts_stop_name                string[python]
stop_desc                    string[python]
stop_lat                            Float64
stop_lon                            Float64
zone_id                      string[python]
stop_url                     string[python]
location_type                string[python]
parent_station               string[python]
stop_timezone                string[python]
wheelchair_boarding          string[python]
level_id                     string[python]
platform_code                string[python]
esri_contributor             string[python]
esri_date_received           datetime64[us]
esri_stop_type               string[python]
esri_stop_type_desc          string[python]
esri_stop_type_carto         string[python]
esri_stop_type_carto_desc    string[python]
esri_location_type_desc      str