# Transit Accessibility Factors

In [1]:
import pandas as pd

from gtfs_tools.gtfs import GtfsDataset

In [2]:
# location of the zipped GTFS feed to analyze (St. Louis Metro, 2024-06-04 snapshot);
# adjacent raw strings are concatenated into a single path
zip_pth = (
    r"D:\projects\GTFS-Publishing\data\raw"
    r"\StLouis_Metro_stlouisgtfszip_2024-06-04_00_00"
    r"\STLouis_GTFS.zip"
)

In [3]:
# build the dataset object from the zipped feed, asking the loader to
# standardize route types (the factor calculations below rely on route_type)
gtfs = GtfsDataset.from_zip(
    zip_pth,
    standardize_route_types=True,
)

# show the dataset repr to confirm the feed loaded
gtfs

GtfsDataset: C:\Users\joel5174\AppData\Local\Temp\tmp2o2_rawa

## Factors

### Service Frequency

Weekly Trip Count per Stop

In [26]:
# trip count for every stop; the result is indexed by stop_id with a single trip_count column
gtfs.stops.trip_count

Unnamed: 0_level_0,trip_count
stop_id,Unnamed: 1_level_1
10030,64
10031,64
10032,64
10033,64
10035,64
...,...
9920,63
9971,62
9972,62
9973,62


### Overnight Service

Service at or after 11:00 pm (23:00) and before 3:00 am (03:00).

In [7]:
# hours of the day (24-hour clock) bounding the overnight window; the window wraps
# past midnight, so it covers hours >= start_time OR hours < end_time
start_time = 23
end_time = 3

# get the needed columns for calculating overnight service
tm_df = gtfs.stop_times.data.loc[:, ['stop_id', 'arrival_time']]

# calculate hour of the day from the arrival timedelta; the modulo keeps the value on a
# 0-23 clock so the comparison below always works against a clock hour
tm_df['hours'] = tm_df['arrival_time'].dt.components.hours % 24

# flag late night arrivals: at or after the evening start hour, or before the morning end hour;
# the start comparison must be inclusive (>=) since an hour on a 0-23 clock can never be
# greater than 23, which would silently drop every arrival between 23:00 and 23:59
tm_df['late_night'] = (tm_df['hours'] >= start_time) | (tm_df['hours'] < end_time)

tm_df.info()
tm_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 451318 entries, 0 to 451317
Data columns (total 4 columns):
 #   Column        Non-Null Count   Dtype          
---  ------        --------------   -----          
 0   stop_id       451318 non-null  object         
 1   arrival_time  451318 non-null  timedelta64[ns]
 2   hours         451318 non-null  int64          
 3   late_night    451318 non-null  bool           
dtypes: bool(1), int64(1), object(1), timedelta64[ns](1)
memory usage: 10.8+ MB


Unnamed: 0,stop_id,arrival_time,hours,late_night
0,14818,0 days 13:50:00,13,False
1,15018,0 days 13:51:00,13,False
2,2732,0 days 13:52:00,13,False
3,14484,0 days 13:54:00,13,False
4,2638,0 days 13:54:00,13,False


In [8]:
# consolidate by stop to determine if the stop offers late night service
late_df = tm_df[['stop_id', 'late_night']].groupby('stop_id').any()

late_df.info()
late_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 5109 entries, 10030 to 9974
Data columns (total 1 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   late_night  5109 non-null   bool 
dtypes: bool(1)
memory usage: 44.9+ KB


Unnamed: 0_level_0,late_night
stop_id,Unnamed: 1_level_1
10030,False
10031,False
10032,False
10033,False
10035,False


In [9]:
# tally how many stops do (True) and do not (False) have late night service
late_df.value_counts()

late_night
True          3153
False         1956
Name: count, dtype: int64

### Variety: Weighted Unique Routes

Weekly route count per stop, with fixed routes (any route type other than bus or school bus) weighted three times as heavily as standard bus routes.

In [12]:
# get data frame of stops, routes and route types
rt_df = pd.merge(gtfs._crosstab_stop_route, gtfs.routes.data.loc[:,['route_id', 'route_type']], on='route_id', how='left')

rt_df.info()
rt_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5800 entries, 0 to 5799
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   stop_id     5800 non-null   object
 1   route_id    5800 non-null   object
 2   route_type  5800 non-null   Int64 
dtypes: Int64(1), object(2)
memory usage: 141.7+ KB


Unnamed: 0,stop_id,route_id,route_type
0,14818,18782,3
1,15018,18782,3
2,2732,18782,3
3,14484,18782,3
4,2638,18782,3


In [14]:
# flag fixed types (not bus or school bus)
rt_df['fixed_typ'] = ~rt_df['route_type'].isin([3, 31])

rt_df.info()
rt_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5800 entries, 0 to 5799
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   stop_id     5800 non-null   object 
 1   route_id    5800 non-null   object 
 2   route_type  5800 non-null   Int64  
 3   fixed_typ   5800 non-null   boolean
dtypes: Int64(1), boolean(1), object(2)
memory usage: 153.1+ KB


Unnamed: 0,stop_id,route_id,route_type,fixed_typ
0,14818,18782,3,False
1,15018,18782,3,False
2,2732,18782,3,False
3,14484,18782,3,False
4,2638,18782,3,False


In [16]:
# add factor for each route; fixed routes are weighted 3x more than bus routes
rt_df['weighted_route_factor'] = rt_df['fixed_typ'].apply(lambda val: 3 if val else 1)

rt_df

Unnamed: 0,stop_id,route_id,route_type,fixed_typ,weighted_route_factor
0,14818,18782,3,False,1
1,15018,18782,3,False,1
2,2732,18782,3,False,1
3,14484,18782,3,False,1
4,2638,18782,3,False,1
...,...,...,...,...,...
5795,16252,18846,3,False,1
5796,16246,18846,3,False,1
5797,16253,18846,3,False,1
5798,15707,18864,3,False,1


In [18]:
# sum weighted route factor by stop
stop_rt_df = rt_df[['stop_id', 'weighted_route_factor']].groupby('stop_id').sum()

stop_rt_df.info()
stop_rt_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 5109 entries, 10030 to 9974
Data columns (total 1 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   weighted_route_factor  5109 non-null   int64
dtypes: int64(1)
memory usage: 79.8+ KB


Unnamed: 0_level_0,weighted_route_factor
stop_id,Unnamed: 1_level_1
10030,1
10031,1
10032,1
10033,1
10035,1


## Factors Combined

In [28]:
# assemble the three factors into one frame keyed by stop: start from the trip
# counts, then attach the late night flag and the weighted route score by stop_id
factor_df = (
    gtfs.stops.trip_count
    .join(late_df, on='stop_id')
    .join(stop_rt_df, on='stop_id')
)

factor_df.info()
factor_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 5109 entries, 10030 to 9974
Data columns (total 3 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   trip_count             5109 non-null   int64
 1   late_night             5109 non-null   bool 
 2   weighted_route_factor  5109 non-null   int64
dtypes: bool(1), int64(2)
memory usage: 124.7+ KB


Unnamed: 0_level_0,trip_count,late_night,weighted_route_factor
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10030,64,False,1
10031,64,False,1
10032,64,False,1
10033,64,False,1
10035,64,False,1
