<a href="https://colab.research.google.com/github/nonoumasy/LA-GTFS/blob/master/Los_Angeles_GTFS_Rail_Bus_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install gtfstk

In [0]:
import pandas as pd
import gtfstk as gt

In [0]:
feed = gt.read_gtfs('/content/gtfs.zip', dist_units='mi')
bus_feed = gt.read_gtfs('/content/gtfs_bus.zip', dist_units='mi')

# Set study date
DATE = "20200420"

In [5]:
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Metro - Los Angeles]
1,timezone,America/Los_Angeles
2,start_date,20200408
3,end_date,20200422
4,num_routes,6
5,num_trips,5998
6,num_stops,401
7,num_shapes,12
8,sample_date,20200416
9,num_routes_active_on_sample_date,6


In [0]:
feed.route_

In [8]:
bus_feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Metro - Los Angeles]
1,timezone,America/Los_Angeles
2,start_date,20191222
3,end_date,20200627
4,num_routes,138
5,num_trips,32039
6,num_stops,13971
7,num_shapes,1206
8,sample_date,20191226
9,num_routes_active_on_sample_date,138


In [9]:
# validate the data
feed.validate()

Unnamed: 0,type,message,table,rows
0,warning,Unrecognized column feed_id,feed_info,[]
1,warning,Unrecognized column feed_license,feed_info,[]
2,warning,Unrecognized column feed_contact_email,feed_info,[]
3,warning,Unrecognized column feed_contact_url,feed_info,[]
4,warning,Unrecognized column tpis_name,stops,[]


In [10]:
bus_feed.validate()

Unnamed: 0,type,message,table,rows
0,warning,Unrecognized column feed_id,feed_info,[]
1,warning,Unrecognized column feed_license,feed_info,[]
2,warning,Unrecognized column feed_contact_email,feed_info,[]
3,warning,Unrecognized column feed_contact_url,feed_info,[]
5,warning,"Repeated pair (trip_id, departure_time)",stop_times,"[5, 8, 10, 14, 16, 18, 22, 25, 27, 28, 35, 40,..."
4,warning,Unrecognized column tpis_name,stops,[]


In [28]:
# access the pandas trips dataframe 
feed.trips

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id
0,801,RDEC19-801-1_Weekday-91,50674247,,0,107,801NB_120323
1,801,RDEC19-801-1_Weekday-91,50674248,,0,102,801NB_120323
2,801,RDEC19-801-1_Weekday-91,50674249,,0,105,801NB_120323
3,801,RDEC19-801-1_Weekday-91,50674250,,0,110,801NB_120323
4,801,RDEC19-801-1_Weekday-91,50674251,,0,106,801NB_120323
...,...,...,...,...,...,...,...
5993,804,RDEC19-804-3_Sunday-99,50008659,,1,412,804SB_160306
5994,804,RDEC19-804-3_Sunday-99,50008660,,1,403,804SB_160306
5995,804,RDEC19-804-3_Sunday-99,50008661,,0,403,804NB_160306
5996,804,RDEC19-804-3_Sunday-99,50008665,,1,402,804SB_160306


In [12]:
bus_feed.trips

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id
0,102-13131,DEC19-D05CAR-1_Weekday,49404780-DEC19-D05CAR-1_Weekday,,0,1020400,1020065_DEC19
1,102-13131,DEC19-D05CAR-1_Weekday,49404781-DEC19-D05CAR-1_Weekday,,0,1020100,1020037_DEC19
2,102-13131,DEC19-D05CAR-1_Weekday,49404782-DEC19-D05CAR-1_Weekday,,0,1020300,1020038_DEC19
3,102-13131,DEC19-D05CAR-1_Weekday,49404783-DEC19-D05CAR-1_Weekday,,0,1020300,1020037_DEC19
4,102-13131,DEC19-D05CAR-1_Weekday,49404784-DEC19-D05CAR-1_Weekday,,1,1020300,1020061_DEC19
...,...,...,...,...,...,...,...
32034,78-13131,DEC19-D09CAR-3_Sunday-RACEE0,49892049-DEC19-D09CAR-3_Sunday-RACEE0,,1,0785602,780431_DEC19
32035,78-13131,DEC19-D09CAR-3_Sunday-RACEA0,49892050-DEC19-D09CAR-3_Sunday-RACEA0,,1,0786602,780431_DEC19
32036,78-13131,DEC19-D09CAR-3_Sunday-RACEA0,49892051-DEC19-D09CAR-3_Sunday-RACEA0,,1,0786502,780432_DEC19
32037,78-13131,DEC19-D09CAR-3_Sunday-RACEB0,49892052-DEC19-D09CAR-3_Sunday-RACEB0,,0,0789201,780505_DEC19


In [0]:
def gtfs_stats(feed):
  # trip stats
  trip_stats = feed.compute_trip_stats()
  feed = feed.append_dist_to_stop_times(trip_stats)
  print(feed.stop_times.T)

  # route stats
  route_stats = feed.compute_route_stats(trip_stats, dates=[DATE])
  print(route_stats.T)

  # merge route_name
  route_stats = route_stats.merge(feed.routes.filter(["route_id", "route_long_name"]))

  cols = [
    "route_id",
    "route_short_name", 
    "route_long_name", 
    "route_type", 
    "start_time",
    "end_time",
    "num_trips",
    "max_headway",
    "mean_trip_distance",
    "mean_trip_duration",
    "service_speed",
  ]
  print(route_stats)

  # The most/least frequent routes are
  print('The most/least frequent routes are: ')
  print(route_stats.filter(cols).sort_values("max_headway").T)
  print()

  # The shortest/longest routes are
  print('The shortest/longest routes are: ')
  print(route_stats.filter(cols).sort_values("mean_trip_distance").T)
  print()

  # The slowest/fastest routes are
  print('The slowest/fastest routes are: ')
  print(route_stats.filter(cols).sort_values("service_speed").T)


In [44]:
gtfs_stats(feed)

                                             0      ...                                98965
trip_id                                   49824101  ...                             50704752
arrival_time                              04:30:00  ...                             10:26:00
departure_time                            04:30:00  ...                             10:26:00
stop_id                                      80214  ...                                80139
stop_sequence                                    1  ...                                   19
stop_headsign        802 - North Hollywood Station  ...  806 - Downtown Santa Monica Station
pickup_type                                      0  ...                                    0
drop_off_type                                    0  ...                                    0
shape_dist_traveled                       0.797202  ...                              15.3067

[9 rows x 98966 columns]
                           0         1      

In [29]:
# plot metro routes

metro_route_list = feed.trips.route_id.unique()

m = feed.map_routes(route_ids=metro_route_list)
m

In [0]:
# plot metro routes

bus_route_list = bus_feed.trips.route_id.unique()

n = bus_feed.map_routes(route_ids=bus_route_list)

n.save('la_bus_route.html')
n