In [1]:
import sys

# Put the local quetzal checkout at the front of the module search path.
qpath = r'../../quetzal/'  # path to quetzal here
sys.path.insert(0, qpath)

# Folder prefix for the input files loaded below (e.g. the GTFS archive).
data = r'inputs/'

# GTFS importer module; GtfsImporter is accessed through it below.
from quetzal.io.gtfs_reader import importer

  from pandas import Panel


In [2]:
import numpy as np

### Read GTFS

In [3]:
# Load the Bilbao GTFS archive, clean it, and display a summary of the feed.
gtfs_path = data + r'bilbao.zip'
feed = importer.GtfsImporter(path=gtfs_path, dist_units='m').clean()
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20200310
5,num_routes,82
6,num_trips,3297
7,num_stops,498
8,num_shapes,0
9,num_frequencies,0


In [4]:
# Run the feed's validation; the result is displayed as a table of issues
# (type, message, table, rows).
feed.validate()

Unnamed: 0,type,message,table,rows
0,warning,Feed expired,calendar,[]
1,warning,"Repeated pair (trip_id, departure_time)",stop_times,"[128, 4087, 4101, 4115, 4129, 4143, 4157, 4171..."


In [5]:
# Pass every stop_id of the feed to map_stops.
# NOTE(review): presumably this renders a map of all the stops — confirm.
feed.map_stops(feed.stops.stop_id)

In [6]:
# Rebind `feed` to the result of create_shapes().
# NOTE(review): the first describe() reported num_shapes = 0; presumably this
# generates shape geometries for the trips — confirm.
feed = feed.create_shapes()

In [7]:
# Map one trip per route (the first trip of each route_id group),
# limited to the first 50 routes.
first_trip_by_route = feed.trips.groupby('route_id').first().trip_id
feed.map_trips(first_trip_by_route.head(50))

Frequency conversion currently works only for one specific service and date, and for one given time period.  
It computes the average headway over this time period.

### Restrict to one date and merge services

In [8]:
# Restrict the feed to a single date, then merge its services.
# group_services() is called for its effect on `feed`; its return value is not kept.
service_dates = ['20191210']
feed = feed.restrict(dates=service_dates)
feed.group_services()

In [9]:
# Summary of the feed after restricting to one date: start_date and end_date
# should now be the same day.
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20191210
5,num_routes,82
6,num_trips,3297
7,num_stops,498
8,num_shapes,124
9,num_frequencies,0


### Build simplified patterns by clustering stops

In [10]:
# Cluster the stops; the cells below read feed.stops.cluster_id, which this
# call is expected to populate.
# NOTE(review): threshold presumably in meters (feed loaded with dist_units='m') — confirm.
feed.build_stop_clusters(distance_threshold=300)

In [11]:
# Compare the number of stops with the number of distinct cluster ids.
stops = feed.stops
print('N stops:', len(stops))
print('N clusters:', len(stops.cluster_id.unique()))

N stops: 498
N clusters: 165


In [12]:
# Build trip patterns on the cluster_id column rather than on individual stops;
# the next cell reads feed.trips.pattern_id.
feed.build_patterns(on='cluster_id')

In [13]:
# Report how many trips there are, and how many distinct patterns and routes
# they collapse into.
trips = feed.trips
print('N trips:', len(trips))
for label, column in (('N patterns:', 'pattern_id'), ('N routes:', 'route_id')):
    print(label, len(trips[column].unique()))

N trips: 3297
N patterns: 124
N routes: 82


In [14]:
# Display the feed summary again, after clustering and pattern building.
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20191210
5,num_routes,82
6,num_trips,3297
7,num_stops,498
8,num_shapes,124
9,num_frequencies,0


### Convert to frequencies

In [15]:
# Inspect the stop_times table before the frequency conversion
# (arrival/departure times are displayed as HH:MM:SS).
feed.stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,406305,06:15:00,06:15:00,5405,1,,0,0,
1,406305,06:17:00,06:17:00,5106,2,,0,0,
2,406305,06:21:00,06:21:00,5110,3,,0,0,
3,406305,06:23:00,06:23:00,2108,4,,0,0,
4,406305,06:24:00,06:24:00,2104,5,,0,0,
...,...,...,...,...,...,...,...,...,...
57134,436376,14:03:00,14:03:00,5101,8,,0,0,
57135,436376,14:08:00,14:08:00,5704,9,,0,0,
57136,436376,14:11:00,14:11:00,5601,10,,0,0,
57137,436376,14:13:00,14:13:00,5701,11,,0,0,


In [16]:
# Time window passed to the frequency conversion.
time_range = ['08:00:00', '10:00:00']  # time format must be HH:MM:SS
# The converted feed is bound to feed_f; `feed` itself is not rebound here.
feed_f = feed.convert_to_frequencies(time_range=time_range)

100%|██████████| 75/75 [00:00<00:00, 138.48it/s]


In [17]:
# NOTE(review): this repeats the conversion from the previous cell with the
# same arguments; it looks redundant and could probably be removed — confirm.
feed_f = feed.convert_to_frequencies(time_range=time_range)

100%|██████████| 75/75 [00:00<00:00, 141.64it/s]


In [18]:
# Summary of the converted feed; num_frequencies is expected to be non-zero now.
feed_f.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20191210
5,num_routes,67
6,num_trips,75
7,num_stops,495
8,num_shapes,75
9,num_frequencies,75


### Build nodes and links

In [19]:
# Build the links and nodes on feed_f; they are inspected in the next cells
# through feed_f.nodes and feed_f.links.
feed_f.build_links_and_nodes()

export geometries in epsg: 32630


In [20]:
# Show the first node, transposed so that each attribute is on its own row.
feed_f.nodes.head(1).T

Unnamed: 0,0
stop_id,2201
stop_code,
stop_name,Anselma de Salces (2)
stop_desc,
zone_id,
stop_url,
location_type,
cluster_id,85
geometry,POINT (506258.4033893289 4790384.018252368)


In [21]:
# Show the first link, transposed so that each attribute is on its own row.
feed_f.links.head(1).T

Unnamed: 0,0
index,0
a,5405
trip_id,201_0
link_sequence,1
departure_time,29700
pickup_type,0
b,5106
arrival_time,29820
drop_off_type,0
time,120


## All in one
While we recommend building the nodes and links gradually by exploring the GTFS data first, it is also possible to do it in one line.

In [22]:
# Start again from the raw archive, rebinding `feed`.
# Unlike the step-by-step section, clean() is not applied before describe() here.
gtfs_path = data + r'bilbao.zip'
feed = importer.GtfsImporter(path=gtfs_path, dist_units='m')
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20200310
5,num_routes,92
6,num_trips,3297
7,num_stops,498
8,num_shapes,0
9,num_frequencies,0


In [23]:
# Arguments for the one-shot build: same date, time window and clustering
# threshold as in the step-by-step section.
build_kwargs = {
    'date': '20191210',
    'time_range': ['08:00:00', '10:00:00'],
    'cluster_distance_threshold': 300,  # by default: None and no clustering
}
imp = feed.build(**build_kwargs)

Restricting to date…
Grouping services…
Clustering stops…
Building patterns…


 16%|█▌        | 11/69 [00:00<00:00, 107.84it/s]

Converting to frequencies…


100%|██████████| 69/69 [00:00<00:00, 124.16it/s]


Building links and nodes…
export geometries in epsg: 32630


In [24]:
# Summary of the feed returned by build().
imp.describe()

Unnamed: 0,indicator,value
0,agencies,[Bilbobus]
1,running_services,[1]
2,timezone,Europe/Madrid
3,start_date,20191210
4,end_date,20191210
5,num_routes,67
6,num_trips,69
7,num_stops,495
8,num_shapes,0
9,num_frequencies,69


In [25]:
# First rows of the links table of the feed returned by build().
imp.links.head()

Unnamed: 0,index,a,trip_id,link_sequence,departure_time,pickup_type,b,arrival_time,drop_off_type,time,...,direction_id,shape_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,geometry
0,0,5405,201_0,1,29700.0,0,5106,29820.0,0,120.0,...,,,27,22_VLT,Sarrikue - Atxuri,,3,,,"LINESTRING (506286.023 4789138.906, 506030.167..."
1,1,5106,201_0,2,29820.0,0,5110,30060.0,0,240.0,...,,,27,22_VLT,Sarrikue - Atxuri,,3,,,"LINESTRING (506030.167 4789312.701, 506269.426..."
2,2,5110,201_0,3,30060.0,0,2108,30180.0,0,120.0,...,,,27,22_VLT,Sarrikue - Atxuri,,3,,,"LINESTRING (506269.426 4789737.628, 506202.215..."
3,3,2108,201_0,4,30180.0,0,2104,30240.0,0,60.0,...,,,27,22_VLT,Sarrikue - Atxuri,,3,,,"LINESTRING (506202.215 4790128.260, 506068.589..."
4,4,2104,201_0,5,30240.0,0,2206,30360.0,0,120.0,...,,,27,22_VLT,Sarrikue - Atxuri,,3,,,"LINESTRING (506068.589 4790271.632, 506071.610..."


In [26]:
# First rows of the nodes table of the feed returned by build().
imp.nodes.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,zone_id,stop_url,location_type,cluster_id,geometry
0,2201,,Anselma de Salces (2),,,,,85,POINT (506258.403 4790384.018)
1,2417,,Trauko (14),,,,,10,POINT (506528.037 4790435.054)
2,3103,,"Kepa Enbeitia ""Urretxindorra""",,,,,157,POINT (508092.006 4789626.524)
3,6209,,Gregorio la Revilla 1,,,,,56,POINT (504935.494 4790091.282)
4,6210,,Gregorio la Revilla 17,,,,,79,POINT (504867.070 4789866.493)
