In [1]:
import sys

# Relative locations used by the rest of the notebook.
qpath = r'../../'  # path to quetzal here
data = r'inputs/'  # prefix prepended to the GTFS folder name when reading the feed

# Make the quetzal checkout importable. The membership test keeps the cell
# idempotent: re-running it no longer stacks duplicate entries in sys.path.
if qpath not in sys.path:
    sys.path.append(qpath)

# import class
from quetzal.io.gtfs_reader import importer

### Read GTFS

In [2]:
# Build the importer for the RATP feed folder (dist_units set to 'm'),
# clean it in the same expression, then display its summary table.
feed = importer.GtfsImporter(
    path=data + r'paris_ratp_full',
    dist_units='m',
).clean()
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[RATP (100)]
1,running_services,"[12657430, 2616486, 12650565, 2672474, 1267239..."
2,timezone,CET
3,start_date,20181003
4,end_date,20190101
5,num_routes,989
6,num_trips,989
7,num_stops,22352
8,num_shapes,0
9,num_frequencies,0


In [3]:
# Plot a subset of the stops of the full feed: only the first 10000 stop IDs
# are passed (the describe() output above reports 22352 stops in total).
feed.map_stops(
    feed.stops.stop_id.head(10000)  # IDs of the stops to plot
)

### Restrict to given dates

This restricts the feed to the trips (and their associated data) that run on at least one of the given dates.  
The calendar table is emptied and replaced by one calendar_dates row per service and per date on which that service runs.

In [4]:
# Restrict the feed to the listed dates (YYYYMMDD strings) and bind the
# result to a new name, feed_r; `feed` itself is not reassigned here.
feed_r = feed.restrict(
    dates=['20181106', '20181111'] # the dates must be within the feed start and end dates
)

In [5]:
# Summary table of the date-restricted feed (same indicators as for the full feed).
feed_r.describe()

Unnamed: 0,indicator,value
0,agencies,[RATP (100)]
1,running_services,"[2616486, 2616453, 12672397, 2635601, 2629974,..."
2,timezone,CET
3,start_date,20181106
4,end_date,20181111
5,num_routes,330
6,num_trips,330
7,num_stops,6755
8,num_shapes,0
9,num_frequencies,0


In [6]:
feed_r.calendar  # empty DataFrame: restricting by dates empties the calendar table

In [7]:
# Print the number of calendar_dates rows, then preview the first rows.
print(feed_r.calendar_dates.shape[0])
feed_r.calendar_dates.head()

222


Unnamed: 0,service_id,date,exception_type
0,2669196,20181106,1
1,2669392,20181106,1
2,12672397,20181106,1
3,2672304,20181106,1
4,12676480,20181106,1


In [8]:
# Plot every stop of the date-restricted feed (all stop IDs are passed).
feed_r.map_stops(
    feed_r.stops.stop_id
)

### Simplify services

Once the feed has been restricted to a few dates only, its services can be simplified by grouping those that operate on the same set of dates.  
**Warning:** this makes editing the GTFS more complicated afterwards, because different trips will then share the same service!

In [9]:
# The return value is not assigned; the describe() call that follows shows
# feed_r with fewer services, so this presumably modifies feed_r in place
# (NOTE(review): confirm against quetzal's group_services).
feed_r.group_services()

In [10]:
# Summary table of feed_r after grouping its services.
feed_r.describe()

Unnamed: 0,indicator,value
0,agencies,[RATP (100)]
1,running_services,"[2616486, 12672397, 2699953]"
2,timezone,CET
3,start_date,20181106
4,end_date,20181111
5,num_routes,330
6,num_trips,330
7,num_stops,6755
8,num_shapes,0
9,num_frequencies,0


In [11]:
# Display the calendar table of feed_r after grouping its services.
feed_r.calendar

In [12]:
# Display calendar_dates in full: after grouping, only a handful of rows remain.
feed_r.calendar_dates

Unnamed: 0,service_id,date,exception_type
0,12672397,20181106,1
38,2699953,20181106,1
122,2616486,20181111,1
159,2699953,20181111,1


Only 3 services remain: one operating only on 20181106 (`12672397`), one operating only on 20181111 (`2616486`), and one operating on both dates (`2699953`).

In [13]:
# Plot every stop of feed_r again, now that its services have been grouped.
feed_r.map_stops(
    feed_r.stops.stop_id
)

### Restrict to a given service

In [14]:
# Pick one service running on 2018-11-11: take the service_id of the first
# calendar_dates row whose date is '20181111'.
# NOTE(review): the first match is used as-is; a service listed on that date
# may also be listed on other dates (e.g. 2699953 in the table above).
s_id = feed_r.calendar_dates.loc[
    feed_r.calendar_dates['date'] == '20181111', 'service_id'
].iloc[0]
s_id

'2616486'

In [15]:
# Restrict the date-restricted feed further, to the single service stored in
# s_id, and bind the result to a new name, feed_s.
feed_s = feed_r.restrict(
    service_ids=[s_id]
)

In [16]:
# Summary table of the single-service feed.
feed_s.describe()

Unnamed: 0,indicator,value
0,agencies,[RATP (100)]
1,running_services,[2616486]
2,timezone,CET
3,start_date,20181111
4,end_date,20181111
5,num_routes,147
6,num_trips,147
7,num_stops,3425
8,num_shapes,0
9,num_frequencies,0


In [17]:
# Display the calendar_dates table of the single-service feed.
feed_s.calendar_dates

Unnamed: 0,service_id,date,exception_type
122,2616486,20181111,1


In [18]:
# Plot every stop of the single-service feed.
feed_s.map_stops(
    feed_s.stops.stop_id
)

### Restrict to a given time range

In [19]:
# Restrict the single-service feed to a time window and bind the result to feed_t.
# Both bounds must be formatted as HH:MM:SS.
time_range = [
    '06:00:00',
    '09:00:00',
]
feed_t = feed_s.restrict(
    time_range=time_range
)

In [20]:
# Summary table of the time-restricted feed.
feed_t.describe()

Unnamed: 0,indicator,value
0,agencies,[RATP (100)]
1,running_services,[2616486]
2,timezone,CET
3,start_date,20181111
4,end_date,20181111
5,num_routes,13
6,num_trips,13
7,num_stops,299
8,num_shapes,0
9,num_frequencies,0


In [21]:
# Plot every stop of the time-restricted feed.
feed_t.map_stops(feed_t.stops.stop_id)