`Benodigde packages:`<br>
`pip install protobuf==3.19.4 gtfs-realtime-bindings protobuf_to_dict protobuf3-to-dict==0.1.5 requests simplejson pandas geopandas folium urllib3 libprotobuf`<br>

`Maar op Windows doet hij het alleen als:`<br>
`conda install protobuf`<br>

`compiler protoc downloaden:`<br>
`https://github.com/protocolbuffers/protobuf/releases/tag/v3.19.4`<br>

`Python bestanden genereren:`<br>
`protoc --python_out=. *.proto`<br>

`protc.exe en de genegereerde python bestanden in dezelfde map als dit notebook`<br>

`Nog niet gelukt een omgeving te maken vanuit scratch waarin het werkt.`<br>

In [1]:
import requests
import os
import time
import zipfile
import pandas as pd
import geopandas as gpd
import folium
import urllib.request
import gtfs_realtime_OVapi_pb2  # Nodig om de additionele velden te vinden 
import gtfs_realtime_pb2
from protobuf_to_dict import protobuf_to_dict
from datetime import datetime, timedelta

# GTFS static data

In [2]:
def file_age(filename):
    if os.path.exists(filename):
        return int((time.time() - os.path.getmtime(filename)) / 3600)
    else:
        return 99999

def download_url(url, filename):
    '''
    Download the given URL and save under filename.
    If the filename contains a directory, it is assoumed the directory exists.
    
    :param str url: The URL to download
    :param str filename: The filename to save the file under
    '''
    if os.path.exists(filename) and file_age(filename) < 7*24:
        # Cached version exists and is less than a day old
        return
    try:
        print('Downloaf new file')
        with urllib.request.urlopen(url) as resp:
            file_content = resp.read()
        with open(filename, 'wb') as f:
            f.write(file_content)
    except Exception as e:
        print(e)
        
def read_from_zip(zipfn, csvfile):
    with zipfile.ZipFile(zipfn) as z:
        with z.open(csvfile) as f:
            return pd.read_csv(f)

In [3]:
download_url('http://gtfs.ovapi.nl/nl/gtfs-nl.zip', 'gtfs-nl.zip')

fn = 'stops.pcl'
if file_age(fn) < 24:
    stops = pd.read_pickle(fn)
else:
    stops = read_from_zip('gtfs-nl.zip', 'stops.txt')
    stops = stops[['stop_id', 'stop_code', 'stop_name', 'stop_lat', 'stop_lon',
           'location_type', 'parent_station', 'platform_code', 'zone_id']]
    stops.stop_id = stops.stop_id.astype(str)
    stops.to_pickle(fn)
print('Stops         : {}'.format(len(stops)))

fn = 'routes.pcl'
if file_age(fn) < 24:
    routes = pd.read_pickle(fn)
else:
    routes = read_from_zip('gtfs-nl.zip', 'routes.txt')
    routes = routes[['route_id', 'agency_id', 'route_short_name', 'route_long_name', 'route_type']]
    routes.route_id = routes.route_id.astype(str)
    routes.to_pickle(fn)
print('Routes        : {}'.format(len(routes)))

fn = 'trips.pcl'
if file_age(fn) < 24:
    trips = pd.read_pickle(fn)
else:
    trips = read_from_zip('gtfs-nl.zip', 'trips.txt')
    trips = trips[['route_id', 'service_id', 'trip_id', 'trip_headsign', 'trip_short_name', 
                   'trip_long_name', 'direction_id', 'shape_id']] 
    trips.trip_short_name = trips.trip_short_name.astype('Int64')
    trips.shape_id = trips.shape_id.astype('Int64')
    trips.trip_id = trips.trip_id.astype(str)
    trips.route_id = trips.route_id.astype(str)
    trips = trips.merge(routes)
    trips.to_pickle(fn)
print('Trips         : {}'.format(len(trips)))

fn = 'calendar.pcl'
if file_age(fn) < 24:
    calendar = pd.read_pickle(fn)
else:
    calendar = read_from_zip('gtfs-nl.zip', 'calendar_dates.txt')
    calendar.date = calendar.date.astype(str)
    calendar['date'] = calendar.date.str[:4] + '-' + calendar.date.str[4:6] + '-' + calendar.date.str[6:8]
    calendar.to_pickle(fn)
print('Services      : {}'.format(len(calendar)))

fn = 'stoptimes.pcl'
if file_age(fn) < 24:
    stoptimes = pd.read_pickle(fn)
else:
    stoptimes = read_from_zip('gtfs-nl.zip', 'stop_times.txt')
    stoptimes.trip_id = stoptimes.trip_id.astype(str)
    stoptimes.stop_id = stoptimes.stop_id.astype(str)
    stoptimes = stoptimes[['trip_id', 'stop_sequence', 'stop_id', 'stop_headsign', 'arrival_time',
                           'departure_time', 'shape_dist_traveled']]
    stoptimes.to_pickle(fn)
print('Halteringen   : {}'.format(len(stoptimes)))

Stops         : 62582
Routes        : 2487
Trips         : 915609
Services      : 192566
Halteringen   : 17928467


## Filter static data for today

In [4]:
today = datetime.now().strftime('%Y-%m-%d')
calendar = calendar[calendar.date == today]
print('Services **   : {}'.format(len(calendar)))
trips = trips.merge(calendar[['service_id', 'date']], on='service_id')
print('Trips       : {}'.format(len(trips)))
stoptimes = stoptimes[stoptimes.trip_id.isin(trips.trip_id.values)]
print('Halteringen : {}'.format(len(stoptimes)))

Services **   : 917
Trips       : 121029
Halteringen : 2221848


# GTFS RT data

In [5]:
feed = gtfs_realtime_pb2.FeedMessage()

response = requests.get('https://gtfs.ovapi.nl/nl/vehiclePositions.pb', allow_redirects=True)
feed.ParseFromString(response.content)
vehiclePositions = protobuf_to_dict(feed)
print("Vehicle positions : {}".format(len(vehiclePositions['entity'])))

response = requests.get('https://gtfs.ovapi.nl/nl/trainUpdates.pb', allow_redirects=True)
feed.ParseFromString(response.content)
trainUpdates = protobuf_to_dict(feed)
print("Train updates     : {}".format(len(trainUpdates['entity'])))

response = requests.get('https://gtfs.ovapi.nl/nl/tripUpdates.pb', allow_redirects=True)
feed.ParseFromString(response.content)
tripUpdates = protobuf_to_dict(feed)
print("Trip updates      : {}".format(len(tripUpdates['entity'])))

response = requests.get('https://gtfs.ovapi.nl/nl/alerts.pb', allow_redirects=True)
feed.ParseFromString(response.content)
alerts = protobuf_to_dict(feed)
print("Alerts            : {}".format(len(alerts['entity'])))


Vehicle positions : 3311
Train updates     : 1938
Trip updates      : 7465
Alerts            : 209


In [6]:
def convert_times(df, columns):
    for c in columns:
        df[c] = pd.to_datetime(df[c], unit='s', utc=True).map(lambda x: x.tz_convert('Europe/Amsterdam'))
        df[c] = df[c].apply(lambda x: x.replace(tzinfo=None))
    return df

def businessday_to_datetime(date, time):
    try:
        res = datetime.strptime(date, '%Y%m%d')
        hr = int(time[:2])
        if hr >= 24:
            res = res + timedelta(days = 1)
            hr -= 24
        res = res + timedelta(hours=hr, minutes=int(time[3:5]), seconds=int(time[6:8]))
        return res
    except:
        return None

## Create dataframes from updates

In [7]:
# Additional fields are stored under these keys, not the correct names
rtid_keys  = ['___X','1003']
stop_keys  = ['___X','1003']
delay_keys = ['___X','1003']

In [8]:
updates=[]
timestamp = tripUpdates['header']['timestamp']
for tu in tripUpdates['entity']:
#     print(tu)
    uid = tu['id']
    trip_update = tu['trip_update']
    vehicle = trip_update['vehicle']['label'] if 'vehicle' in trip_update else None
    trip = trip_update['trip']
    trip_id = trip['trip_id']
    start_time = trip['start_time'] if 'start_time' in trip else None
    start_date = trip['start_date']
    start_time = businessday_to_datetime(start_date, start_time)
    route_id = trip['route_id']
    direction_id = int(trip['direction_id']) if 'direction_id' in trip else None
    rt_id = trip[rtid_keys[0]][rtid_keys[1]]['realtime_trip_id'] if rtid_keys[0] in trip else None
    for stu in trip_update['stop_time_update'] if 'stop_time_update' in trip_update else []:
        stop_sequence = stu['stop_sequence']
        if 'arrival' in stu:
            arr = stu['arrival']
            arrival_time = arr['time'] if 'time' in arr else None
            arrival_delay = arr['delay'] if 'delay' in arr else None
        else:
            arrival_time = None
            arrival_delay = None
        if 'departure' in stu:
            dep = stu['departure']
            departure_time = dep['time'] if 'time' in dep else None
            departure_delay = dep['delay'] if 'delay' in dep else None
        else:
            departure_time = None
            departure_delay = None
        updates.append({'id': uid, 'RT_id': rt_id, 'trip_id': trip_id, 'start_time': start_time,
                       'route_id': route_id, 'direction_id': direction_id, 'vehicle': vehicle,
                       'stop_sequence': stop_sequence,'arrival_time': arrival_time, 'arrival_delay': arrival_delay,
                                'departure_time': arrival_time, 'departure_delay': departure_delay,
                                'timestamp': timestamp})     
df_trip_updates = pd.DataFrame(updates)
df_trip_updates = convert_times(df_trip_updates, ['departure_time', 'arrival_time'])
df_trip_updates.head(2)

Unnamed: 0,id,RT_id,trip_id,start_time,route_id,direction_id,vehicle,stop_sequence,arrival_time,arrival_delay,departure_time,departure_delay,timestamp
0,2023-01-03:KEOLIS:5070:50661,KEOLIS:5070:50661,161220784,2023-01-03 14:42:00,75567,1.0,1040,1,NaT,,NaT,292.0,1672754745
1,2023-01-03:KEOLIS:5070:50661,KEOLIS:5070:50661,161220784,2023-01-03 14:42:00,75567,1.0,1040,2,2023-01-03 14:48:22,290.0,2023-01-03 14:48:22,290.0,1672754745


In [9]:
updates=[]
timestamp = trainUpdates['header']['timestamp']
for tu in trainUpdates['entity'] :
    trip_id =tu['id']
    trip_update = tu['trip_update']
    trip = trip_update['trip']
    route_id = trip['route_id'] if 'route_id' in trip else None
    direction_id = int(trip['direction_id'])
    additional = trip[rtid_keys[0]][rtid_keys[1]] if rtid_keys[0] in trip else None 
    rt_id = additional['realtime_trip_id'] if 'realtime_trip_id' in additional else None
    train_number = additional['trip_short_name'] if 'trip_short_name' in additional else None
    start_time = trip['start_time']
    start_date = trip['start_date']
    start_time = businessday_to_datetime(start_date, start_time)
    if 'stop_time_update' in trip_update:
        for su in trip_update['stop_time_update']:
            if 'stop_id' in su:
                arrival_time = su['arrival']['time'] if 'arrival' in su else None
                arrival_delay = su['arrival']['delay'] if 'arrival' in su else None
                departure_time = su['departure']['time'] if 'departure' in su else None
                departure_delay = su['departure']['delay'] if 'departure' in su else None
                stop_id = su['stop_id']
                additional = su[stop_keys[0]][stop_keys[1]] if stop_keys[0] in su else None
                if additional:
                    scheduled_track = additional['scheduled_track'] if 'scheduled_track' in additional else None
                    station_id = additional['station_id'] if 'station_id' in additional else None
                else:
                    scheduled_track = station_id = None
                updates.append({'trip_id': trip_id, 'RT_id': rt_id, 'start_time': start_time,
                                'route_id': route_id, 'direction_id': direction_id, 'stop_id': stop_id, 
                                'arrival_time': arrival_time, 'arrival_delay': arrival_delay,
                                'departure_time': arrival_time, 'departure_delay': departure_delay,
                                'timestamp': timestamp, 'station_id': station_id, 
                                'train_number': train_number, 'scheduled_track': scheduled_track})     
df_train_updates = pd.DataFrame(updates)
df_train_updates = convert_times(df_train_updates, ['departure_time', 'arrival_time', 'timestamp'])
df_train_updates['stop_id'] = df_train_updates['stop_id'].astype(str)
df_train_updates['route_id'] = df_train_updates['route_id'].astype(str)
df_train_updates['trip_id'] = df_train_updates['trip_id'].astype(str)
df_train_updates.head(2)

Unnamed: 0,trip_id,RT_id,start_time,route_id,direction_id,stop_id,arrival_time,arrival_delay,departure_time,departure_delay,timestamp,station_id,train_number,scheduled_track
0,161755141,IFF:SPR:5866,2023-01-03 18:53:00,84149,0,2422293,NaT,,NaT,0.0,2023-01-03 15:05:45,avat,5866,2
1,161755141,IFF:SPR:5866,2023-01-03 18:53:00,84149,0,2422272,2023-01-03 18:56:00,0.0,2023-01-03 18:56:00,0.0,2023-01-03 15:05:45,amfs,5866,1


In [10]:
updates=[]
for vp in vehiclePositions['entity'] :
    vid = vp['id']
    vehicle = vp['vehicle']
    trip = vehicle['trip']
    trip_id = trip['trip_id']
    start_time = trip['start_time'] if 'start_time' in trip else None
    start_date = trip['start_date']
    start_time = businessday_to_datetime(start_date, start_time)
    route_id = trip['route_id']
    rt_id = trip[rtid_keys[0]][rtid_keys[1]]['realtime_trip_id'] if rtid_keys[0] in trip else None
    direction_id = int(trip['direction_id']) if direction_id in trip else None
    latitude = vehicle['position']['latitude'] if 'position' in vehicle else None
    longitude = vehicle['position']['longitude'] if 'position' in vehicle else None
    current_stop_seq = vehicle['current_stop_sequence'] if 'current_stop_sequence' in vehicle else None
    timestamp = vehicle['timestamp']
    label = vehicle['vehicle']['label'] if 'vehicle' in vehicle else None
    delay = int(vehicle[delay_keys[0]][delay_keys[1]]['delay']) if delay_keys[0] in vehicle else None
    updates.append({'id': vid, 'RT_id': rt_id, 'trip_id': trip_id, 'start_time': start_time,
                    'route_id': route_id, 'direction_id': direction_id, 'latitude': latitude,
                    'longitude': longitude, 'current_stop_seq':current_stop_seq,
                    'timestamp': timestamp, 'label': label, 'delay': delay})
df_vehicle_positions = pd.DataFrame(updates)
df_vehicle_positions = convert_times(df_vehicle_positions, ['timestamp'])
df_vehicle_positions['current_stop_seq'] = df_vehicle_positions['current_stop_seq'].astype('Int64')
df_vehicle_positions.head(2)

Unnamed: 0,id,RT_id,trip_id,start_time,route_id,direction_id,latitude,longitude,current_stop_seq,timestamp,label,delay
0,2023-01-03:KEOLIS:5070:50661,KEOLIS:5070:50661,161220784,2023-01-03 14:42:00,75567,,52.175816,5.28076,19,2023-01-03 15:05:41,1040,15.0
1,2023-01-03:QBUZZ:u008:2069,QBUZZ:u008:2069,159964312,2023-01-03 14:31:00,67096,,52.065773,5.132402,25,2023-01-03 15:05:37,4814,110.0


In [11]:
updates=[]
timestamp = alerts['header']['timestamp']
causes = {0:'UNKNOWN_CAUSE', 1:'OTHER_CAUSE', 2:'TECHNICAL_PROBLEM', 3:'STRIKE', 4:'DEMONSTRATION',
          5:'ACCIDENT', 6:'HOLIDAY', 7:'WEATHER', 8:'MAINTENANCE', 9:'CONSTRUCTION',
          10:'POLICE_ACTIVITY', 11:'MEDICAL_EMERGENCY'}
effects = {-1: None, 0:'NO_SERVICE', 1:'REDUCED_SERVICE', 2:'SIGNIFICANT_DELAYS', 3:'DETOUR',
           4:'ADDITIONAL_SERVICE', 5:'MODIFIED_SERVICE', 6:'OTHER_EFFECT',
           7:'UNKNOWN_EFFECT', 8:'STOP_MOVED'}
for al in alerts['entity'] :
    aid = al['id']
    alert = al['alert']
    cause = int(alert['cause']) if 'cause' in alert else 0
    effect = int(alert['effect']) if 'effect' in alert else -1
    header_text = alert['header_text']['translation'][0]['text']
    description_text = alert['description_text']['translation'][0]['text']
    for ap in alert['active_period']:
        start = ap['start']
        end = ap['end']
        informed_entity = alert['informed_entity'][0]['stop_id'] if 'informed_entity' in alert else None
        updates.append({'id': aid, 'timestamp': timestamp, 'cause': cause, 'cause_txt': causes[cause], 'effect': effect, 
                        'effect_text': effects[effect], 'period_start': start, 'period_end': end, 
                        'informed_entity': informed_entity, 'header': header_text, 'description': description_text})
df_alerts = pd.DataFrame(updates)
df_alerts = convert_times(df_alerts, ['timestamp', 'period_start', 'period_end'])
df_alerts.head(2)

Unnamed: 0,id,timestamp,cause,cause_txt,effect,effect_text,period_start,period_end,informed_entity,header,description
0,KV15:RET:2015-05-12:53,2023-01-03 15:05:45,1,OTHER_CAUSE,7,UNKNOWN_EFFECT,2015-05-13 00:43:00,2023-12-31 19:54:35,1541226,Rotterdam Airport: bus 33 richting Meijersplei...,Oorzaak : onbekend \nRotterdam Airport: bus 33...
1,KV15:QBUZZ:2022-11-10:53,2023-01-03 15:05:45,9,CONSTRUCTION,8,STOP_MOVED,2022-11-14 07:48:00,2023-02-25 02:43:00,2492641,Halte richting Amersfoort verplaatst naar de d...,Oorzaak : Werkzaamheden Herinrichting Veldmaar...


# Create time departure board

In [12]:
station = 'Arnhem Zuid'

df = df_train_updates.merge(stops, on='stop_id')
df = df.merge(trips, on='trip_id')
df = df[df.stop_name == station]
df.loc[df.departure_time.notnull()].sort_values('departure_time')
df = df[df.platform_code.notnull() & (df['departure_time'] > datetime.now())]
df['departure_time'] = df['departure_time'].apply(lambda x: x.replace(tzinfo=None))

def print_deps(df):
    print("Station {} ({})".format(df.iloc[0]['stop_name'], df.iloc[0]['station_id']))
    print("Time   Delay   Platform         Trein         Rit     Bestemming")
    for idx, dep in df.sort_values('departure_time').head(10).iterrows():
        desc = "{:>5s}   {:>4d}      {:>5s}    {:>10s}  {:10d}     {}"
        desc = desc.format(dep['departure_time'].strftime("%H:%M"),
                          int(dep['departure_delay']),
                          dep['platform_code'],
                          dep['route_short_name'][0:9],
                          dep['trip_short_name'],
                          dep['trip_headsign'])
        print(desc)
    
print_deps(df)

Station Arnhem Zuid (ahz)
Time   Delay   Platform         Trein         Rit     Bestemming
15:14      0          2      Sprinter        7651     Zutphen
15:16      0          1      Sprinter        7652     Wijchen
15:44      0          2      Sprinter        7653     Zutphen
15:46      0          1      Sprinter        7654     Wijchen
16:14      0          2      Sprinter        7655     Zutphen
16:16      0          1      Sprinter        7656     Wijchen
16:44      0          2      Sprinter        7657     Zutphen
16:46      0          1      Sprinter        7658     Wijchen
17:14      0          2      Sprinter        7659     Zutphen
17:16      0          1      Sprinter        7660     Wijchen


# Current locations (Arnhem)

In [13]:
# Geopanda frame for easier filtering
gdf = gpd.GeoDataFrame(df_vehicle_positions, 
                       geometry=gpd.points_from_xy(df_vehicle_positions.longitude, df_vehicle_positions.latitude))
# Filter on bounding box Arnhem
gdf = gdf.cx[5.78:5.98, 51.9:52.1] 
# Add additional data
gdf = gdf.merge(trips, on=['trip_id', 'route_id'])
gdf = gdf.merge(stoptimes, left_on=['trip_id', 'current_stop_seq'], right_on=['trip_id', 'stop_sequence'])
gdf = gdf.merge(stops, on='stop_id')

In [14]:
# Map of Arnhem
map_osm = folium.Map(location=[51.98, 5.89], zoom_start=12)

# Popup formatter
fs = "<table><tr><td>ID</td><td>{}</td></tr>" +\
     "<tr><td></td><td>{}</td></tr>"+\
     "<tr><td>Lijn</td><td>{} : {}</td></tr>"+\
     "<tr><td>Halte</td><td>{}</td></tr>"+\
     "<tr><td>Location</td><td>{:6.4f}, {:6.4f}</td></tr></table>"

for _, veh in gdf.iterrows():
    iframe = folium.IFrame(fs.format(veh['route_id'], veh['agency_id'], veh['route_short_name'], 
                                     veh['trip_headsign'], veh['stop_name'],
                                     veh['latitude'], veh['longitude']))
    popup = folium.Popup(iframe, min_width=300, max_width=300)
    marker = folium.CircleMarker(location=[veh["latitude"], veh["longitude"]], 
                                 popup=popup,
                                 radius=5, fill_color='blue')
    map_osm.add_child(marker)

map_osm

In [15]:
gdf.iloc[43]

id                                        2023-01-03:CXX:A026:1039
RT_id                                                CXX:A026:1039
trip_id                                                  161778180
start_time                                     2023-01-03 15:07:00
route_id                                                      1452
direction_id_x                                                None
latitude                                                 51.984150
longitude                                                 5.900814
current_stop_seq                                                 0
timestamp                                      2023-01-03 15:05:40
label                                                         5349
delay                                                     0.000000
geometry               POINT (5.900814056396484 51.98414993286133)
service_id                                                     621
trip_headsign                                       Dieren Sta

In [16]:
# List largest objects
import sys
def sizeof_fmt(num):
    for unit in ['B','KB','MB','GB']:
        if abs(num) < 1024.0:
            return "%3.1f %s" % (num, unit)
        num /= 1024

for name, size in sorted(((name, sys.getsizeof(value)) for name, value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

                     stoptimes: 691.5 MB
                         trips:  66.8 MB
               df_trip_updates:  36.0 MB
                         stops:  20.9 MB
              df_train_updates:  14.3 MB
          df_vehicle_positions:   1.3 MB
                        routes: 680.8 KB
                     df_alerts: 145.1 KB
                      calendar:  81.5 KB
                           gdf:  64.5 KB
