# Extract active vehicles
1. Convenience functions for date processing
2. fetch trip_updates
3. Process trip_updates
4. Enrich stop_times with real-time trip_updates
5. add realtime start and end times to trips
6. Get trips that are currently active
7. Get status of the active trips

In [7]:
from pandas import read_csv, DataFrame
from os import path, getcwd

gtfs_filtered_path = path.join(getcwd(), 'gtfs_filtered')
calendar_path = path.join(gtfs_filtered_path, 'calendar.txt')
routes_path = path.join(gtfs_filtered_path, 'routes.txt')
trips_path = path.join(gtfs_filtered_path, 'trips.txt')
stops_path = path.join(gtfs_filtered_path, 'stops.txt')
stop_times_path = path.join(gtfs_filtered_path, 'stop_times.txt')

calendar:DataFrame = read_csv(calendar_path)
routes:DataFrame = read_csv(routes_path)
trips:DataFrame = read_csv(trips_path)
stops:DataFrame = read_csv(stops_path)
stop_times:DataFrame = read_csv(stop_times_path)


relevant_lines = ['22']
relevant_trip_prefixes = [line + "-" for line in relevant_lines]




## 1. convenience functions for gtfs date formats

In [9]:
import datetime

def parseTimeAsDatetimeObject(timestring:str):
    # mod 24, because gtfs defines days as service days that can be longer than 24 hours, so 24:15 is a valid gtfs time
    hour = int(timestring[0:2]) % 24
    minute = int(timestring[3:5])
    second = int(timestring[6:8])
    #print(timestring)
    #print(hour)
    #print(minute) 
    #print(second)
    return datetime.time(hour, minute, second)

def parseDateAsDatetimeObject(datestring:str):
    datestring = str(datestring)
    year = int(datestring[0:4])
    month = int(datestring[4:6])
    day = int(datestring[6:8])
    return datetime.date(year, month, day)

def addSecondsToTimeObject(time:datetime.time, seconds) -> datetime.time:
    datetime_object = datetime.datetime(100,1,1,time.hour, time.minute, time.second)
    delta = datetime.timedelta(seconds=seconds)
    return (datetime_object + delta).time()


def getGtfsWeekdayFromDate(date: datetime.date):
    weekday_number = date.weekday()
    if weekday_number == 0:
        return "monday"
    elif weekday_number == 1:
        return "tuesday"
    elif weekday_number == 2:
        return "wednesday"
    elif weekday_number == 3:
        return "thursday"
    elif weekday_number == 4:
        return "friday"
    elif weekday_number == 5:
        return "saturday"
    else:
        return "sunday"
    

## 2. Fetch trip_updates

Now we want to fetch the trip_updates from the realtime api to later enrich our static schedules with real time delay data.
To do that, we must first authenticate via oauth2 and then call the tripupdates endpoint.

In [11]:
# load env
from dotenv import load_dotenv
from os import getenv
import requests

load_dotenv()

# authenticate with oauth2
client_id = getenv('gtfs_clientID')
client_secret = getenv('gtfs_clientSecret')
resource = getenv('gtfs_resource')
tenant_id = getenv('gtfs_tenantID')
hostname = getenv('gtfs_hostname')


from oauthlib.oauth2 import WebApplicationClient
client = WebApplicationClient(client_id)

# prepare x-www-form-urlencoded body data
data = f'grant_type=client_credentials&resource={resource}&client_id={client_id}&client_secret={client_secret}'

auth_url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/token'
headers= { 'Content-type':'application/x-www-form-urlencoded'}

auth_response = requests.post(auth_url, data=data, headers=headers)
auth = client.parse_request_body_response(auth_response.text)

gtfs_access_token = auth['access_token']

# fetch tripupdates
import json

trip_updates_json_url = f'{hostname}/tripupdates/decoded'
headers = {'Authorization':f'Bearer {gtfs_access_token}'}

trip_updates_response = requests.get(trip_updates_json_url, headers=headers)
trip_updates = json.loads(trip_updates_response.text)['entity']

trip_updates = [trip_update['tripUpdate'] for trip_update in trip_updates]

print(trip_updates[0])
#print(trip_updates['entity'][0]['tripUpdate']['stopTimeUpdate'])







{'trip': {'tripId': '3-302-1003-66120', 'startTime': '18:22:00', 'startDate': '20241022', 'scheduleRelationship': 'SCHEDULED', 'routeId': '3-302-3'}, 'stopTimeUpdate': [{'stopSequence': 14, 'arrival': {'delay': 0}, 'stopId': '247521'}, {'stopSequence': 16, 'arrival': {'delay': 0}, 'stopId': '245123'}, {'stopSequence': 18, 'arrival': {'delay': 0}, 'stopId': '244722'}, {'stopSequence': 21, 'arrival': {'delay': 65}, 'departure': {'delay': 65}, 'stopId': '244902'}]}


## 3. preprocess trip_updates

Firstly, we need to select only trip_updates for our relevant lines to reduce unecessary processing.

In [13]:
print(len(trip_updates))
trip_updates = [trip_update for trip_update in trip_updates if trip_update['trip']['tripId'].startswith(tuple(relevant_trip_prefixes))]
print(len(trip_updates))

209
4


According to gtfs-rt specification, the stopTimeUpdates only include updates of the delay. If a tram is delayed for 30 seconds departing stop 1, arriving at stop 2, departing stop 2 and then gets to stop 3 on time, the stopTimeUpdates will only include one entry for delay 30 (departure) at stop 1 and delay 0 (arrival) stop 3.
To prepare enriching the stop_times with the delays, we simply fill the missing stopTimeUpdates.
We will later use stopSequence to identify a stop, because we can simply calculate the stopSequence for the artificially filled stopTimeUpdated, but can't do it as easily with the stopIds.

In [15]:
# iterate over the trip_updates
for trip_update in trip_updates:
    # find the last stopSequence for the trip
    trip_id = trip_update['trip']['tripId']

    stop_times_for_trip = stop_times.loc[stop_times['trip_id'] == trip_id]

    # skip updates for unknown trips, e.g. emergency services not known to GTFS schedule
    if len(stop_times_for_trip) == 0:
        continue
        
    #print(stop_times_for_trip)
    stop_times_for_trip = stop_times_for_trip.sort_values(by=['stop_sequence'])
    last_stop_sequence = int(stop_times_for_trip.iloc[-1]['stop_sequence']) 

    stop_time_updates = trip_update['stopTimeUpdate']
    stop_time_updates_filled = []

    # fill stop_time_updates for every stopSequence
    current_trip_delay_seconds = 0
    for stop_sequence in range(1,last_stop_sequence + 1):
        
        # check if stopTimeUpdate exists
        existing_stop_time_updates = [stop_time_update for stop_time_update in stop_time_updates if stop_time_update['stopSequence'] == stop_sequence]
        # no stopTimeUpdate exists, generate a new one with current_trip_delay
        if len(existing_stop_time_updates) == 0:
            stop_time_updates_filled.append({'stopSequence': stop_sequence, 
                                             'arrival': {'delay': current_trip_delay_seconds}, 
                                             'departure': {'delay': current_trip_delay_seconds}})
        # otherwise use the delays that already exist, update current_trip delay and fill arrival and departure with current_trip_delay if missing
        
        else:
            # determine arrival_delay
            existing_stop_time_update = existing_stop_time_updates[0]

               
            arrival_delay = existing_stop_time_update['arrival']['delay'] if 'arrival' in existing_stop_time_update else current_trip_delay_seconds
            
            # update current trip delay, if no arrival delay was specified, it virtually stays the same
            current_trip_delay_seconds = arrival_delay

            # determine departure_delay
            existing_stop_time_update = existing_stop_time_updates[0]
            departure_delay = existing_stop_time_update['departure']['delay'] if 'departure' in existing_stop_time_update else current_trip_delay_seconds
            
            # update current trip delay, if no arrival delay was specified, it virtually stays the same
            current_trip_delay_seconds = departure_delay

            stop_time_updates_filled.append({'stopSequence': stop_sequence, 
                                             'arrival': {'delay': arrival_delay}, 
                                             'departure': {'delay': departure_delay}})

    # replace stopTimeUpdate with filled version
    trip_update['stopTimeUpdate'] = stop_time_updates_filled

print(trip_updates[0])

{'trip': {'tripId': '22-1-1-67920', 'startTime': '18:52:00', 'startDate': '20241022', 'scheduleRelationship': 'SCHEDULED', 'routeId': '22-1-22'}, 'stopTimeUpdate': [{'stopSequence': 1, 'arrival': {'delay': 0}, 'departure': {'delay': 139}}, {'stopSequence': 2, 'arrival': {'delay': 139}, 'departure': {'delay': 139}}, {'stopSequence': 3, 'arrival': {'delay': 139}, 'departure': {'delay': 139}}, {'stopSequence': 4, 'arrival': {'delay': 139}, 'departure': {'delay': 109}}, {'stopSequence': 5, 'arrival': {'delay': 109}, 'departure': {'delay': 109}}, {'stopSequence': 6, 'arrival': {'delay': 109}, 'departure': {'delay': 109}}, {'stopSequence': 7, 'arrival': {'delay': 109}, 'departure': {'delay': 109}}, {'stopSequence': 8, 'arrival': {'delay': 109}, 'departure': {'delay': 109}}, {'stopSequence': 9, 'arrival': {'delay': 179}, 'departure': {'delay': 179}}, {'stopSequence': 10, 'arrival': {'delay': 179}, 'departure': {'delay': 179}}, {'stopSequence': 11, 'arrival': {'delay': 179}, 'departure': {'del

## 4. enrich stop_times with realtime delays



Now, we can add the real time delay to the scheduled stop_times.
We create two new columns, arrival_realtime and departure_realtime, and calculate the realtime arrival and departure times using the trip_updates from the previous step. If no trip_update exists, we will simply copy the scheduled times.

In [17]:
def calculateRealtime(stop_time, arrival_or_departure):
    
    trip_id = stop_time['trip_id']
    scheduled_time = stop_time[f'{arrival_or_departure}_time']
    stop_sequence = stop_time['stop_sequence']
    
    # find the corresponding trip_update, if it exists
    trip_updates_for_stop_time = [trip_update for trip_update in trip_updates if trip_update['trip']['tripId'] == trip_id]
    
    # if no trip updates exist, the scheduled time is used instead
    if len(trip_updates_for_stop_time) == 0:
       return scheduled_time
   
    trip_update_for_stop_time = trip_updates_for_stop_time[0]
    
    # find the stopTimeUpdate for this stop
    stop_time_updates_for_stop_time = [stop_time_update for stop_time_update in trip_update_for_stop_time['stopTimeUpdate']]

    # if no stop time updates exist, the scheduled time is used instead
    if len(stop_time_updates_for_stop_time) == 0:
        return scheduled_time

    stop_time_update_for_stop_time = stop_time_updates_for_stop_time[0]

    
    # add delay to scheduled time
    scheduled_time_object = parseTimeAsDatetimeObject(scheduled_time)
    delay = stop_time_update_for_stop_time[arrival_or_departure]['delay']
    # account for artificially added departure delay of 15 seconds from preprocessing 3.
    # => departure delays up to 15 seconds are already accounted for
    if arrival_or_departure == 'departure':
        delay = max(delay - 15,0)
        
    realtime = addSecondsToTimeObject(scheduled_time_object, delay).isoformat()

    return realtime
                                       


arrivals_realtime = [calculateRealtime(stop_time, 'arrival') for i, stop_time in stop_times.iterrows()]
departures_realtime = [calculateRealtime(stop_time, 'departure') for i, stop_time in stop_times.iterrows()]

# add columns to stop_times

stop_times['arrival_realtime'] = arrivals_realtime
stop_times['departure_realtime'] = departures_realtime


print(stop_times[:5])

           trip_id arrival_time departure_time  stop_sequence  stop_id  \
0  22-2-1022-18780     05:13:00       05:13:15              1   114602   
1  22-2-1022-18780     05:14:00       05:14:15              2   116822   
2  22-2-1022-18780     05:15:00       05:16:15              3   119322   
3  22-2-1022-18780     05:17:00       05:17:15              4   679102   
4  22-2-1022-18780     05:18:00       05:18:15              5   679422   

  arrival_realtime departure_realtime  
0         05:13:00           05:13:15  
1         05:14:00           05:14:15  
2         05:15:00           05:16:15  
3         05:17:00           05:17:15  
4         05:18:00           05:18:15  


## 5. add realtime start and end times to trips
To make it easy to identify the active trips, we will now add start and end times to each trip. First, we will create a function to get all the stop_times for a specific `trip_id`. Then we will sort the stop_times and return the first `arrival_time` as trip start and the last `departure_time` as trip end.

In [19]:
def getTripStartRealtime(trip_id:str) -> tuple[str, str]:
    relevant_stop_times = stop_times.loc[stop_times['trip_id'] == trip_id]
    #print('found ',relevant_stop_times.shape[0], 'relevant stop times for trip_id', trip_id)
    
    relevant_stop_times = relevant_stop_times.sort_values(by=['stop_sequence'])
    
    first_stop = relevant_stop_times.iloc[0]
    trip_start_time = first_stop.loc['arrival_realtime']

    return trip_start_time

def getTripEndRealtime(trip_id:str) -> tuple[str, str]:
    relevant_stop_times = stop_times.loc[stop_times['trip_id'] == trip_id]
    #print('found ',relevant_stop_times.shape[0], 'relevant stop times for trip_id', trip_id)
    
    relevant_stop_times = relevant_stop_times.sort_values(by=['stop_sequence'])
    
    last_stop = relevant_stop_times.iloc[-1]
    trip_end_time = last_stop.loc['departure_realtime']
    
    return trip_end_time

example_start = getTripStartRealtime('22-1-1-56520')
example_end = getTripEndRealtime('22-1-1-56520')
print('Trip Start Realtime: ', example_start, '\nTrip End Realtime: ', example_end)

Trip Start Realtime:  15:42:00 
Trip End Realtime:  16:01:15


Now let's add the new columns by using the function we just created.

In [21]:
trips['start_realtime'] = trips.apply(lambda row: getTripStartRealtime(row['trip_id']), axis=1)
trips['end_realtime'] = trips.apply(lambda row: getTripEndRealtime(row['trip_id']), axis=1)

print(trips.head(5))

  route_id          trip_id  \
0  22-2-22  22-2-1022-18780   
1  22-1-22     22-1-1-20520   
2  22-2-22  22-2-1022-25980   
3  22-1-22     22-1-1-27720   
4  22-2-22  22-2-1022-33180   

                                          service_id  trip_short_name  \
0                295-296-297-298-299-302-303-304-305               22   
1        295-296-297-298-299-300-302-303-304-305-307               22   
2                295-296-297-298-299-302-303-304-305               22   
3  295-296-297-298-299-300-301-302-303-304-305-30...               22   
4        295-296-297-298-299-300-302-303-304-305-307               22   

  start_time  end_time start_realtime end_realtime  
0   05:13:00  05:32:15       05:13:00     05:32:15  
1   05:42:00  06:01:15       05:42:00     06:01:15  
2   07:13:00  07:32:15       07:13:00     07:32:15  
3   07:42:00  08:01:15       07:42:00     08:01:15  
4   09:13:00  09:32:15       09:13:00     09:32:15  


## 6. currently active trips

First, we need to get all the trip_ids for currently active trips. Trips are active, if the current time is between the start and end time of the trip and if one of the services, the trip belongs to, runs on the current day.
Let's start by looking at the start and end times of the trips.

In [24]:
print(datetime.datetime.now())

def isTripRowActiveAtCurrentTime(trip_row):
    start_time = parseTimeAsDatetimeObject(trip_row['start_realtime'])
    current_time = datetime.datetime.now().time() 
    end_time = parseTimeAsDatetimeObject(trip_row['end_realtime'])
    #print(start_time, current_time, end_time, start_time <= current_time <= end_time)
    return start_time <= current_time <= end_time
    

# select trips where current time is between start and end time
trips = trips[trips.apply(isTripRowActiveAtCurrentTime, axis=1)]
print("found", trips.shape[0], "trips that run at the current time")
print(trips.head(5))

2024-10-22 19:06:49.413625
found 10 trips that run at the current time
      route_id          trip_id  \
55     22-1-22     22-1-1-68520   
110    22-2-22  22-2-1022-67980   
146    22-2-22  22-2-1022-68580   
177    22-1-22     22-1-1-67920   
265  22-851-22   22-851-1-68640   

                                            service_id  trip_short_name  \
55   295-296-297-298-299-300-301-302-303-304-305-30...               22   
110        295-296-297-298-299-300-302-303-304-305-307               22   
146        295-296-297-298-299-300-302-303-304-305-307               22   
177        295-296-297-298-299-300-302-303-304-305-307               22   
265                                                309               22   

    start_time  end_time start_realtime end_realtime  
55    19:02:00  19:21:15       19:02:00     19:22:01  
110   18:53:00  19:12:15       18:53:00     19:13:03  
146   19:03:00  19:22:15       19:03:00     19:23:10  
177   18:52:00  19:11:15       18:52:00     19:

Secondly, we will check whether the services run on the current day by looking up the services from the `service_id` column in the calendar dataframe.
As soon as we find a `service_id` that runs on the current day, we can stop the search and return true, otherwise we return false.

In [26]:
def isTripRowActiveOnCurrentDay(trip_row):
    current_date = datetime.date.today()
    current_weekday_gtfs = getGtfsWeekdayFromDate(datetime.date.today())

    
    calendar:DataFrame = read_csv(calendar_path)

    # select row from calendar for this service
    calendar = calendar[calendar['service_id'] == trip_row['service_id']]

    # check every calendar entry
    for index, schedule in calendar.iterrows():
        # check if current date is between start_date and end_date (inclusive)
        start_date = parseDateAsDatetimeObject(schedule['start_date'])
        end_date = parseDateAsDatetimeObject(schedule['end_date'])

        duration_check = start_date <= current_date <= end_date

        # check if current weekday is an active day in the schedule
        weekday_check = schedule[current_weekday_gtfs] == 1

        if duration_check and weekday_check:
            return True
                
    return False
    
trips = trips[trips.apply(isTripRowActiveOnCurrentDay, axis=1)]
print(trips.head(5))

    route_id          trip_id  \
55   22-1-22     22-1-1-68520   
110  22-2-22  22-2-1022-67980   
146  22-2-22  22-2-1022-68580   
177  22-1-22     22-1-1-67920   

                                            service_id  trip_short_name  \
55   295-296-297-298-299-300-301-302-303-304-305-30...               22   
110        295-296-297-298-299-300-302-303-304-305-307               22   
146        295-296-297-298-299-300-302-303-304-305-307               22   
177        295-296-297-298-299-300-302-303-304-305-307               22   

    start_time  end_time start_realtime end_realtime  
55    19:02:00  19:21:15       19:02:00     19:22:01  
110   18:53:00  19:12:15       18:53:00     19:13:03  
146   19:03:00  19:22:15       19:03:00     19:23:10  
177   18:52:00  19:11:15       18:52:00     19:13:19  


## 7. Status of active trips
Now that we have identified all the trips that are currently running, we want to know where the trams are on our network. As we later want to represent a vehicle being at a stop as well as a vehicle traveling between stops, we will represent the status of a vehicle (trip) as 

trip_id: <strip_id>, status: IN_TRANSIT_TO / STOPPED_AT, current_stop_id: <stop_id>, previous_stop_id: <stop_id>


Additionally, we want to represent the direction of travel by having one LED line per direction. All stops of each section of track between intersections will be organized in a sorted list per direction, e.g. for the section between Bismarckplatz and Stadtbücherei
Bismarckplatz (A) -> Seegarten (A) -> Stadtbücherei (A)

Each tuple of each direction will have LEDs assigned. The LED's name is composed of the respective stop_ids. e.g. Bismarckplatz (A) (stopId 114601) -> Seegarten(A) (stopId 116821) gets LEDs 114601_116821_T and 114601_116821_S (transit and stopped) assigned. The LED Codes are mapped to their respective hardware addresses. To allow a map creation with only one led for both directions, and also account sections with only one track, the LED-code to LED-hardware address mapping is a many to many mapping.


In [28]:
from pandas import DataFrame

# create status Dataframe for every active trip, then merge the Dataframes
# status, current_stop_id, previous_stop_id
status_df = DataFrame(columns=['status','current_stop_id','previous_stop_id', 'current_stop_name', 'previous_stop_name'])

current_time = datetime.datetime.now().time()

def isStoppedAtStopTime(stop_time):
    return parseTimeAsDatetimeObject(stop_time['arrival_realtime']) <= current_time <= parseTimeAsDatetimeObject(stop_time['departure_realtime'])

# take stop times and iterator to check previous stop
# check if the stop_time at position i of stop_times is currently being traveled to
def isTravelingToStoptime(stop_times, i):
    # loc because i is the pandas index of the row 
    current_stop_time = stop_times.loc[i]

    # if there is no previous stop_time, this is the initial station which cannot be traveled to 
    try:
        previous_stop_time = stop_times.loc[i-1]
    except KeyError:
        return False
    has_arrived_at_stop_time = current_time <= parseTimeAsDatetimeObject(current_stop_time['arrival_realtime'])
    has_departed_previous_stop_time = current_time >= parseTimeAsDatetimeObject(previous_stop_time['departure_realtime'])
    return has_arrived_at_stop_time and has_departed_previous_stop_time

for i, active_trip in trips.iterrows():
    trip_id = active_trip['trip_id']

    stop_times_for_this_trip = stop_times.loc[stop_times['trip_id'] == trip_id]

    

    # find stops, at which the vehicle is currently stopped (should be 0 or 1)
    # vehicle is stopped, if current time is between arrival and departure of a stop
    stop_times_stopped_at = [stop_time for _,stop_time in stop_times_for_this_trip.iterrows() if isStoppedAtStopTime(stop_time)]

    #print(active_trip, stop_times_stopped_at)

    # find stops that the vehicle is currently traveling to (should be 0 or 1)
    # vehicle is traveling to a stop if it has not arrived a stop but already departed the previous stop
    stop_times_traveling_to = [stop_time for i ,stop_time in stop_times_for_this_trip.iterrows() if isTravelingToStoptime(stop_times_for_this_trip, i)]

    #print(trip_id, len(stop_times_stopped_at), len(stop_times_traveling_to))

    status = ''
    current_stop_id = ''
    previous_stop_id = ''


    if len(stop_times_stopped_at) > 0:
        status = 'STOPPED_AT'
        current_stop_id = stop_times_stopped_at[0]['stop_id'] 
        
    elif len(stop_times_traveling_to) > 0:
        status = 'IN_TRANSIT_TO'
        current_stop_id = stop_times_traveling_to[0]['stop_id']
    else: 
        status = 'ERROR'

    current_stop_name = ''
    current_stops = stops.loc[stops['stop_id'] == current_stop_id]

    if len(current_stops) == 0:
        # stop not found
        current_stop_name = 'ERROR'
    else:
        current_stop = current_stops.iloc[0]
        current_stop_name = f'{current_stop['stop_name']} (Steig {current_stop['platform_code']})'
        
    
    
    import pandas as pd
    status_row = pd.Series({'status': status, 
                  'current_stop_id': current_stop_id, 
                  'previous_stop_id': previous_stop_id, 
                  'current_stop_name': current_stop_name, 
                  'previous_stop_name': ''})
    print(active_trip)
    print(status_row)

route_id                                                     22-1-22
trip_id                                                 22-1-1-68520
service_id         295-296-297-298-299-300-301-302-303-304-305-30...
trip_short_name                                                   22
start_time                                                  19:02:00
end_time                                                    19:21:15
start_realtime                                              19:02:00
end_realtime                                                19:22:01
Name: 55, dtype: object
status                             IN_TRANSIT_TO
current_stop_id                           126101
previous_stop_id                                
current_stop_name     Kranichweg/Stotz (Steig A)
previous_stop_name                              
dtype: object
route_id                                               22-2-22
trip_id                                        22-2-1022-67980
service_id         295-296-297-298-299-