# Compute Statuscodes
1. check where each train is at the moment
2. collect trip ids of trains that are somewhere on the map right now
3. check between which stations each trip is
4. find the stretch each trip is on
5. calculate the position of each train on its respective stretch
6. look up the stretch segment each stretch is on
7. put the status code together and output it

In [68]:
import datetime
from dotenv import load_dotenv
from os import getenv
import pandas as pd

# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# Path of the CSV file with the stop times; it is read with pandas further down.
STOPTIMES_PATH = 'stoptimes.csv'

# generated by ChatGPT
import datetime
def dateToDBDate(date: datetime.date) -> str:
    """
    Convert a datetime.date object into a DB date string "YYMMDD".
    """
    # A non-empty format spec on a date is handed to strftime.
    return f"{date:%y%m%d}"


def datetimeToDBDatetime(dt: datetime.datetime) -> str:
    """
    Convert a datetime.datetime object into a DB datetime string "YYMMDDHHMM".
    """
    db_datetime_pattern = "%y%m%d%H%M"
    return format(dt, db_datetime_pattern)


def DBDatetimeToDatetime(dbDate: str) -> datetime.datetime:
    """
    Convert a DB datetime string "YYMMDDHHMM" into a datetime.datetime object.
    """
    db_datetime_pattern = "%y%m%d%H%M"
    parsed = datetime.datetime.strptime(dbDate, db_datetime_pattern)
    return parsed


def DBDateToDate(dbDate: str) -> datetime.date:
    """
    Convert a DB date string "YYMMDD" into a datetime.date object.
    """
    # strptime only exists on datetime, so parse first and then cut off the time part.
    parsed = datetime.datetime.strptime(dbDate, "%y%m%d")
    return parsed.date()

def datetimeToDBDateAndHourTuple(dt: datetime.datetime):
    """
    Split a datetime.datetime object into a tuple of the DB date string "YYMMDD"
    and the zero-padded hour string "HH".
    """
    return tuple(dt.strftime(pattern) for pattern in ("%y%m%d", "%H"))
    
       
# Quick manual check of the conversion helpers with one fixed date and time.
sample_date = datetime.date(2025, 8, 10)
sample_datetime = datetime.datetime(2025, 8, 10, 12, 22)

print(dateToDBDate(sample_date))
print(datetimeToDBDatetime(sample_datetime))
print(DBDateToDate("250810"))
print(DBDatetimeToDatetime("2508101222"))
print(datetimeToDBDateAndHourTuple(sample_datetime))

250810
2508101222
2025-08-10
2025-08-10 12:22:00
('250810', '12')


In [69]:
# Read every column as a string; the DB datetime columns are parsed from these strings later on.
df_stoptimes = pd.read_csv(STOPTIMES_PATH, dtype=str)


# OPTIONALLY FETCH CHANGES HERE



# request_timestamp was only needed in stoptimes.csv during preprocessing (to identify outdated and
# cached data) and during change application (to match changes to the stoptime entries).
# This script only reads stoptimes.csv and never overwrites it, so the column can be dropped here.
# NOTE(review): the earlier comment also named request_uic, but only request_timestamp is dropped —
# confirm whether request_uic should be removed as well.
df_stoptimes = df_stoptimes.drop(columns=['request_timestamp'])


In [70]:
# Parse the DB datetime strings into the new 'arrival' / 'departure' columns and
# drop the raw string columns in the same step.
df_stoptimes = (
    df_stoptimes
    .assign(
        arrival=lambda frame: frame['arrival_dbdatetime'].map(DBDatetimeToDatetime),
        departure=lambda frame: frame['departure_dbdatetime'].map(DBDatetimeToDatetime),
    )
    .drop(columns=['arrival_dbdatetime', 'departure_dbdatetime'])
)

In [None]:
# find active trip ids

# NOTE(review): this is a naive local timestamp — presumably the stop times are in the
# same local time zone as the machine running this; confirm.
current_datetime = datetime.datetime.now()

# a stop counts as departed once its departure time lies before now
df_stoptimes['has_departed_station'] = df_stoptimes['departure'].lt(current_datetime)

# At least two stops are needed to place a train on a stretch,
# so trips that consist of a single stop time are filtered out.
stop_count_of_trip = df_stoptimes.groupby("trip_id")["trip_id"].transform("count")
df_stoptimes = df_stoptimes[stop_count_of_trip != 1]

# A trip whose stops have all been departed has ended: keep only the others.
trip_has_ended = df_stoptimes.groupby("trip_id")["has_departed_station"].transform("all")
df_stoptimes = df_stoptimes[~trip_has_ended]

# A trip without a single departed stop has not started yet: keep only started trips.
trip_has_started = df_stoptimes.groupby("trip_id")["has_departed_station"].transform("any")
df_stoptimes = df_stoptimes[trip_has_started]

# only the active trips remain

no_active_trips = len(df_stoptimes['trip_id'].unique())
print(f"found {no_active_trips} active trips")

found 11 active trips


In [72]:
# Find the two stations between which each train is travelling. Standing at a station until
# departure counts as being between the two stations: the stopping time is handled by computing
# where the train is between the two stations, and standing at the second station is simply
# mapped to having travelled 100% of the time.
# Therefore the previous station is the last departed stop of a trip and the next station is
# its first stop that has not been departed yet.
# NOTE(review): "last"/"first" refer to row order — this presumably relies on stoptimes.csv
# listing the stops of each trip in travel order; confirm.
has_departed = df_stoptimes['has_departed_station']

# Key column first and without the helper flag (it is not needed from here on); this is the
# column layout a groupby(['trip_id'], as_index=False) aggregation yields.
station_columns = ['trip_id'] + [
    column for column in df_stoptimes.columns
    if column not in ('trip_id', 'has_departed_station')
]

# tail(1) / head(1) return complete rows. GroupBy.last() / first() instead pick the last / first
# non-null value of every column separately, which can stitch one "station" together from
# several different stops as soon as a cell is missing.
# Sorting by trip_id and resetting the index keeps the row order of a sorted groupby result.
previous_stations = (
    df_stoptimes.loc[has_departed, station_columns]
    .groupby('trip_id')
    .tail(1)
    .sort_values('trip_id')
    .reset_index(drop=True)
)
next_stations = (
    df_stoptimes.loc[~has_departed, station_columns]
    .groupby('trip_id')
    .head(1)
    .sort_values('trip_id')
    .reset_index(drop=True)
)

# one row per trip: the previous station's columns get "_previous", the next station's "_next"
df_trip_statuses = previous_stations.merge(
    next_stations,
    how='inner',
    on=['trip_id', 'category', 'number'],
    suffixes=("_previous", "_next"),
)

df_trip_statuses.to_csv('temp.csv', index=False)

In [None]:
# identify which stretch the trip is on

# identify what percentage of the travel time between the two stations has elapsed for each trip