In [5]:
import math
import pandas as pd
import numpy as np
import pandas as pd
import math
from datetime import datetime, timedelta

# Match-Infos einlesen
dfMatches = pd.read_csv("matchInfos.csv")
dfMatches.loc["date"] = pd.to_datetime(dfMatches["date"])
# only select 14. Spieltag der ersten BL (for testing)
dfMatches = dfMatches[(dfMatches["matchday"] == "14. Spieltag") & (dfMatches["league"] == "bl1")]


# Fahrplan-Daten einlesen
all_stops_df = pd.read_csv("gtfs/stops.txt")
all_stops_df["stop_name"] = all_stops_df["stop_name"].str.replace(r"\bHbf\b", "Hauptbahnhof", case=False, regex=True)
trips_df = pd.read_csv("gtfs/trips.txt")
stop_times_df = pd.read_csv("gtfs/stop_times.txt")
routes_df = pd.read_csv("gtfs/routes.txt")
calendar_df = pd.read_csv("gtfs/calendar.txt") 

In [54]:


# -----------------------------
# Haversine Funktionen
# -----------------------------
def haversine_vectorized(lat, lon, lats, lons):
    """
    Vektorisiertes Haversine für Arrays.
    lat, lon: einzelne Position (float)
    lats, lons: Arrays mit Stop-Koordinaten
    Returns: Array mit Entfernungen in km
    """
    R = 6371  # Erdradius in km
    phi1 = np.radians(lat)
    phi2 = np.radians(lats)
    delta_phi = np.radians(lats - lat)
    delta_lambda = np.radians(lons - lon)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

def find_closest_stop(lat, lon, all_stops_df):
    """
    Findet den nächsten Stop zum Punkt (lat, lon) mithilfe von vektorisierten Haversine-Berechnungen.
    """
    lats = all_stops_df['stop_lat'].to_numpy()
    lons = all_stops_df['stop_lon'].to_numpy()
    dists = haversine_vectorized(lat, lon, lats, lons)
    idx = np.argmin(dists)
    closest_stop = all_stops_df.iloc[idx].to_dict()
    closest_stop['distance_km'] = dists[idx]
    return closest_stop

# -----------------------------
# Hauptfunktion
# -----------------------------
def get_train_connections_for_game(game_info_df, 
                                   all_stops_df,
                                   trips_df,
                                   stop_times_df,
                                   routes_df,
                                   calendar_df, 
                                   window_hours=3, 
                                   trip_type='departure'):
    """
    Liefert Zugverbindungen für ein Spiel basierend auf Start/Ziel-Koordinaten, 
    GTFS-Daten (stops, trips, stop_times, routes, calendar) und Zeitfenster.
    """
    # -----------------------------
    # Start/Ziel bestimmen
    # -----------------------------
    if trip_type.lower() == "departure":
        lat_start, lon_start = game_info_df["home_lat"], game_info_df["home_lon"]
        lat_end, lon_end = game_info_df["away_lat"], game_info_df["away_lon"]
    else:  # 'arrival'
        lat_start, lon_start = game_info_df["away_lat"], game_info_df["away_lon"]
        lat_end, lon_end = game_info_df["home_lat"], game_info_df["home_lon"]

    # Datum
    date = pd.to_datetime(game_info_df["date"])
    date_only = date.date()

    # -----------------------------
    # Nächste Bahnhöfe
    # -----------------------------
    station_start = find_closest_stop(lat_start, lon_start, all_stops_df)
    station_end   = find_closest_stop(lat_end, lon_end, all_stops_df)
    start_ids = [station_start['stop_id']]
    end_ids   = [station_end['stop_id']]
    print(station_start, station_end)

    # -----------------------------
    # Aktive Trips
    # -----------------------------
    weekday = date.strftime("%A").lower()
    active_service_ids = calendar_df[calendar_df[weekday] == 1]['service_id'].tolist()
    active_trips = trips_df[trips_df['service_id'].isin(active_service_ids)]
    stop_times_active = stop_times_df[stop_times_df['trip_id'].isin(active_trips['trip_id'])]
    print(active_trips)
    # -----------------------------
    # Trips zwischen Start und Ziel
    # -----------------------------
    stop_times_start = stop_times_active[stop_times_active['stop_id'].isin(start_ids)]
    stop_times_end   = stop_times_active[stop_times_active['stop_id'].isin(end_ids)]
    common_trips = set(stop_times_start['trip_id']).intersection(set(stop_times_end['trip_id']))
    print(common_trips)
    # -----------------------------
    # Zeitfenster
    # -----------------------------
    input_time = datetime.combine(date_only, datetime.min.time())  # Mit 00:00 als Basis
    if trip_type.lower() == 'arrival':
        window_start = input_time - timedelta(hours=(2 + window_hours))
        window_end   = input_time + timedelta(hours=2)
    else:  # departure
        window_start = input_time + timedelta(hours=2)
        window_end   = input_time + timedelta(hours=(2 + window_hours))

    # -----------------------------
    # Stopps sammeln
    # -----------------------------
    rows = []
    for trip_id in common_trips:
        trip_times = stop_times_active[stop_times_active['trip_id'] == trip_id].sort_values('stop_sequence')
        try:
            start_idx = trip_times[trip_times['stop_id'].isin(start_ids)].index[0]
            end_idx   = trip_times[trip_times['stop_id'].isin(end_ids)].index[0]
        except IndexError:
            continue
        if start_idx >= end_idx:
            continue
        stops_between = trip_times.loc[start_idx:end_idx]

        # Start/End Zeit
        start_time_str = stops_between.iloc[0]['departure_time']
        end_time_str   = stops_between.iloc[-1]['arrival_time']
        start_dt = datetime.strptime(start_time_str, "%H:%M:%S").replace(year=date_only.year, month=date_only.month, day=date_only.day)
        end_dt   = datetime.strptime(end_time_str, "%H:%M:%S").replace(year=date_only.year, month=date_only.month, day=date_only.day)

        if trip_type.lower() == 'departure' and not (window_start <= start_dt <= window_end):
            continue
        if trip_type.lower() == 'arrival' and not (window_start <= end_dt <= window_end):
            continue

        # Route Name
        route_id = trips_df.loc[trips_df['trip_id'] == trip_id, 'route_id'].values[0]
        route_info = routes_df[routes_df['route_id'] == route_id]
        if 'route_long_name' in routes_df.columns and not route_info['route_long_name'].isna().all():
            route_name = route_info['route_long_name'].values[0]
        elif 'route_short_name' in routes_df.columns and not route_info['route_short_name'].isna().all():
            route_name = route_info['route_short_name'].values[0]
        else:
            route_name = str(route_id)

        # Stopps sammeln
        for _, stop_row in stops_between.iterrows():
            stop_info = all_stops_df[all_stops_df['stop_id'] == stop_row['stop_id']].iloc[0]
            rows.append({
                'match_id': game_info_df.get('match_id', None),
                'trip_id': trip_id,
                'route': route_name,
                'stop_sequence': stop_row['stop_sequence'],
                'stop_name': stop_info['stop_name'],
                'stop_lon': stop_info['stop_lon'],
                'stop_lat': stop_info['stop_lat'],
                'arrival': stop_row.get('arrival_time', ''),
                'departure': stop_row.get('departure_time', ''),
                'date': date
            })

    return pd.DataFrame(rows)


In [58]:
connections_df = get_train_connections_for_game(   
    game_info_df=dfMatches.iloc[1],  # Einzelnes Spiel
    all_stops_df=all_stops_df,
    trips_df=trips_df,
    stop_times_df=stop_times_df,
    routes_df=routes_df,
    calendar_df=calendar_df,
    window_hours=3,     
    trip_type='departure' 
)


{'stop_name': 'Frankfurt(Main)Hauptbahnhof', 'parent_station': 338152.0, 'stop_id': 661728, 'stop_lat': 50.10668, 'stop_lon': 8.662828, 'location_type': nan, 'platform_code': nan, 'distance_km': np.float64(0.017866605872475638)} {'stop_name': 'Augsburg Hauptbahnhof', 'parent_station': 333168.0, 'stop_id': 320503, 'stop_lat': 48.36544, 'stop_lon': 10.88557, 'location_type': nan, 'platform_code': nan, 'distance_km': np.float64(0.06050611860187763)}
      route_id  service_id  trip_id
3            1         258   242095
6            1         261   818557
7            1         261   862840
8            1         272   816857
14           1         445   476607
...        ...         ...      ...
4823        94         733   945164
4833        94         798   801548
4844        94         879  1169227
4845        94         879    35159
4846        94         879   641331

[1679 rows x 3 columns]
{173826, 724612, 620166, 426876, 150153, 910606, 1391376, 1067671, 1068568, 912924, 452510, 

In [59]:
connections_df.head()

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Haversine ---
def haversine_vectorized(lat, lon, lats, lons):
    R = 6371
    phi1 = np.radians(lat)
    phi2 = np.radians(lats)
    delta_phi = np.radians(lats - lat)
    delta_lambda = np.radians(lons - lon)
    a = np.sin(delta_phi/2)**2 + np.cos(phi1)*np.cos(phi2)*np.sin(delta_lambda/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

def find_closest_stop(lat, lon, all_stops_df):
    lats = all_stops_df['stop_lat'].to_numpy()
    lons = all_stops_df['stop_lon'].to_numpy()
    dists = haversine_vectorized(lat, lon, lats, lons)
    idx = np.argmin(dists)
    closest_stop = all_stops_df.iloc[idx].to_dict()
    closest_stop['distance_km'] = dists[idx]
    return closest_stop

# --- Optimierte Verbindungsfindung mit Zeiten ---
def find_connections_with_times(game_info, all_stops_df, trips_df, stop_times_df, calendar_df,
                                max_transfers=2, trip_type='departure'):
    """
    Liefert Verbindungen inkl. Abfahrts- und Ankunftszeiten für Start, Umstiege und Ziel.
    """
    # Start / Ziel
    if trip_type.lower() == 'departure':
        lat_start, lon_start = game_info['home_lat'], game_info['home_lon']
        lat_end, lon_end = game_info['away_lat'], game_info['away_lon']
    else:
        lat_start, lon_start = game_info['away_lat'], game_info['away_lon']
        lat_end, lon_end = game_info['home_lat'], game_info['home_lon']

    start_stop = find_closest_stop(lat_start, lon_start, all_stops_df)
    end_stop = find_closest_stop(lat_end, lon_end, all_stops_df)
    start_ids = [start_stop['stop_id']]
    end_ids = [end_stop['stop_id']]

    date = pd.to_datetime(game_info['date']).date()

    # Aktive Trips
    weekday = pd.to_datetime(game_info['date']).strftime('%A').lower()
    active_service_ids = calendar_df[calendar_df[weekday] == 1]['service_id'].tolist()
    active_trips = trips_df[trips_df['service_id'].isin(active_service_ids)]
    stop_times_active = stop_times_df[stop_times_df['trip_id'].isin(active_trips['trip_id'])]

    # Relevante Trips
    relevant_trips = stop_times_active[stop_times_active['stop_id'].isin(start_ids + end_ids)]['trip_id'].unique()
    stop_times_filtered = stop_times_active[stop_times_active['trip_id'].isin(relevant_trips)]

    # Trip → Stop → Zeiten
    trip_dict = {}
    for trip_id, group in stop_times_filtered.groupby('trip_id'):
        trip_dict[trip_id] = group.set_index('stop_id')[['departure_time', 'arrival_time']].to_dict('index')

    # Direkte Trips prüfen
    direct_trips = []
    for trip_id, stops in trip_dict.items():
        stop_ids = list(stops.keys())
        if start_ids[0] in stop_ids and end_ids[0] in stop_ids:
            idx_start = stop_ids.index(start_ids[0])
            idx_end = stop_ids.index(end_ids[0])
            if idx_start < idx_end:
                direct_trips.append(trip_id)

    results = []

    # --- Direkte Verbindungen ---
    for trip_id in direct_trips:
        stop_ids = list(trip_dict[trip_id].keys())
        idx_start = stop_ids.index(start_ids[0])
        idx_end = stop_ids.index(end_ids[0])
        path_stops = stop_ids[idx_start:idx_end+1]

        times = []
        for sid in path_stops:
            times.append({
                'stop_id': sid,
                'stop_name': all_stops_df.loc[all_stops_df['stop_id']==sid, 'stop_name'].values[0],
                'departure_time': trip_dict[trip_id][sid]['departure_time'],
                'arrival_time': trip_dict[trip_id][sid]['arrival_time']
            })
        results.append({
            'trips': [trip_id],
            'transfers': 0,
            'stop_times': times
        })

    # --- 1 Umstieg ---
    start_trips = {tid: stops for tid, stops in trip_dict.items() if start_ids[0] in stops and tid not in direct_trips}
    end_trips = {tid: stops for tid, stops in trip_dict.items() if end_ids[0] in stops and tid not in direct_trips}

    for s_tid, s_stops in start_trips.items():
        for e_tid, e_stops in end_trips.items():
            s_keys = list(s_stops.keys())
            e_keys = list(e_stops.keys())
            common_stops = set(s_keys[1:]) & set(e_keys[:-1])
            for umstieg in common_stops:
                # Pfad aufbauen
                s_ids = list(s_stops.keys())
                e_ids = list(e_stops.keys())
                idx_start = s_ids.index(start_ids[0])
                idx_umstieg_s = s_ids.index(umstieg)
                idx_umstieg_e = e_ids.index(umstieg)
                idx_end = e_ids.index(end_ids[0])

                path_ids = s_ids[idx_start:idx_umstieg_s+1] + e_ids[idx_umstieg_e+1:idx_end+1]

                times = []
                # Start → Umstieg
                for sid in s_ids[idx_start:idx_umstieg_s+1]:
                    times.append({
                        'stop_id': sid,
                        'stop_name': all_stops_df.loc[all_stops_df['stop_id']==sid, 'stop_name'].values[0],
                        'departure_time': s_stops[sid]['departure_time'],
                        'arrival_time': s_stops[sid]['arrival_time']
                    })
                # Umstieg → Ziel
                for sid in e_ids[idx_umstieg_e+1:idx_end+1]:
                    times.append({
                        'stop_id': sid,
                        'stop_name': all_stops_df.loc[all_stops_df['stop_id']==sid, 'stop_name'].values[0],
                        'departure_time': e_stops[sid]['departure_time'],
                        'arrival_time': e_stops[sid]['arrival_time']
                    })
                results.append({
                    'trips': [s_tid, e_tid],
                    'transfers': 1,
                    'stop_times': times
                })

    return pd.DataFrame(results)


In [22]:
connections_df = find_connections_with_times(
    game_info=dfMatches.iloc[1],
    all_stops_df=all_stops_df,
    trips_df=trips_df,
    stop_times_df=stop_times_df,
    calendar_df=calendar_df,
    max_transfers=1,   # nur 1 Umstieg für schnelle Berechnung
    trip_type='departure'
)

# Ausgabe Stopps und Zeiten für die erste Verbindung
if not connections_df.empty:
    for stop in connections_df.iloc[0]['stop_times']:
        print(stop['stop_name'], stop['departure_time'], stop['arrival_time'])


TypeError: 'dict_keys' object is not subscriptable

In [19]:
connections_df.head()


In [None]:
# --- Anwendung auf alle Zeilen ---
# df_try: DataFrame mit allen Matches
dfs = df_try.apply(lambda row: get_train_connections_for_row(row, "gtfs", window_hours=3, trip_type='departure'), axis=1)
df_all = pd.concat(dfs.tolist(), ignore_index=True)

In [13]:
dfMatches

Unnamed: 0,match_id,league,date,matchday,team_home,team_away,stadium,capacity,home_city,away_city,home_station,home_lat,home_lon,away_station,away_lat,away_lon
117,77379,bl1,2025-12-12T20:30:00,14. Spieltag,1. FC Union Berlin,RB Leipzig,Stadion An der Alten Försterei,22012,Berlin,Leipzig,Berlin Hauptbahnhof,52.524945,13.369661,Leipzig Hauptbahnhof,51.346552,12.383086
118,77375,bl1,2025-12-13T15:30:00,14. Spieltag,Eintracht Frankfurt,FC Augsburg,Deutsche Bank Park,59500,Frankfurt (Main),Augsburg,Frankfurt (Main) Hauptbahnhof,50.106654,8.662581,Augsburg Hauptbahnhof,48.365595,10.886355
119,77378,bl1,2025-12-13T15:30:00,14. Spieltag,Borussia Mönchengladbach,VfL Wolfsburg,Stadion im Borussia-Park,54042,Mönchengladbach,Wolfsburg,Mönchengladbach Hauptbahnhof,51.16322,6.439669,Wolfsburg Hauptbahnhof,52.429161,10.787807
120,77380,bl1,2025-12-13T15:30:00,14. Spieltag,FC St. Pauli,1. FC Heidenheim 1846,Millerntor-Stadion,29546,Hamburg,Heidenheim,Hamburg Hauptbahnhof,53.553199,10.006436,Heidenheim (Zentrum),48.676764,10.152923
121,77381,bl1,2025-12-13T15:30:00,14. Spieltag,TSG Hoffenheim,Hamburger SV,PreZero Arena,30150,Sinsheim,Hamburg,Sinsheim Hauptbahnhof,49.250168,8.8755,Hamburg Hauptbahnhof,53.553199,10.006436
122,77374,bl1,2025-12-13T18:30:00,14. Spieltag,Bayer 04 Leverkusen,1. FC Köln,BayArena,30210,Leverkusen,Köln,Leverkusen (Zentrum),51.032474,6.988119,Köln Hauptbahnhof,50.942784,6.959071
123,77376,bl1,2025-12-14T15:30:00,14. Spieltag,SC Freiburg,Borussia Dortmund,Europa-Park Stadion,34700,Freiburg,Dortmund,Freiburg Hauptbahnhof,47.99825,7.84234,Dortmund Hauptbahnhof,51.516939,7.460503
124,77373,bl1,2025-12-14T17:30:00,14. Spieltag,FC Bayern München,1. FSV Mainz 05,Allianz Arena,75000,München,Mainz,München Hauptbahnhof,48.140725,11.556943,Mainz Hauptbahnhof,50.001113,8.258723
125,77377,bl1,2025-12-14T19:30:00,14. Spieltag,SV Werder Bremen,VfB Stuttgart,Weserstadion,42100,Bremen,Stuttgart,Bremen Hauptbahnhof,53.083146,8.813542,Stuttgart Hauptbahnhof,48.784266,9.182117


In [None]:
import plotly.express as px

def plot_train_routes_plotly(df):
    """
    Plottet die Züge auf einer Karte mit Plotly.
    df muss die Spalten 'trip_id', 'stop_lat', 'stop_lon', 'stop_name', 'arrival', 'departure', 'stop_sequence' enthalten.
    """
    if df.empty:
        print("Keine Daten zum Plotten")
        return None
    
    # Sortieren nach trip_id und stop_sequence
    df = df.sort_values(['trip_id', 'stop_sequence'])
    
    # Popups für jeden Stopp
    df['popup'] = df.apply(lambda row: f"{row['stop_name']}<br>{row['arrival']} → {row['departure']}", axis=1)
    
    # Linien zwischen Stopps innerhalb eines Zuges
    fig = px.line_map(
        df, 
        lat="stop_lat", 
        lon="stop_lon", 
        color="match_id", 
        line_group="trip_id",  # jede Linie für einen Zug
        hover_name="stop_name",
        hover_data={"arrival": True, "departure": True, "stop_lat": False, "stop_lon": False},
        zoom=6,
        height=600
    )
    
    # Marker für jeden Stopp
    fig.add_scattermapbox(
        lat=df['stop_lat'],
        lon=df['stop_lon'],
        mode='markers+text',
        marker=dict(size=8, color='blue'),
        text=df['stop_name'],
        textposition="top right",
        hovertext=df['popup'],
        hoverinfo="text"
    )
    
    # Mapbox Style
    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    
    return fig


In [None]:
fig = plot_train_routes_plotly(df_all)
fig.show()
