In [1]:
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import Point

Station IDs (IBNR numbers) used below can be looked up at https://www.michaeldittrich.de/ibnr/online.php

In [2]:
def get_station_data(station_id, timeout=30):
    """
    Fetch one stop/station record from the v5.db.transport.rest API.

    Parameters:
    - station_id: Station ID interpolated into the /stops/{station_id} URL.
    - timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
    - The decoded JSON body on HTTP 200, or None on any other status code or
      when the request itself fails (connection error, timeout).
    """
    url = f"https://v5.db.transport.rest/stops/{station_id}"
    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract so callers looping over many IDs can continue.
        print(f"Error: request for station {station_id} failed - {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code} - {response.text}")
    return None

# Station IDs to look up via the API
station_ids = [
    8011160,  # Berlin Hbf
    8000207,  # Cologne Hbf
    8002549,  # Hamburg Hbf
    8000096,  # Stuttgart Hbf
    8100003,  # Vienna Westbahnhof
    8100002,  # Salzburg Hbf
    8300046,  # Milano Centrale
    8300239,  # Napoli Centrale
    8300094,  # Venezia Santa Lucia
    5400014,  # Praha hl. n.
    5600207,  # Bratislava hl. st.
    8800004,  # Bruxelles Midi
    5600007,  # Kosice
]

# Build one flat record per station that came back with usable coordinates
station_data_list = []
for station_id in station_ids:
    station_data = get_station_data(station_id)
    if not station_data:
        continue  # get_station_data already reported the failure

    # Tolerate responses without a 'location' object instead of raising KeyError
    location = station_data.get('location') or {}
    longitude = location.get('longitude')
    latitude = location.get('latitude')
    if longitude is None or latitude is None:
        print(f"Skipping station {station_id}: no coordinates in API response")
        continue

    station_data_list.append({
        'id': station_data.get('id'),
        'name': station_data.get('name'),
        'type': station_data.get('type'),
        'longitude': longitude,
        'latitude': latitude,
        'location': location,  # Keeping the original location dict
    })

station_db_data = pd.DataFrame(station_data_list)
# Points are (x=longitude, y=latitude)
station_db_data['geometry'] = [
    Point(lon, lat)
    for lon, lat in zip(station_db_data['longitude'], station_db_data['latitude'])
]
# Coordinates are plain lon/lat, hence EPSG:4326; set at construction instead of mutating in place
geo_station_db_data = gpd.GeoDataFrame(station_db_data, geometry='geometry', crs="EPSG:4326")
geo_station_db_data = geo_station_db_data.drop_duplicates(subset="id")  # Drop duplicates
geo_station_db_data

Unnamed: 0,id,name,type,longitude,latitude,location,geometry
0,8011160,Berlin Hbf,station,13.369629,52.524924,"{'type': 'location', 'id': '8011160', 'latitud...",POINT (13.36963 52.52492)
1,8000207,Köln Hbf,station,6.959197,50.942823,"{'type': 'location', 'id': '8000207', 'latitud...",POINT (6.95920 50.94282)
2,8002549,Hamburg Hbf,station,10.00636,53.553533,"{'type': 'location', 'id': '8002549', 'latitud...",POINT (10.00636 53.55353)
3,8000096,Stuttgart Hbf,station,9.182589,48.785052,"{'type': 'location', 'id': '8000096', 'latitud...",POINT (9.18259 48.78505)
4,8100003,Wien Westbahnhof,station,16.337256,48.196753,"{'type': 'location', 'id': '8100003', 'latitud...",POINT (16.33726 48.19675)
5,8100002,Salzburg Hbf,station,13.045514,47.813848,"{'type': 'location', 'id': '8100002', 'latitud...",POINT (13.04551 47.81385)
6,8300046,Milano Centrale,stop,9.204828,45.487143,"{'type': 'location', 'id': '8300046', 'latitud...",POINT (9.20483 45.48714)
7,8300239,Napoli Centrale,stop,14.273072,40.852827,"{'type': 'location', 'id': '8300239', 'latitud...",POINT (14.27307 40.85283)
8,8300094,Venezia Santa Lucia,stop,12.320462,45.441397,"{'type': 'location', 'id': '8300094', 'latitud...",POINT (12.32046 45.44140)
9,5400014,Praha hl.n.,station,14.436038,50.083058,"{'type': 'location', 'id': '5400014', 'latitud...",POINT (14.43604 50.08306)


In [3]:
# Quick visual check of the fetched station points on a map
# (GeoDataFrame.explore — presumably needs folium installed; confirm in your environment).
geo_station_db_data.explore()

In [4]:
from shapely.geometry import Point, LineString

# Connections as (start station ID, end station ID) pairs
connections = [
    (8011160, 8000207),  # Berlin - Cologne
    (8011160, 8002549),  # Berlin - Hamburg
    (8011160, 8000096),  # Berlin - Stuttgart
    (8100003, 8100002),  # Vienna Westbahnhof - Salzburg
    (8300046, 8300239),  # Milano Centrale - Napoli Centrale
    (5400014, 5600207),  # Praha hl. n. - Bratislava hl. st.
    (8800004, 5400014),  # Bruxelles Midi - Praha hl. n.
    (5400014, 5600007)   # Praha hl. n. - Kosice
]

# Carrier for each connection, in the same order as `connections`
carriers = [
    "FlixTrain",    # Berlin - Cologne
    "FlixTrain",    # Berlin - Hamburg
    "FlixTrain",    # Berlin - Stuttgart
    "WESTbahn",     # Vienna Westbahnhof - Salzburg
    "Trenitalia",        # Milano Centrale - Napoli Centrale
    "RegioJet",     # Praha hl. n. - Bratislava hl. st.
    "European Sleeper",  # Bruxelles Midi - Praha hl. n.
    "LEO Express"   # Praha hl. n. - Kosice
]
if len(carriers) != len(connections):
    raise ValueError("`carriers` must list exactly one carrier per entry in `connections`")

# Build one straight-line record per connection. The carrier is attached here,
# inside the loop, so a skipped connection can never shift carriers onto the wrong row.
connection_data = []
for (start_id, end_id), carrier in zip(connections, carriers):
    # Station IDs are stored as strings in geo_station_db_data
    start_station = geo_station_db_data[geo_station_db_data['id'] == str(start_id)]
    end_station = geo_station_db_data[geo_station_db_data['id'] == str(end_id)]

    if start_station.empty or end_station.empty:
        print(f"Skipping connection {start_id} to {end_id}: station not found in geo_station_db_data.")
        continue

    line = LineString([start_station.geometry.values[0], end_station.geometry.values[0]])
    connection_data.append({
        'start_id': start_id,
        'end_id': end_id,
        'start_name': start_station['name'].values[0],
        'end_name': end_station['name'].values[0],
        'geometry': line,
        'carrier': carrier,
    })

# Convert connections to a GeoDataFrame (lon/lat coordinates, EPSG:4326)
geo_connections = gpd.GeoDataFrame(connection_data, geometry='geometry', crs="EPSG:4326")
geo_connections

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier
0,8011160,8000207,Berlin Hbf,Köln Hbf,"LINESTRING (13.36963 52.52492, 6.95920 50.94282)",FlixTrain
1,8011160,8002549,Berlin Hbf,Hamburg Hbf,"LINESTRING (13.36963 52.52492, 10.00636 53.55353)",FlixTrain
2,8011160,8000096,Berlin Hbf,Stuttgart Hbf,"LINESTRING (13.36963 52.52492, 9.18259 48.78505)",FlixTrain
3,8100003,8100002,Wien Westbahnhof,Salzburg Hbf,"LINESTRING (16.33726 48.19675, 13.04551 47.81385)",WESTbahn
4,8300046,8300239,Milano Centrale,Napoli Centrale,"LINESTRING (9.20483 45.48714, 14.27307 40.85283)",Trenitalia
5,5400014,5600207,Praha hl.n.,Bratislava hl.st.,"LINESTRING (14.43604 50.08306, 17.10646 48.15891)",RegioJet
6,8800004,5400014,Bruxelles Midi,Praha hl.n.,"LINESTRING (4.33570 50.83537, 14.43604 50.08306)",European Sleeper
7,5400014,5600007,Praha hl.n.,Kosice,"LINESTRING (14.43604 50.08306, 21.26872 48.72292)",LEO Express


----

## Direct Connections

In [5]:
import requests
import time
from shapely.geometry import LineString

def get_journey_polyline(station_id, destination_id, search_string, departure_time, timeout=30):
    """
    Fetch the route polyline of a direct journey run by a given operator.

    Parameters:
    - station_id: Start station ID (sent as the `from` query parameter).
    - destination_id: Destination station ID (sent as the `to` query parameter).
    - search_string: Substring that must appear in the leg's operator name.
    - departure_time: Value sent as the `departure` query parameter.
    - timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
    - A LineString built from the polyline of the first matching leg that has at
      least two points, or None if the request fails, no journey is returned,
      no leg matches the operator, or no matching leg carries a usable polyline.
    """
    journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
    # transfers=0 restricts the search to direct journeys
    journey_params = {'stopovers': True, 'polylines': True,
                      'results': 10, 'transfers': 0, 'departure': departure_time}
    try:
        journey_response = requests.get(journey_url, params=journey_params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching journey: {exc}")
        return None

    if journey_response.status_code != 200:
        print(f"Error fetching journey: {journey_response.status_code} - {journey_response.text}")
        return None

    journeys = journey_response.json().get('journeys') or []
    if not journeys:
        print(f"No direct journeys found for {station_id} to {destination_id}.")
        return None

    operator_matched = False
    for journey in journeys:
        for leg in journey.get('legs') or []:
            # 'line' and 'operator' may be missing or null (e.g. on legs without a line)
            operator_name = ((leg.get('line') or {}).get('operator') or {}).get('name') or ''
            if search_string not in operator_name:
                continue

            if not operator_matched:
                print(f"Direct journey found with operator containing '{search_string}': {operator_name}")
                operator_matched = True

            polyline_data = (leg.get('polyline') or {}).get('features') or []
            coordinates = [feature['geometry']['coordinates'] for feature in polyline_data]
            # A LineString needs at least two points; otherwise keep looking at later journeys
            if len(coordinates) >= 2:
                return LineString(coordinates)

    if operator_matched:
        print(f"No polyline data found for {station_id} to {destination_id}. Resorting to stopovers.")
    return None

def get_journey_stopover_linestring(station_id, destination_id, search_string):
    """
    Fallback route geometry for a direct journey, built from its stopovers.

    Looks up the stopover station IDs for the journey and, if there are any,
    turns them into a LineString. Prints a message and returns None otherwise.
    """
    ids_along_route = get_stopover_ids(station_id, destination_id, search_string)

    # Guard clause: nothing to build a line from
    if not ids_along_route:
        print(f"No stopover data found for {station_id} to {destination_id}.")
        return None

    return create_linestring_from_stopovers(ids_along_route)

def get_stopover_ids(station_id, destination_id, search_string, departure_time=None, timeout=30):
    """
    Return the stopover station IDs of a direct journey run by a given operator.

    Parameters:
    - station_id: Start station ID (sent as the `from` query parameter).
    - destination_id: Destination station ID (sent as the `to` query parameter).
    - search_string: Substring that must appear in the leg's operator name.
    - departure_time: Optional value for the `departure` query parameter; when
      omitted the parameter is not sent at all.
    - timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
    - List of stop IDs from the first leg whose operator name contains
      `search_string`. Always a list: empty when the request fails, no journey
      is returned, or no leg matches.
    """
    journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
    journey_params = {'stopovers': True, 'results': 10, 'transfers': 0}  # transfers=0 ensures direct trains
    if departure_time is not None:
        journey_params['departure'] = departure_time

    try:
        journey_response = requests.get(journey_url, params=journey_params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching stopovers: {exc}")
        return []

    if journey_response.status_code != 200:
        print(f"Error fetching stopovers: {journey_response.status_code} - {journey_response.text}")
        return []

    journeys = journey_response.json().get('journeys') or []
    if not journeys:
        print(f"No direct journeys found for {station_id} to {destination_id}.")
        return []

    for journey in journeys:
        for leg in journey.get('legs') or []:
            # 'line' and 'operator' may be missing or null (e.g. on legs without a line)
            operator_name = ((leg.get('line') or {}).get('operator') or {}).get('name') or ''
            if search_string not in operator_name:
                continue

            print(f"Direct journey found with operator containing '{search_string}': {operator_name}")
            stops = (stopover.get('stop') for stopover in leg.get('stopovers') or [])
            return [stop['id'] for stop in stops if stop and 'id' in stop]

    # No leg matched the operator: return an empty list like every other failure path
    return []

def create_linestring_from_stopovers(stopover_ids):
    """
    Build a LineString through the stations of a journey, in the given order.

    Parameters:
    - stopover_ids: Iterable of station IDs; each one is resolved with get_station_data.

    Returns:
    - A LineString of (longitude, latitude) points, or None when fewer than two
      stations could be resolved to coordinates (a line needs at least two points).
    """
    coordinates = []

    for station_id in stopover_ids:
        station_data = get_station_data(station_id)
        if not station_data:
            continue  # lookup failed; get_station_data reports the error itself

        # Tolerate responses without a 'location' object or without coordinates
        location = station_data.get('location') or {}
        longitude = location.get('longitude')
        latitude = location.get('latitude')
        if longitude is None or latitude is None:
            print(f"No coordinates for stopover {station_id}; skipping it.")
            continue

        coordinates.append((longitude, latitude))

    if len(coordinates) < 2:
        print("No valid coordinates found.")
        return None

    line = LineString(coordinates)
    print(f"Created LineString from stopovers: {line}")
    return line

# NOTE: this redefines get_station_data from the first cell of the notebook;
# from here on this definition is the one in effect.
def get_station_data(station_id, timeout=30):
    """
    Fetch one stop/station record from the v5.db.transport.rest API.

    Parameters:
    - station_id: Station ID interpolated into the /stops/{station_id} URL.
    - timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
    - The decoded JSON body on HTTP 200, or None on any other status code or
      when the request itself fails (connection error, timeout).
    """
    url = f"https://v5.db.transport.rest/stops/{station_id}"
    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching station data for ID {station_id}: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error fetching station data for ID {station_id}: {response.status_code}")
    return None

def update_geometries_with_fallback(geo_connections, departure_time='tomorrow at 8am', refetch_max_points=3):
    """
    Replace coarse connection geometries with real route shapes for direct trains.

    For every row whose geometry has at most `refetch_max_points` points, try the
    journey polyline first and fall back to a line through the stopovers. Rows
    that already have a more detailed LineString are left untouched.

    Parameters:
    - geo_connections: Table with 'start_id', 'end_id', 'carrier' and 'geometry'
      columns. It is modified in place.
    - departure_time: Departure passed to get_journey_polyline (default 'tomorrow at 8am').
    - refetch_max_points: Geometries with more points than this are skipped (default 3).

    Returns:
    - The same `geo_connections` object, with updated geometries where one was found.
    """
    for index, row in geo_connections.iterrows():
        existing_geometry = row['geometry']
        has_detailed_line = (
            existing_geometry
            and isinstance(existing_geometry, LineString)
            and len(existing_geometry.coords) > refetch_max_points
        )
        if has_detailed_line:
            print(f"Skipping journey {row['start_id']} to {row['end_id']} with carrier {row['carrier']} - existing geometry has sufficient points.")
            continue

        station_id = row['start_id']
        destination_id = row['end_id']
        carrier_name = row['carrier']  # used as the operator search string

        # First, try to get the polyline as a LineString
        line_string = get_journey_polyline(station_id, destination_id, carrier_name, departure_time=departure_time)

        # If polyline is unavailable, fall back to using stopovers
        if not line_string:
            line_string = get_journey_stopover_linestring(station_id, destination_id, carrier_name)

        if line_string:
            geo_connections.at[index, 'geometry'] = line_string
        else:
            print(f"No valid geometry found for direct journey {station_id} to {destination_id} with carrier {carrier_name}.")

    return geo_connections

# Replace the straight start-to-end lines built above with real route shapes where available.
# Rows whose geometry already has more than 3 points are left untouched by the function.
geo_connections = update_geometries_with_fallback(geo_connections)

No stopover data found for 8011160 to 8000207.
No valid geometry found for direct journey 8011160 to 8000207 with carrier FlixTrain.
Direct journey found with operator containing 'FlixTrain': FlixTrain
Created LineString from stopovers: LINESTRING (13.368892 52.52585, 11.854767 52.594671, 10.419873 53.249742, 10.00636 53.553533)
Direct journey found with operator containing 'FlixTrain': FlixTrain
Direct journey found with operator containing 'WESTbahn': WESTbahn
Direct journey found with operator containing 'Trenitalia': Trenitalia
Direct journey found with operator containing 'RegioJet': RegioJet
Direct journey found with operator containing 'European Sleeper': European Sleeper
Direct journey found with operator containing 'LEO Express': LEO Express


In [6]:
# Remove three of the initial connections. They are identified by (start_id, end_id)
# rather than by row label, so the cell does not depend on row order and can be re-run
# without raising. These are the rows that carried labels 0, 1 and 4 in the table above.
# NOTE(review): the log above shows no geometry for Berlin - Köln and only a stopover
# line for Berlin - Hamburg; the reason for dropping Milano - Napoli is not visible here.
routes_to_drop = {
    (8011160, 8000207),  # Berlin Hbf - Köln Hbf
    (8011160, 8002549),  # Berlin Hbf - Hamburg Hbf
    (8300046, 8300239),  # Milano Centrale - Napoli Centrale
}
keep_route = [
    (start_id, end_id) not in routes_to_drop
    for start_id, end_id in zip(geo_connections['start_id'], geo_connections['end_id'])
]
# Boolean selection keeps the original row labels of the remaining rows
geo_connections = geo_connections.loc[keep_route].copy()
geo_connections

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier
2,8011160,8000096,Berlin Hbf,Stuttgart Hbf,"LINESTRING (13.36889 52.52585, 13.36963 52.524...",FlixTrain
3,8100003,8100002,Wien Westbahnhof,Salzburg Hbf,"LINESTRING (16.33726 48.19676, 16.26125 48.197...",WESTbahn
5,5400014,5600207,Praha hl.n.,Bratislava hl.st.,"LINESTRING (14.43604 50.08306, 15.75621 50.031...",RegioJet
6,8800004,5400014,Bruxelles Midi,Praha hl.n.,"LINESTRING (4.33570 50.83538, 4.42117 51.21581...",European Sleeper
7,5400014,5600007,Praha hl.n.,Kosice,"LINESTRING (14.43604 50.08306, 14.50230 50.100...",LEO Express


In [7]:
import requests
from shapely.geometry import LineString

def update_geo_connections(geo_connections, start_id, destination_id, carrier, departure_time_x = 'tomorrow at 8pm'):
    """
    Function to update the geo_connections table with a new entry.

    The route geometry is taken from the journey polyline when available and
    from the journey's stopovers otherwise. If neither yields a geometry, the
    table is returned unchanged.

    Parameters:
    - geo_connections: The current geo_connections DataFrame.
    - start_id: The start station ID.
    - destination_id: The destination station ID.
    - carrier: The carrier name (string); also used as the operator search string.
    - departure_time_x: Departure passed to the polyline lookup (default 'tomorrow at 8pm').

    Returns:
    - Updated geo_connections DataFrame (a new object with a fresh 0..n-1 index
      when a row was added, otherwise the input object itself).
    """
    # Fetch the journey polyline
    line_string = get_journey_polyline(start_id, destination_id, carrier, departure_time=departure_time_x)

    # If polyline is unavailable, fall back to using stopovers
    if not line_string:
        line_string = get_journey_stopover_linestring(start_id, destination_id, carrier)

    if not line_string:
        print(f"Failed to find a valid journey between {start_id} and {destination_id} with carrier {carrier}.")
        return geo_connections

    # Fetch station names for start and destination; fall back to 'Unknown'
    start_station = get_station_data(start_id)
    destination_station = get_station_data(destination_id)
    start_name = start_station.get('name', 'Unknown') if start_station else 'Unknown'
    destination_name = destination_station.get('name', 'Unknown') if destination_station else 'Unknown'

    new_entry = {
        'start_id': start_id,
        'end_id': destination_id,
        'start_name': start_name,
        'end_name': destination_name,
        'geometry': line_string,
        'carrier': carrier
    }

    # Wrap the new row in a GeoDataFrame carrying the table's CRS: concatenating with a
    # plain DataFrame can degrade the geometry column and lose the CRS.
    new_row = gpd.GeoDataFrame([new_entry], geometry='geometry', crs=getattr(geo_connections, 'crs', None))
    geo_connections = pd.concat([geo_connections, new_row], ignore_index=True)
    print(f"Added new entry: {start_name} to {destination_name} ({carrier})")

    return geo_connections

# Add the RegioJet route Wien Hbf (8103000) - Praha hl.n. (5400014),
# using the function's default departure time.
geo_connections = update_geo_connections(geo_connections, # Vienna - Praha
                                         start_id=8103000, destination_id=5400014, carrier="RegioJet")

Direct journey found with operator containing 'RegioJet': RegioJet
Added new entry: Wien Hbf to Praha hl.n. (RegioJet)


In [8]:
# Add Italian long-distance routes.
# NOTE(review): this cell was headed "Add Italo routes", but the operator string
# searched for is "Trenitalia" — confirm which operator these rows should represent.
geo_connections = update_geo_connections(geo_connections, # Venice - Naples
                                         start_id=8300094, destination_id=8300239, carrier="Trenitalia")
geo_connections = update_geo_connections(geo_connections, # Venice - Torino
                                         start_id=8300094, destination_id=8396006, carrier="Trenitalia")
geo_connections = update_geo_connections(geo_connections, # Reggio Calabria - Torino
                                         start_id=8300337, destination_id=8396006, carrier="Trenitalia")

Direct journey found with operator containing 'Trenitalia': Trenitalia
Added new entry: Venezia Santa Lucia to Napoli Centrale (Trenitalia)
Direct journey found with operator containing 'Trenitalia': Trenitalia
Added new entry: Venezia Santa Lucia to TORINO (Trenitalia)
Direct journey found with operator containing 'Trenitalia': Trenitalia
Added new entry: Reggio di Calabria Centrale to TORINO (Trenitalia)


In [9]:
geo_connections = update_geo_connections(geo_connections, # Brussels - Paris
                                         start_id=8800004, destination_id=8700014, carrier="EUROSTAR"), 

Direct journey found with operator containing 'EUROSTAR': EUROSTAR
Added new entry: Bruxelles Midi to Paris Nord (EUROSTAR)


In [10]:
geo_connections = update_geo_connections(geo_connections[0], # Brussels - Amsterdam
                                         start_id=8800004, destination_id=8400058, carrier="EUROSTAR")

Direct journey found with operator containing 'EUROSTAR': EUROSTAR
Added new entry: Bruxelles Midi to Amsterdam Centraal (EUROSTAR)


In [11]:
# Re-add Berlin Hbf - Hamburg Hbf (FlixTrain), this time searching around 1pm.
# The first attempt for this pair only produced a line through stopovers and the row was dropped.
geo_connections = update_geo_connections(geo_connections, # Berlin - Hamburg
                                         start_id=8011160, destination_id=8002549, carrier="FlixTrain",
                                        departure_time_x = 'tomorrow at 1pm')

Direct journey found with operator containing 'FlixTrain': FlixTrain
Added new entry: Berlin Hbf to Hamburg Hbf (FlixTrain)


In [12]:
# Brussels - Dortmund (operator string "SNCF Voyages Deutschland").
# NOTE(review): 'today 17:20' is relative to the run date, so the result may differ between runs.
geo_connections = update_geo_connections(geo_connections,
                                         start_id=8800004, destination_id=8000080, carrier="SNCF Voyages Deutschland",
                                        departure_time_x = 'today 17:20')

Direct journey found with operator containing 'SNCF Voyages Deutschland': SNCF Voyages Deutschland
Added new entry: Bruxelles Midi to Dortmund Hbf (SNCF Voyages Deutschland)


In [13]:
# Vienna - Budapest (RegioJet), added by hand.
# The geometry is a rough three-point line (Vienna, Győr, Budapest), not an API polyline.

# Approximate coordinates as [longitude, latitude]
vienna_coords = [16.3738, 48.2082]  # Vienna
gyor_coords = [17.6376, 47.6875]  # Győr
budapest_coords = [19.0402, 47.4979]  # Budapest

# Simple LineString Vienna -> Győr -> Budapest
vienna_budapest_line = LineString([vienna_coords, gyor_coords, budapest_coords])

# New row for the geo_connections table
new_entry = {
    'start_id': 8100003,  # Same ID as Wien Westbahnhof in the station list above
    'end_id': 5500008,  # Example station ID for Budapest
    'start_name': 'Vienna',
    'end_name': 'Budapest',
    'geometry': vienna_budapest_line,
    'carrier': 'RegioJet'
}

# Wrap the row in a GeoDataFrame carrying the table's CRS before appending: concatenating
# with a plain DataFrame can degrade the geometry column and lose the CRS.
new_row = gpd.GeoDataFrame([new_entry], geometry='geometry', crs=getattr(geo_connections, 'crs', None))
geo_connections = pd.concat([geo_connections, new_row], ignore_index=True)

In [14]:
# Hamburg Hbf - Köln (FlixTrain). Destination ID 8096022 is reported by the API as "KÖLN",
# not "Köln Hbf" (8000207).
# NOTE(review): 'thursday at 2pm' is relative to the run date, so the result may differ between runs.
geo_connections = update_geo_connections(geo_connections, 
                                         start_id=8002549, destination_id=8096022, carrier="FlixTrain",
                                        departure_time_x = 'thursday at 2pm')

Direct journey found with operator containing 'FlixTrain': FlixTrain
Added new entry: Hamburg Hbf to KÖLN (FlixTrain)


In [15]:
# Hamburg Hbf - Leipzig Hbf (FlixTrain), searching around Thursday 11am.
geo_connections = update_geo_connections(geo_connections, 
                                         start_id=8002549, destination_id=8010205, carrier="FlixTrain",
                                        departure_time_x = 'thursday at 11am')

Direct journey found with operator containing 'FlixTrain': FlixTrain
Added new entry: Hamburg Hbf to Leipzig Hbf (FlixTrain)


In [16]:
# Köln (ID 8096022, reported by the API as "KÖLN") - Dresden Hbf (FlixTrain), searching around Thursday 4pm.
geo_connections = update_geo_connections(geo_connections, 
                                         start_id=8096022, destination_id=8010085, carrier="FlixTrain",
                                        departure_time_x = 'thursday at 4pm')

Direct journey found with operator containing 'FlixTrain': FlixTrain
Added new entry: KÖLN to Dresden Hbf (FlixTrain)


In [17]:
import folium

# Initialize a folium map centered in Europe
m = folium.Map(location=[50.0, 10.0], zoom_start=5, tiles="CartoDB positron")

# Line color per carrier; carriers missing from this mapping are drawn in gray
carrier_colors = {
    "FlixTrain": "green",
    "WESTbahn": "blue",
    "Trenitalia": "red",
    "RegioJet": "yellow",
    "European Sleeper": "black",
    "LEO Express": "#f78021",
    "EUROSTAR": "#ad1535",
    "SNCF Voyages Deutschland": "#ad1535"
}

# Add each connection to the map with the full polyline and specified color by carrier
for _, row in geo_connections.iterrows():
    # Geometry coordinates are (lon, lat); folium expects [lat, lon]
    polyline_coords = [[lat, lon] for lon, lat in row['geometry'].coords]

    # Create a polyline for the full journey
    line = folium.PolyLine(
        locations=polyline_coords,  # Full list of coordinates (lat, lon)
        color=carrier_colors.get(row['carrier'], 'gray'),  # Default color is gray if carrier not in the dictionary
        weight=2,
        tooltip=f"{row['start_name']} to {row['end_name']} ({row['carrier']})"
    )
    line.add_to(m)

# Add a legend (custom HTML block). Swatch colors must match carrier_colors above;
# the LEO Express swatch uses #f78021, the same color its lines are drawn in.
# NOTE(review): the red swatch is labelled "Italotreno" while the carrier value in the
# data is "Trenitalia" — confirm the intended label.
legend_html = '''
     <div style="
     position: fixed; 
     bottom: 50px; left: 50px; width: 180px; height: 230px; 
     background-color: white; border:2px solid grey; z-index:9999; font-size:14px;
     padding: 10px; border-radius: 10px;
     box-shadow: 3px 3px 5px rgba(0,0,0,0.4);">
     <h4 style="margin-bottom: 10px; text-align: center;">Private Operators</h4>
     <p style="margin: 0;">
     <i style="background:green; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;FlixTrain<br>
     <i style="background:blue; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;WESTbahn<br>
     <i style="background:red; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;Italotreno<br>
     <i style="background:yellow; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;RegioJet<br>
     <i style="background:black; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;European Sleeper<br>
     <i style="background:#f78021; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;LEO Express<br>
     <i style="background:#ad1535; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;Eurostar/Thalys
     </p>
     </div>
     '''

m.get_root().html.add_child(folium.Element(legend_html))

# Display
m

In [20]:
import folium

# Base map over Europe
m = folium.Map(location=[50.0, 10.0], zoom_start=5, tiles="CartoDB positron")

# Line color per carrier (anything not listed falls back to gray)
carrier_colors = {
    "FlixTrain": "green",
    "WESTbahn": "blue",
    "Trenitalia": "red",
    "RegioJet": "yellow",
    "European Sleeper": "black",
    "LEO Express": "#f78021",
    "EUROSTAR": "#ad1535",
    "SNCF Voyages Deutschland": "#ad1535"
}

# One toggleable layer per journey
for _, row in geo_connections.iterrows():
    # The same text names the layer and serves as the hover tooltip
    journey_label = f"{row['start_name']} to {row['end_name']} ({row['carrier']})"

    # Geometry coordinates are (lon, lat); folium expects [lat, lon]
    latlon_path = [[lat, lon] for lon, lat in row['geometry'].coords]

    journey_layer = folium.FeatureGroup(name=journey_label)
    folium.PolyLine(
        locations=latlon_path,
        color=carrier_colors.get(row['carrier'], 'gray'),
        weight=2,
        tooltip=journey_label
    ).add_to(journey_layer)
    journey_layer.add_to(m)

# Static HTML legend pinned to the bottom-left corner of the map
legend_html = '''
     <div style="
     position: fixed; 
     bottom: 50px; left: 50px; width: 180px; height: 230px; 
     background-color: white; border:2px solid grey; z-index:9999; font-size:14px;
     padding: 10px; border-radius: 10px;
     box-shadow: 3px 3px 5px rgba(0,0,0,0.4);">
     <h4 style="margin-bottom: 10px; text-align: center;">Private Operators</h4>
     <p style="margin: 0;">
     <i style="background:green; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;FlixTrain<br>
     <i style="background:blue; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;WESTbahn<br>
     <i style="background:red; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;Trenitalia<br>
     <i style="background:yellow; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;RegioJet<br>
     <i style="background:black; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;European Sleeper<br>
     <i style="background:#f78021; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;LEO Express<br>
     <i style="background:#ad1535; width: 30px; height: 3px; display: inline-block;"></i>&nbsp;Eurostar/Thalys
     </p>
     </div>
     '''
legend_element = folium.Element(legend_html)
m.get_root().html.add_child(legend_element)

# Layer switcher for the per-journey layers, then write the map to disk
folium.LayerControl().add_to(m)
m.save("private_carriers.html")

# Show the map as the cell output
m

In [24]:
# year_est per route, keyed by (start_id, end_id) so the assignment does not depend on
# row order or on the exact number of rows in geo_connections.
year_est_by_route = {
    (8011160, 8000096): 2018,  # Berlin Hbf - Stuttgart Hbf (FlixTrain)
    (8100003, 8100002): 2012,  # Wien Westbahnhof - Salzburg Hbf (WESTbahn)
    (5400014, 5600207): 2012,  # Praha hl.n. - Bratislava hl.st. (RegioJet)
    (8800004, 5400014): 2023,  # Bruxelles Midi - Praha hl.n. (European Sleeper)
    (5400014, 5600007): 2016,  # Praha hl.n. - Kosice (LEO Express)
    (8103000, 5400014): 2017,  # Wien Hbf - Praha hl.n. (RegioJet)
    (8300094, 8300239): 2015,  # Venezia Santa Lucia - Napoli Centrale (Trenitalia)
    (8300094, 8396006): 2015,  # Venezia Santa Lucia - TORINO (Trenitalia)
    (8300337, 8396006): 2015,  # Reggio di Calabria Centrale - TORINO (Trenitalia)
    (8800004, 8700014): 2000,  # Bruxelles Midi - Paris Nord (EUROSTAR)
    (8800004, 8400058): 2000,  # Bruxelles Midi - Amsterdam Centraal (EUROSTAR)
    (8011160, 8002549): 2021,  # Berlin Hbf - Hamburg Hbf (FlixTrain)
    (8800004, 8000080): 2016,  # Bruxelles Midi - Dortmund Hbf (SNCF Voyages Deutschland)
    (8100003, 5500008): 2017,  # Vienna - Budapest (RegioJet)
    (8002549, 8096022): 2018,  # Hamburg Hbf - KÖLN (FlixTrain)
    (8002549, 8010205): 2021,  # Hamburg Hbf - Leipzig Hbf (FlixTrain)
    (8096022, 8010085): 2018,  # KÖLN - Dresden Hbf (FlixTrain)
}

route_keys = [
    (int(start_id), int(end_id))
    for start_id, end_id in zip(geo_connections['start_id'], geo_connections['end_id'])
]
# Fail loudly rather than leave a route without a year
routes_without_year = [key for key in route_keys if key not in year_est_by_route]
if routes_without_year:
    raise KeyError(f"No year_est defined for routes: {routes_without_year}")

geo_connections["year_est"] = [year_est_by_route[key] for key in route_keys]
geo_connections

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier,year_est
0,8011160,8000096,Berlin Hbf,Stuttgart Hbf,"LINESTRING (13.36889 52.52585, 13.36963 52.524...",FlixTrain,2018
1,8100003,8100002,Wien Westbahnhof,Salzburg Hbf,"LINESTRING (16.33726 48.19676, 16.26125 48.197...",WESTbahn,2012
2,5400014,5600207,Praha hl.n.,Bratislava hl.st.,"LINESTRING (14.43604 50.08306, 15.75621 50.031...",RegioJet,2012
3,8800004,5400014,Bruxelles Midi,Praha hl.n.,"LINESTRING (4.33570 50.83538, 4.42117 51.21581...",European Sleeper,2023
4,5400014,5600007,Praha hl.n.,Kosice,"LINESTRING (14.43604 50.08306, 14.50230 50.100...",LEO Express,2016
5,8103000,5400014,Wien Hbf,Praha hl.n.,"LINESTRING (16.37711 48.18511, 16.86837 48.712...",RegioJet,2017
6,8300094,8300239,Venezia Santa Lucia,Napoli Centrale,"LINESTRING (12.32046 45.44140, 12.23190 45.481...",Trenitalia,2015
7,8300094,8396006,Venezia Santa Lucia,TORINO,"LINESTRING (12.32046 45.44140, 12.23190 45.481...",Trenitalia,2015
8,8300337,8396006,Reggio di Calabria Centrale,TORINO,"LINESTRING (15.63579 38.10412, 15.64965 38.114...",Trenitalia,2015
9,8800004,8700014,Bruxelles Midi,Paris Nord,"LINESTRING (4.33570 50.83538, 2.35493 48.88089)",EUROSTAR,2000


In [25]:
# Routes with year_est up to and including 2014
geo_connections[geo_connections.year_est<=2014]

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier,year_est
1,8100003,8100002,Wien Westbahnhof,Salzburg Hbf,"LINESTRING (16.33726 48.19676, 16.26125 48.197...",WESTbahn,2012
2,5400014,5600207,Praha hl.n.,Bratislava hl.st.,"LINESTRING (14.43604 50.08306, 15.75621 50.031...",RegioJet,2012
9,8800004,8700014,Bruxelles Midi,Paris Nord,"LINESTRING (4.33570 50.83538, 2.35493 48.88089)",EUROSTAR,2000
10,8800004,8400058,Bruxelles Midi,Amsterdam Centraal,"LINESTRING (4.33570 50.83538, 4.42117 51.21581...",EUROSTAR,2000


In [26]:
# Routes with year_est after 2014 and up to and including 2016
geo_connections[(geo_connections.year_est>2014)&(geo_connections.year_est<=2016)]

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier,year_est
4,5400014,5600007,Praha hl.n.,Kosice,"LINESTRING (14.43604 50.08306, 14.50230 50.100...",LEO Express,2016
6,8300094,8300239,Venezia Santa Lucia,Napoli Centrale,"LINESTRING (12.32046 45.44140, 12.23190 45.481...",Trenitalia,2015
7,8300094,8396006,Venezia Santa Lucia,TORINO,"LINESTRING (12.32046 45.44140, 12.23190 45.481...",Trenitalia,2015
8,8300337,8396006,Reggio di Calabria Centrale,TORINO,"LINESTRING (15.63579 38.10412, 15.64965 38.114...",Trenitalia,2015
12,8800004,8000080,Bruxelles Midi,Dortmund Hbf,"LINESTRING (4.33570 50.83538, 5.56648 50.62436...",SNCF Voyages Deutschland,2016


In [27]:
# Routes with year_est after 2016 and up to and including 2018
geo_connections[(geo_connections.year_est>2016)&(geo_connections.year_est<=2018)]

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier,year_est
0,8011160,8000096,Berlin Hbf,Stuttgart Hbf,"LINESTRING (13.36889 52.52585, 13.36963 52.524...",FlixTrain,2018
5,8103000,5400014,Wien Hbf,Praha hl.n.,"LINESTRING (16.37711 48.18511, 16.86837 48.712...",RegioJet,2017
13,8100003,5500008,Vienna,Budapest,"LINESTRING (16.37380 48.20820, 17.63760 47.687...",RegioJet,2017
14,8002549,8096022,Hamburg Hbf,KÖLN,"LINESTRING (10.00636 53.55354, 10.00700 53.553...",FlixTrain,2018
16,8096022,8010085,KÖLN,Dresden Hbf,"LINESTRING (6.95920 50.94283, 6.95970 50.94304...",FlixTrain,2018


In [30]:
# Routes with year_est after 2018 and up to and including 2023
geo_connections[(geo_connections.year_est>2018)&(geo_connections.year_est<=2023)]

Unnamed: 0,start_id,end_id,start_name,end_name,geometry,carrier,year_est
3,8800004,5400014,Bruxelles Midi,Praha hl.n.,"LINESTRING (4.33570 50.83538, 4.42117 51.21581...",European Sleeper,2023
11,8011160,8002549,Berlin Hbf,Hamburg Hbf,"LINESTRING (13.36889 52.52585, 13.36723 52.528...",FlixTrain,2021
15,8002549,8010205,Hamburg Hbf,Leipzig Hbf,"LINESTRING (10.00636 53.55354, 10.00712 53.553...",FlixTrain,2021


In [42]:
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
from matplotlib.lines import Line2D

# Ensure the GeoDataFrame has a CRS
if geo_connections.crs is None:
    geo_connections = geo_connections.set_crs(epsg=4326)  # Assuming input is in WGS 84

# Reproject into a SEPARATE frame for plotting (Web Mercator matches the basemap).
# `geo_connections` itself keeps its CRS: other cells of this notebook read its
# coordinates as longitude/latitude, so overwriting it here would corrupt them.
connections_mercator = geo_connections.to_crs(epsg=3857)

# Cumulative snapshots: each map shows every connection established up to that year
snapshot_years = [2014, 2016, 2018, 2023]
subsets = {
    str(snapshot): connections_mercator[connections_mercator.year_est <= snapshot]
    for snapshot in snapshot_years
}

# Define the output filenames for the plots
output_files = {str(snapshot): f"connections_{snapshot}.png" for snapshot in snapshot_years}

# Carrier color mapping
carrier_colors = {
    "FlixTrain": "green",
    "WESTbahn": "blue",
    "Trenitalia": "red",
    "RegioJet": "yellow",
    "European Sleeper": "black",
    "LEO Express": "#f78021",
    "EUROSTAR": "#ad1535",
    "SNCF Voyages Deutschland": "#ad1535"
}

# Determine a common bounding box for all maps so every snapshot shares one extent
minx, miny, maxx, maxy = connections_mercator['geometry'].total_bounds
buffer = 100000  # 100 km of padding (EPSG:3857 units are metres) for map context
minx, miny, maxx, maxy = minx - buffer, miny - buffer, maxx + buffer, maxy + buffer

# Create a custom legend (one entry per known carrier, identical on every map)
legend_elements = [
    Line2D([0], [0], color=color, lw=2, label=carrier)
    for carrier, color in carrier_colors.items()
]

# Generate plots for each subset
for year, data in subsets.items():
    fig, ax = plt.subplots(figsize=(16, 12))

    # Plot each connection with its carrier's color; unknown carriers are gray
    for _, row in data.iterrows():
        geom = row['geometry']
        color = carrier_colors.get(row['carrier'], 'gray')
        ax.plot(*geom.xy, color=color, linewidth=1.5, alpha=0.8)

    # Set consistent limits and proportions for all maps
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)

    # Add basemap
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron, crs=data.crs, zoom=6)

    # Add legend to the bottom left corner
    ax.legend(
        handles=legend_elements,
        loc='lower left',
        fontsize=10,
        title="Carriers",
        title_fontsize=12,
        frameon=True,
        fancybox=True,
        framealpha=0.9,
        borderpad=1
    )

    # Set title to just the year
    ax.set_title(year, fontsize=18, weight='bold')
    ax.axis("off")  # Remove axes for a cleaner look

    # Save through the figure object, then close it to free memory in the loop
    fig.savefig(output_files[year], dpi=300, bbox_inches="tight")
    plt.close(fig)

print("Static maps with wider context, simplified titles, and consistent formatting generated.")

Static maps with wider context, simplified titles, and consistent formatting generated.


In [None]:
# Probe one journeys query to see which operator the API reports for the route.
station_id = 5400014
destination_id = 5500008

journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
journey_params = {'stopovers': True, 'polylines': True, 'results': 10, 'departure': 'tomorrow 6am'}
# A timeout keeps the cell from hanging forever if the API does not answer.
journey_response = requests.get(journey_url, params=journey_params, timeout=30)
# Surface HTTP failures as such instead of as a KeyError in the lookup below.
journey_response.raise_for_status()

# Operator name of the first leg of the first returned journey
journey_response.json()['journeys'][0]['legs'][0]['line']['operator']['name']

In [None]:
import requests
import time
import pandas as pd
from shapely.geometry import LineString

# Function to get station data (including coordinates)
def get_station_data(station_id, timeout=30):
    """Fetch the stop record for ``station_id`` from the v5.db.transport.rest API.

    Args:
        station_id: Station identifier placed into the ``/stops/{id}`` URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers with HTTP 200, otherwise
        ``None``. Every failure (network error or non-200 status) is reported
        with a printed message rather than an exception.
    """
    url = f"https://v5.db.transport.rest/stops/{station_id}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Same contract as an HTTP error: report and let the caller skip this station.
        print(f"Error fetching station data for ID {station_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching station data for ID {station_id}: {response.status_code}")
        return None
    return response.json()

# Function to get stopover IDs for a given journey
def get_stopover_ids(station_id, destination_id, search_string, max_iterations=5, delay_seconds=3):
    """Return the stop IDs of the first journey leg run by a matching operator.

    Journeys from ``station_id`` to ``destination_id`` are requested from the
    v5.db.transport.rest API ten at a time. When no leg on a page has an
    operator name containing ``search_string``, the next page is requested
    using the ``laterRef`` cursor of the response (sent back as ``laterThan``).
    The search stops after ``max_iterations`` pages, so it always terminates.

    Args:
        station_id: Origin station ID.
        destination_id: Destination station ID.
        search_string: Substring to look for in each leg's operator name.
        max_iterations: Maximum number of result pages to request.
        delay_seconds: Pause between two consecutive requests.

    Returns:
        The IDs of the matching leg's stopovers, or an empty list when nothing
        matched, no journeys were returned, or the API answered with an error.
    """
    journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
    journey_params = {'stopovers': True, 'results': 10}

    for iteration in range(max_iterations):
        if iteration:
            time.sleep(delay_seconds)  # Delay to avoid overloading the API

        journey_response = requests.get(journey_url, params=journey_params, timeout=30)
        if journey_response.status_code != 200:
            print(f"Error: {journey_response.status_code} - {journey_response.text}")
            break

        payload = journey_response.json()
        journeys = payload.get('journeys', [])
        if not journeys:
            print(f"No journeys found in iteration {iteration}. Exiting loop.")
            break

        for journey in journeys:
            for leg in journey.get('legs', []):
                # Tolerate missing keys as well as explicit nulls on line/operator/name.
                operator = (leg.get('line') or {}).get('operator') or {}
                operator_name = operator.get('name') or ''
                if search_string in operator_name:
                    print(f"Journey found with operator containing '{search_string}': {operator_name}")
                    stopovers = leg.get('stopovers') or []
                    return [stopover['stop']['id'] for stopover in stopovers if 'stop' in stopover]

        # Nothing matched on this page: move the window forward instead of
        # re-sending the identical query (which could never produce a match).
        later_ref = payload.get('laterRef')
        if not later_ref:
            break
        journey_params['laterThan'] = later_ref

    return []

# Function to fetch coordinates and create a LineString
def create_linestring_from_stopovers(stopover_ids):
    """Build a LineString through the stations listed in ``stopover_ids``.

    Each ID is resolved with ``get_station_data``. Stations that cannot be
    fetched, or whose record carries no longitude/latitude, are skipped.

    Args:
        stopover_ids: Iterable of station IDs, in travel order.

    Returns:
        A ``LineString`` of ``(longitude, latitude)`` points, or ``None`` when
        fewer than two stations yielded coordinates (a line needs two points).
    """
    coordinates = []

    for station_id in stopover_ids:
        station_data = get_station_data(station_id)
        if not station_data:
            continue

        # 'location' may be absent or null; treat that like missing coordinates.
        location = station_data.get('location') or {}
        longitude = location.get('longitude')
        latitude = location.get('latitude')
        if longitude is None or latitude is None:
            print(f"Skipping station {station_id}: no coordinates in station data.")
            continue

        coordinates.append((longitude, latitude))

    if not coordinates:
        print("No valid coordinates found.")
        return None
    if len(coordinates) < 2:
        print("Only one valid coordinate found; a LineString needs at least two.")
        return None

    line = LineString(coordinates)
    print(f"Created LineString: {line}")
    return line

# Example: trace a Berlin -> Köln journey whose operator name contains "DB"
station_id, destination_id = 8011160, 8000207  # Berlin, Köln
search_string = "DB"  # Substring to look for in the operator name

# Step 1: resolve the stopovers of a matching journey
stopover_ids = get_stopover_ids(station_id, destination_id, search_string)

# Step 2: connect the stopovers into a line, if there are any
if not stopover_ids:
    print("No stopovers found.")
else:
    line_string = create_linestring_from_stopovers(stopover_ids)

In [None]:
# Probe one journeys query to inspect the operator of a specific result.
station_id = 8011160
destination_id = 8000207
journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
journey_params = {'stopovers': True, 'polylines': True, 'results': 10}
# A timeout keeps the cell from hanging forever if the API does not answer.
journey_response = requests.get(journey_url, params=journey_params, timeout=30)
# Surface HTTP failures as such instead of as a KeyError in the lookup below.
journey_response.raise_for_status()

# Operator name of the first leg of the seventh returned journey (index 6)
journey_response.json()['journeys'][6]['legs'][0]['line']['operator']['name']

In [None]:
# Display the complete decoded JSON payload held in `journey_response`.
journey_response.json()

In [None]:
import requests
from shapely.geometry import LineString

# Function to fetch polyline for a journey and return it as a LineString
def get_journey_polyline(station_id, destination_id, search_string):
    """Fetch the route geometry of a journey run by a given operator.

    Requests up to ten journeys (departure ``'tomorrow at 8am'``) between the
    two stations from the v5.db.transport.rest API and scans their legs in
    order. The first leg whose operator name contains ``search_string`` AND
    whose polyline has at least two points is converted into a LineString.

    Args:
        station_id: Origin station ID.
        destination_id: Destination station ID.
        search_string: Substring to look for in each leg's operator name.

    Returns:
        A ``LineString`` built from the leg's polyline feature coordinates, or
        ``None`` when the request fails, no journeys are returned, or no
        matching leg has a usable polyline.
    """
    journey_url = f"https://v5.db.transport.rest/journeys?from={station_id}&to={destination_id}"
    journey_params = {'stopovers': True, 'polylines': True, 'results': 10, 'departure': 'tomorrow at 8am'}
    journey_response = requests.get(journey_url, params=journey_params, timeout=30)

    if journey_response.status_code != 200:
        print(f"Error fetching journey: {journey_response.status_code} - {journey_response.text}")
        return None

    journeys = journey_response.json().get('journeys', [])
    if not journeys:
        print(f"No journeys found for {station_id} to {destination_id}.")
        return None

    for journey in journeys:
        for leg in journey.get('legs', []):
            # Tolerate missing keys as well as explicit nulls on line/operator/name.
            operator = (leg.get('line') or {}).get('operator') or {}
            operator_name = operator.get('name') or ''
            if search_string not in operator_name:
                continue

            print(f"Journey from '{station_id}' to '{destination_id}' found with operator containing '{search_string}': {operator_name}")

            # Collect one coordinate pair per polyline feature
            features = (leg.get('polyline') or {}).get('features') or []
            coordinates = [
                feature['geometry']['coordinates']
                for feature in features
                if (feature.get('geometry') or {}).get('coordinates')
            ]

            # A LineString needs at least two points; otherwise keep looking,
            # since a later matching leg may still carry a usable polyline.
            if len(coordinates) >= 2:
                return LineString(coordinates)
            print("No polyline data found.")

    return None

In [None]:
# Try the lookup once: station 8011160 -> 8000207, operator name containing "DB".
get_journey_polyline(8011160, 8000207, "DB")

In [None]:
# Function to update geometries with polylines in the GeoDataFrame
def update_geometries_with_polylines(geo_connections):
    """Replace each connection's geometry with the polyline of a real journey.

    For every row, ``get_journey_polyline`` is called with the row's
    ``start_id``, ``end_id`` and ``carrier`` (used as the operator search
    string). When it returns a geometry, that geometry overwrites the row's
    ``geometry`` value; otherwise the existing geometry is kept and a message
    is printed.

    Args:
        geo_connections: Frame with ``start_id``, ``end_id``, ``carrier`` and
            ``geometry`` columns. It is modified in place.

    Returns:
        The same ``geo_connections`` object, after the update.
    """
    # NOTE(review): the API polylines are assumed to be longitude/latitude
    # (WGS 84). If the frame carries another CRS, each new line is reprojected
    # so the geometry column never mixes coordinate systems.
    target_crs = getattr(geo_connections, 'crs', None)

    for index, row in geo_connections.iterrows():
        station_id = row['start_id']
        destination_id = row['end_id']
        carrier_name = row['carrier']  # Use this as the 'desired string'

        # Fetch the polyline as a LineString
        line_string = get_journey_polyline(station_id, destination_id, carrier_name)

        if line_string is None or line_string.is_empty:
            print(f"No polyline found for journey {station_id} to {destination_id} with carrier {carrier_name}")
            continue

        if target_crs is not None:
            line_string = gpd.GeoSeries([line_string], crs="EPSG:4326").to_crs(target_crs).iloc[0]

        # Update the geometry column with the new LineString
        geo_connections.at[index, 'geometry'] = line_string

    return geo_connections

# Example usage:
# Assuming geo_connections is your input GeoDataFrame.
# This looks up one journey per row, so it performs one API request per connection.
geo_connections = update_geometries_with_polylines(geo_connections)

In [None]:
# Visual check of the connection geometries.
geo_connections.explore()

In [None]:
import folium

# Initialize a folium map centered in Europe
m = folium.Map(location=[50.0, 10.0], zoom_start=5, tiles="CartoDB positron")

# Define color mapping for each carrier (every carrier present in the data needs an entry)
carrier_colors = {
    "FlixTrain": "green",
    "WESTbahn": "blue",
    "Trenitalia": "red",
    "RegioJet": "yellow",
    "European Sleeper": "black",
    "LEO Express": "orange",
    "EUROSTAR": "#ad1535",
    "SNCF Voyages Deutschland": "#ad1535"
}

# folium expects (latitude, longitude) in WGS 84. Read the coordinates from a
# WGS 84 view of the data in case `geo_connections` is stored in another CRS.
connections_wgs84 = geo_connections
if connections_wgs84.crs is not None:
    connections_wgs84 = connections_wgs84.to_crs(epsg=4326)

# Add each connection to the map with the specified color by carrier
for _, row in connections_wgs84.iterrows():
    line = folium.PolyLine(
        # Draw every vertex of the line, swapping (lon, lat) -> (lat, lon)
        locations=[[coord[1], coord[0]] for coord in row['geometry'].coords],
        # Unknown carriers fall back to gray instead of raising a KeyError
        color=carrier_colors.get(row['carrier'], 'gray'),
        weight=2,
        tooltip=f"{row['start_name']} to {row['end_name']} ({row['carrier']})"
    )
    line.add_to(m)

# Display the map
m