In [1]:
import datetime
import os
from datetime import datetime, timedelta, time

import networkx as nx
import pandas as pd
import requests



In [2]:
# Navitia API token. The NAVITIA_API_KEY environment variable takes precedence
# so the secret does not have to live in the notebook.
# NOTE(review): the literal fallback is kept only for backward compatibility;
# it is a credential committed in clear text and should be revoked and removed.
API_KEY = os.environ.get("NAVITIA_API_KEY") or "e0903a87-b8bc-4c0c-ad76-b3ebcd6c9341"

# Gares

Dans cette partie, on récupère toutes les gares SNCF pour les grandes lignes et les lignes régionales.

In [157]:
def get_stop_points(base_url, api_key):
    """
    Fetch every stop point of a paginated ``stop_points`` collection.

    Args:
        base_url: URL of the collection to query.
        api_key: API token, sent as the user name of HTTP basic auth.

    Returns:
        DataFrame built with ``pd.json_normalize`` from the stop points of
        all fetched pages.

    Raises:
        requests.HTTPError: if the very first page answers with an HTTP
            error status (bad token, wrong URL, ...).
    """
    all_stop_points = []
    page = 0

    # Walk the collection page by page until a page comes back empty.
    while True:
        params = {
            "count": 1000,
            "start_page": page}

        # The timeout keeps a stalled connection from hanging the notebook.
        r = requests.get(base_url, params=params, auth=(api_key, ""), timeout=30)

        if r.status_code != 200:
            # Nothing fetched yet: surface the failure instead of returning
            # an empty frame. On later pages a non-200 answer ends the loop,
            # like an empty page does.
            if page == 0:
                r.raise_for_status()
            break

        data = r.json()
        stop_points = data.get("stop_points", [])

        if not stop_points:
            break

        all_stop_points.extend(stop_points)
        page += 1

    return pd.json_normalize(all_stop_points)


In [158]:
def process_station_data(df):
    """
    Clean the raw stop points frame and keep the rail stations only.

    Steps:
    - extract id / zip code / label / INSEE code of the first administrative
      region of each stop point (not part of the returned columns; the
      disabled "French stations only" filter below relies on ``admin_id``),
    - drop the row labelled 0,
    - keep the rows whose id contains "Train",
    - reduce the id to its third ':'-separated field.

    Args:
        df: frame holding at least the ``administrative_regions``, ``id``,
            ``name``, ``coord.lon`` and ``coord.lat`` columns. It is left
            untouched: all the work happens on a copy.

    Returns:
        DataFrame with the columns ``id``, ``name``, ``coord.lon`` and
        ``coord.lat``.
    """
    # Work on a copy so the caller's frame does not grow helper columns.
    stations = df.copy()

    # First administrative region of each stop point (None when there is none).
    first_region = stations["administrative_regions"].apply(
        lambda regions: regions[0] if isinstance(regions, list) and len(regions) > 0 else None
    )
    for column, key in (
        ("admin_id", "id"),
        ("admin_zip", "zip_code"),
        ("admin_label", "label"),
        ("code_insee", "insee"),
    ):
        stations[column] = first_region.apply(
            lambda region, key=key: region.get(key) if isinstance(region, dict) else None
        )

    # Drop the raw columns that are no longer needed; errors="ignore" keeps
    # the function usable when "links" / "equipments" are absent.
    stations = stations.drop(
        columns=["administrative_regions", "links", "equipments"], errors="ignore"
    )

    # Drop the row labelled 0.
    # NOTE(review): the original comment said "if it is empty", but the row
    # is removed unconditionally - confirm that this is intended.
    stations = stations.drop(0, errors='ignore')

    # Disabled filter: keep French stations only.
    #stations = stations.loc[lambda x: x['admin_id'].str.contains('fr', na=False)]

    # Keep rail stop points only (drops buses, ...). The explicit copy avoids
    # assigning into a slice of the previous frame.
    stations = stations[stations['id'].str.contains("Train", na=False)].copy()

    # Keep only the third ':'-separated field of the id.
    stations['id'] = stations['id'].str.split(':').str[2]

    return stations[['id', 'name', 'coord.lon', 'coord.lat']]


In [159]:
# Download all stop points, keep the rail stations, then keep one row per
# station id.
df_stop_points = (
    process_station_data(
        get_stop_points('https://api.navitia.io/v1/coverage/sncf/stop_points?', API_KEY)
    )
    .drop_duplicates(subset=['id'])
)

In [160]:
# Preview the first cleaned stations.
df_stop_points.head()

Unnamed: 0,id,name,coord.lon,coord.lat
1,87757005,,0.0,0.0
3,80153452,Aachen Hbf,6.0912,50.7675
5,87313759,Abancourt,1.774297,49.685621
9,87317362,Abbeville,1.824538,50.102175
18,87342048,Achiet,2.780112,50.131919


# Routes

In [241]:
# Route collections of the two physical modes used below:
# LongDistanceTrain (url_routes_tgv) and Train (url_routes_ter).
url_routes_tgv, url_routes_ter = (
    f"https://api.navitia.io/v1/coverage/sncf/physical_modes/physical_mode:{mode}/routes"
    for mode in ("LongDistanceTrain", "Train")
)

In [242]:
def get_all_routes(url, API_KEY):
    """
    Fetch every route of a paginated ``routes`` collection.

    Args:
        url: URL of the collection to query.
        API_KEY: API token, sent as the user name of HTTP basic auth.

    Returns:
        list of dicts with the keys ``id``, ``name``, ``direction.name`` and
        ``line.commercial_mode.name`` (a value is None when the API does not
        provide it).

    Raises:
        requests.HTTPError: if the very first page answers with an HTTP
            error status.
    """
    all_routes = []
    page = 0

    while True:
        params = {"count": 1000, "start_page": page}
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, params=params, auth=(API_KEY, ""), timeout=30)

        if response.status_code != 200:
            # Nothing fetched yet: surface the failure instead of returning
            # an empty list. On later pages a non-200 answer ends the loop,
            # like an empty page does.
            if page == 0:
                response.raise_for_status()
            break

        data = response.json()
        routes = data.get("routes", [])

        if not routes:
            break

        for route in routes:
            # "or {}" also covers keys that are present but null.
            direction = route.get("direction") or {}
            commercial_mode = (route.get("line") or {}).get("commercial_mode") or {}
            all_routes.append({
                "id": route.get("id"),
                "name": route.get("name"),
                "direction.name": direction.get("name"),
                "line.commercial_mode.name": commercial_mode.get("name"),
            })

        page += 1

    return all_routes

In [246]:
# Long-distance and regional routes in a single frame.
# ignore_index=True gives every route its own row label; without it both
# halves keep their own 0..n-1 labels and label-based lookups are ambiguous.
df_routes = pd.concat(
    [
        pd.json_normalize(get_all_routes(url_routes_tgv, API_KEY)),
        pd.json_normalize(get_all_routes(url_routes_ter, API_KEY)),
    ],
    ignore_index=True,
)

In [249]:
# Stop points of a route, as listed by the API.
# NOTE: this list is NOT in calling order.
def get_stop_points_of_route(route_id):
    '''
    Return the ids of the stop points served by a route.

    Args:
        route_id: identifier of the route, inserted as-is into the URL.

    Returns:
        list of str: third ':'-separated field of each stop point id, in the
        order returned by the API. Ids with fewer than three fields are
        skipped.

    Raises:
        requests.HTTPError: on an HTTP error status other than 404.
    '''
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/stop_points"
    # Ask for 1000 items like the paginated fetchers of this notebook do.
    # NOTE(review): presumably the default page size is smaller than the
    # longest routes, which would truncate their lists - confirm.
    response = requests.get(url, params={"count": 1000}, auth=(API_KEY, ""), timeout=30)

    if response.status_code == 404:
        # NOTE(review): presumably the answer for a route without stop
        # points - confirm. Kept as an empty list, as before.
        return []
    response.raise_for_status()

    data = response.json()

    stop_point_ids = []
    for stop_point in data.get("stop_points", []):
        parts = (stop_point.get("id") or "").split(':')
        if len(parts) > 2:
            stop_point_ids.append(parts[2])
    return stop_point_ids

In [250]:
# Attach to every route its (unordered) stop point ids and their count.
df_routes['liste_stop_points'] = df_routes['id'].map(get_stop_points_of_route)
df_routes['len_liste_stop_points'] = df_routes['liste_stop_points'].map(len)

In [251]:
# Stop points of a route, read from the schedule of the route.
# The list follows the order of the schedule rows (calling order).
def get_stop_from_schedule(route_id):
    """
    Return the stop ids of a route in the order of its first route schedule.

    Best effort: a failed request or an unusable answer yields an empty list.

    Args:
        route_id: identifier of the route, inserted as-is into the URL.

    Returns:
        list of str: third ':'-separated field of the stop point id of each
        row of the first schedule table. Rows without a usable id are
        skipped, and the walk stops at the first id that repeats.
    """
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/route_schedules"
    try:
        data = requests.get(url, auth=(API_KEY, ""), timeout=30).json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body. Only these are swallowed so that
        # programming errors are no longer hidden.
        return []

    route_schedules = data.get("route_schedules")
    if not route_schedules:
        return []

    rows = (route_schedules[0].get("table") or {}).get("rows") or []
    stop_points = []
    seen_id = set()
    for row in rows:
        parts = ((row.get("stop_point") or {}).get("id") or "").split(':')
        if len(parts) < 3:
            # One malformed row no longer discards the whole route.
            continue
        stop_id = parts[2]
        if stop_id in seen_id:
            break
        seen_id.add(stop_id)
        stop_points.append(stop_id)

    return stop_points

In [284]:
# Scratch check on a single route, requested in the backward direction.
url = "https://api.navitia.io/v1/coverage/sncf/routes/route:SNCF:CSR:807100/route_schedules?direction_type=backward"
data = requests.get(url, auth=(API_KEY, "")).json()

# Rows of the first schedule table. The loop of the following cell iterates
# over `rows`, so it has to be defined for the notebook to run top to bottom.
rows = (data.get("route_schedules") or [{}])[0].get("table", {}).get("rows", [])

In [282]:
# Collect the stop ids of `rows` in order, stopping at the first id that
# repeats.
stop_points = []
seen_id = set()
for row in rows:
    # Third ':'-separated field of the stop point id.
    stop_id = row.get("stop_point", {}).get("id", "").split(':')[2]
    if stop_id in seen_id:
        break
    seen_id.add(stop_id)
    stop_points.append(stop_id)

In [279]:
# Show the ordered stop ids collected by the loop.
stop_points

['87113001', '87212027', '80143099', '80143198', '80143255', '80143503']

In [252]:
# Ordered stop ids of every route, read from its schedule.
df_routes['stop_points'] = df_routes['id'].map(get_stop_from_schedule)

In [260]:
# Routes whose name mentions Strasbourg.
df_routes[df_routes['name'].str.contains('Strasbourg')]

Unnamed: 0,id,name,direction.name,line.commercial_mode.name,liste_stop_points,len_liste_stop_points,stop_points,nb_stop_points
81,route:SNCF:CSR:333100,Paris Est - Strasbourg,Paris Est (Paris),OUIGO,"[87192039, 87113001, 87212027]",3,"[87212027, 87192039, 87113001]",3
121,route:SNCF:CSR:061300,Bordeaux Saint-Jean - Strasbourg,Bordeaux Saint-Jean (Bordeaux),TGV INOUI,"[87271494, 87484006, 87583005, 87581009, 87171...",13,"[87212027, 87142109, 87147322, 87171926, 87271...",11
125,route:SNCF:CSR:005400,Bruxelles-Midi / Brussel-Zuid - Strasbourg,Strasbourg (Strasbourg),TGV INOUI,"[87271494, 88140010, 87171926, 87223263, 87142...",9,"[87212027, 87142109, 87147322, 87171926, 87271...",8
147,route:SNCF:CSR:061100,Nantes - Strasbourg,Strasbourg (Strasbourg),TGV INOUI,"[87271494, 87484006, 87171926, 87396002, 87142...",11,"[87212027, 87142109, 87147322, 87171926, 87271...",11
149,route:SNCF:CSR:071100,Nice - Strasbourg,Marseille Saint-Charles (Marseille),TGV INOUI,"[87319012, 87757674, 87318964, 87713545, 87300...",24,"[87756056, 87757674, 87757625, 87757526, 87755...",24
173,route:SNCF:CSR:061200,Rennes - Strasbourg,Strasbourg (Strasbourg),TGV INOUI,"[87271494, 87476200, 87171926, 87396002, 87476...",13,"[87212027, 87142109, 87147322, 87171926, 87271...",13
175,route:SNCF:CSR:807100,Strasbourg - Paris Est,Paris Est (Paris),TGV INOUI,"[87171926, 80143503, 80143198, 87142109, 87147...",9,"[87113001, 87212027, 80143099, 80143198, 80143...",6


In [267]:
# Show the stations of one example route in calling order.
# The loop variable is called stop_id so that the builtin id() is not
# overwritten for the rest of the session.
for stop_id in df_routes.iloc[175]['stop_points']:
    print(df_stop_points.loc[df_stop_points['id'] == stop_id])

            id       name coord.lon  coord.lat
4114  87113001  Paris Est  2.359296  48.876793
            id        name coord.lon  coord.lat
5404  87212027  Strasbourg  7.734793  48.584532
            id       name coord.lon coord.lat
4007  80143099  Offenburg    7.9468   48.4765
            id             name coord.lon coord.lat
2623  80143198  Lahr (Schwarzw)    7.8353   48.3406
            id                   name coord.lon coord.lat
4499  80143255  Ringsheim Europa-Park    7.7732   48.2489
            id                     name coord.lon coord.lat
1880  80143503  Freiburg (Breisgau) Hbf    7.8416   47.9979


In [255]:
# Count the ordered stops of every route, then show a few routes for which
# the two counts disagree.
df_routes['nb_stop_points'] = df_routes['stop_points'].map(len)
df_routes[df_routes['nb_stop_points'] != df_routes['len_liste_stop_points']].head()

Unnamed: 0,id,name,direction.name,line.commercial_mode.name,liste_stop_points,len_liste_stop_points,stop_points,nb_stop_points
5,route:SNCF:FR:Line::CB496029-4727-4221-A130-52...,K14,Quimper (Quimper),Aléop,"[87476200, 87481051, 87481820, 87481804, 87476...",20,"[87481002, 87481838, 87481846, 87471300, 87476...",12
7,route:SNCF:FR:Line::c21468e7-fa90-4c2a-81bc-7f...,K39,Caen (Caen),Aléop,"[87444711, 87444539, 87396549, 87396572, 87444...",22,"[87444000, 87444208, 87444216, 87444539, 87444...",15
8,route:SNCF:FR:Line::C44E558F-5178-474C-9A86-E2...,P1,Tours (Tours),Aléop,"[87481192, 87484006, 87484352, 87571836, 87484...",21,"[87481002, 87481192, 87484006, 87701532, 87487...",9
9,route:SNCF:FR:Line::D8F5033E-0343-41AB-A29B-E9...,P3,Nogent-le-Rotrou (Nogent-le-Rotrou),Aléop,"[87394205, 87394270, 87396275, 87394007, 87394...",22,"[87396002, 87396275, 87396283, 87396291, 87396...",9
11,route:SNCF:FR:Line::124908e5-9f85-4eaf-876e-45...,P30,Tours (Tours),Aléop,"[87444711, 87396549, 87396572, 87396606, 87396...",22,"[87444711, 87396093, 87396077, 87396051, 87396...",16


## Méthode brutale : une route par combinaison d'arrêts existante

In [None]:
# Stop combinations of a route, read from its schedule.
# NOTE(review): this redefines get_stop_from_schedule with a different return
# shape than the earlier definition of the same name; whichever cell ran last
# is the live one.
def get_stop_from_schedule(route_id):
    """
    List the distinct stop patterns served on a route.

    Best effort: a failed request or an unusable answer yields an empty list.

    NOTE(review): assumes that every row of a schedule table describes one
    stop point, and that its ``date_times`` list holds one cell per vehicle
    journey whose ``date_time`` is empty when the journey does not call at
    that stop - confirm against the API documentation.

    Args:
        route_id: identifier of the route, inserted as-is into the URL.

    Returns:
        list of patterns, in order of first appearance and without
        duplicates. Each pattern is the list of ``(stop_id, stop_name)``
        tuples served by one journey, in row order.
    """
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/route_schedules"
    try:
        data = requests.get(url, auth=(API_KEY, ""), timeout=30).json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body. Only these are swallowed so that
        # programming errors are no longer hidden.
        return []

    # A dict is used as an insertion-ordered set of patterns.
    stop_combinations = {}
    for schedule in data.get("route_schedules") or []:
        rows = (schedule.get("table") or {}).get("rows") or []

        # column index (one journey) -> stops served, in row order
        stops_by_journey = {}
        for row in rows:
            stop_point = row.get("stop_point") or {}
            parts = (stop_point.get("id") or "").split(':')
            if len(parts) < 3:
                continue
            stop = (parts[2], stop_point.get("name"))

            for column_index, cell in enumerate(row.get("date_times") or []):
                if cell.get("date_time"):
                    stops_by_journey.setdefault(column_index, []).append(stop)

        for pattern in stops_by_journey.values():
            stop_combinations[tuple(pattern)] = None

    return [list(combination) for combination in stop_combinations]

# Trains et horaires

On cherche à récupérer les trains pour une route donnée (Paris -> Strasbourg par exemple). \
Pour ce faire, on requête l'objet `vehicle_journeys`.

In [74]:
def get_vehicle_journeys(date_start, route_id):
    '''
    Fetch the vehicle journeys of a route over the 24 hours following a date.

    Args:
        date_start: start of the period; must support ``strftime`` and the
            addition of a ``pd.Timedelta``.
        route_id: identifier of the route, inserted as-is into the URL.

    Returns:
        list: content of the ``vehicle_journeys`` key of the answer
        ([] when the key is missing or the API answers 404).

    Raises:
        requests.HTTPError: on an HTTP error status other than 404.
    '''
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/vehicle_journeys"
    # Same period filters as get_all_journeys uses on the vehicle_journeys
    # collection (since / until).
    params = {
        "since": date_start.strftime("%Y%m%dT%H%M%S"),
        "until": (date_start + pd.Timedelta(days=1)).strftime("%Y%m%dT%H%M%S"),
    }
    response = requests.get(url, params=params, auth=(API_KEY, ""), timeout=30)

    if response.status_code == 404:
        # NOTE(review): presumably the answer when no journey matches the
        # period - confirm. Kept as an empty list, as before.
        return []
    response.raise_for_status()

    return response.json().get("vehicle_journeys", [])

In [90]:
# Vehicle journeys of one example route over 24 hours, as a flat frame.
trains = pd.json_normalize(
    get_vehicle_journeys(
        pd.Timestamp("2025-12-14 00:00:00"),
        'route:SNCF:CSR:333100',
    )
)

In [96]:
def extract_stops_with_times(df):
    """
    Summarise the stops of every journey of a frame.

    Args:
        df: DataFrame with a 'stop_times' column holding, for each journey,
            a list of stop dicts.

    Returns:
        list: one list per journey; each element is a dict with the stop id
        and name, the arrival and departure times (None when absent) and the
        pickup / drop-off flags (False when absent).
    """
    return [
        [
            {
                'stop_id': stop['stop_point']['id'],
                'stop_name': stop['stop_point']['name'],
                'arrival_time': stop.get('arrival_time', None),
                'departure_time': stop.get('departure_time', None),
                'pickup_allowed': stop.get('pickup_allowed', False),
                'drop_off_allowed': stop.get('drop_off_allowed', False),
            }
            for stop in journey_stops
        ]
        for journey_stops in df['stop_times']
    ]

In [91]:
# Store the per-journey stop summaries in a new 'stops' column.
trains['stops'] = extract_stops_with_times(trains)

In [98]:
# Stop summaries of the journey labelled 1.
trains['stops'][1]

[{'stop_id': 'stop_point:SNCF:87212027:LongDistanceTrain',
  'stop_name': 'Strasbourg',
  'arrival_time': '125000',
  'departure_time': '125000',
  'pickup_allowed': True,
  'drop_off_allowed': False},
 {'stop_id': 'stop_point:SNCF:87192039:LongDistanceTrain',
  'stop_name': 'Metz',
  'arrival_time': '134100',
  'departure_time': '135800',
  'pickup_allowed': True,
  'drop_off_allowed': True},
 {'stop_id': 'stop_point:SNCF:87113001:LongDistanceTrain',
  'stop_name': 'Paris Est',
  'arrival_time': '152100',
  'departure_time': '152100',
  'pickup_allowed': False,
  'drop_off_allowed': True}]

In [None]:
def decompose_time(hhmmss: str) -> tuple[int, int, int]:
    """Split an 'HHMMSS' string into (hours, minutes, seconds)."""
    hh = int(hhmmss[0:2])
    mm = int(hhmmss[2:4])
    ss = int(hhmmss[4:6])
    return hh, mm, ss


def get_stop_times(trajet):
    """
    Turn the 'HHMMSS' stop times of a journey into full datetimes.

    The service day is the third ':'-separated field of ``trajet['id']``,
    parsed as YYYY-MM-DD. Times are read in order (arrival then departure of
    each stop); every time the clock goes backwards the following times are
    moved to the next day, so a departure after midnight is dated correctly
    even when the arrival at the same stop was before midnight.

    Args:
        trajet: mapping (dict or row) with an 'id' and a 'stop_times' list.

    Returns:
        list of dicts with the keys ``stop_point_id``, ``stop_name``,
        ``arrival_dt`` and ``departure_dt``. A missing time falls back to the
        other time of the same stop; both are None when neither is given.
    """
    service_day = datetime.strptime(trajet['id'].split(':')[2], "%Y-%m-%d")

    day_offset = 0            # number of midnights crossed so far
    last_time_in_seconds = None
    stops = []

    for st in trajet["stop_times"]:
        raw_arr = st.get("arrival_time") or st.get("departure_time")
        raw_dep = st.get("departure_time") or raw_arr

        resolved = []
        for raw in (raw_arr, raw_dep):
            if not raw:
                # No information at all: leave the clock untouched.
                resolved.append(None)
                continue

            hours, minutes, seconds = decompose_time(raw)
            seconds_of_day = hours * 3600 + minutes * 60 + seconds

            # The clock going backwards means midnight has been crossed.
            if last_time_in_seconds is not None and seconds_of_day < last_time_in_seconds:
                day_offset += 1
            last_time_in_seconds = seconds_of_day

            # Adding a timedelta also copes with hours >= 24.
            resolved.append(service_day + timedelta(days=day_offset, seconds=seconds_of_day))

        arrival_dt, departure_dt = resolved

        stops.append({
            "stop_point_id": st["stop_point"]["id"],
            "stop_name": st["stop_point"]["name"],
            "arrival_dt": arrival_dt,
            "departure_dt": departure_dt,
        })

    return stops

In [None]:
# Example: dated stop times of the journey at position 6.
get_stop_times(trains.iloc[6])

# Tous les trains 

In [None]:
# Fetch all the vehicle journeys of a given day.
def get_all_journeys(since):
    """
    Fetch the long-distance vehicle journeys between 'since' and 'since + 24h'.

    Args:
        since (datetime): start of the period.

    Returns:
        list: vehicle journeys of all fetched pages. Fetching stops at the
        first empty page or at the first answer whose status is not 200.

    Raises:
        requests.HTTPError: if the very first page answers with an HTTP
            error status other than 404.
    """
    until = since + timedelta(hours=24)
    params = {
        "since": since.strftime("%Y%m%dT%H%M%S"),
        "until": until.strftime("%Y%m%dT%H%M%S"),
        "count": 500,
    }
    url = "https://api.navitia.io/v1/coverage/sncf/physical_modes/physical_mode:LongDistanceTrain/vehicle_journeys"
    headers = {"Authorization": API_KEY}

    all_vj = []
    page = 0

    while True:
        r = requests.get(
            url,
            headers=headers,
            params={**params, "start_page": page},
            timeout=30,
        )

        if r.status_code != 200:
            # Nothing fetched yet and the answer is a real error: surface it
            # instead of returning an empty list.
            # NOTE(review): 404 is presumably "no journey found" - confirm.
            if page == 0 and r.status_code != 404:
                r.raise_for_status()
            break

        journeys = r.json().get("vehicle_journeys")
        if not journeys:
            break

        all_vj.extend(journeys)
        page += 1

    return all_vj

In [None]:
# Day whose long-distance journeys are downloaded, starting at midnight.
since_date = datetime(year=2025, month=12, day=5)
vehicle_journeys = get_all_journeys(since_date)

In [None]:
# Flatten the downloaded journeys into a frame (one row per journey).
df_vehicle_journeys = pd.json_normalize(vehicle_journeys)

In [None]:
# Display the journeys frame.
df_vehicle_journeys

In [None]:
# Raw stop times of the journey labelled 0.
df_vehicle_journeys['stop_times'][0]

In [88]:
def extract_stops_with_times(df_vehicle_journeys):
    """
    Summarise the stops of every vehicle journey.

    Note: a function of the same name is already defined earlier in this
    notebook; this definition replaces it once the cell has run.

    Args:
        df_vehicle_journeys (DataFrame): frame with a 'stop_times' column
            holding, for each journey, a list of stop dicts.

    Returns:
        list: one list per journey; each element is a dict with the stop id
        and name, the arrival and departure times (None when absent) and the
        pickup / drop-off flags (False when absent).
    """
    def summarise(stop):
        # Flat description of a single stop.
        return {
            'stop_id': stop['stop_point']['id'],
            'stop_name': stop['stop_point']['name'],
            'arrival_time': stop.get('arrival_time', None),
            'departure_time': stop.get('departure_time', None),
            'pickup_allowed': stop.get('pickup_allowed', False),
            'drop_off_allowed': stop.get('drop_off_allowed', False),
        }

    return [
        list(map(summarise, journey_stops))
        for journey_stops in df_vehicle_journeys['stop_times']
    ]

In [None]:
# Store the per-journey stop summaries in a new 'stops' column.
df_vehicle_journeys['stops'] = extract_stops_with_times(df_vehicle_journeys)

# Graph

In [None]:
def _hhmmss_to_seconds(hhmmss):
    """Convert an 'HHMMSS' string into a number of seconds since midnight."""
    return int(hhmmss[0:2]) * 3600 + int(hhmmss[2:4]) * 60 + int(hhmmss[4:6])


def build_graph_from_vehicle_journeys(df_vehicle_journeys):
    """
    Build a directed graph of the stations linked by the vehicle journeys.

    Nodes are stop point ids carrying a ``name`` attribute. Every pair of
    consecutive stops of a journey gives an edge weighted by the travel time
    in seconds. A DiGraph holds a single edge per ordered pair of stations,
    so when several trains link the same pair the fastest one is kept.

    Args:
        df_vehicle_journeys: DataFrame with an 'id' column and a 'stop_times'
            column holding the list of stop dicts of each journey.

    Returns:
        nx.DiGraph: edges carry ``weight``, ``train``, ``departure_time`` and
        ``arrival_time``.
    """
    G = nx.DiGraph()
    fastest = {}  # (from_id, to_id) -> attributes of the quickest connection

    for train_id, stop_times in zip(df_vehicle_journeys['id'], df_vehicle_journeys['stop_times']):
        # Consecutive stops of the journey, two by two.
        for current_stop, next_stop in zip(stop_times, stop_times[1:]):
            from_point = current_stop['stop_point']
            to_point = next_stop['stop_point']

            G.add_node(from_point['id'], name=from_point['name'])
            G.add_node(to_point['id'], name=to_point['name'])

            departure_time = current_stop.get('departure_time')
            arrival_time = next_stop.get('arrival_time')
            if not departure_time or not arrival_time:
                # No travel time can be computed for this leg.
                continue

            # The modulo keeps the duration positive when the leg crosses
            # midnight.
            duration = (
                _hhmmss_to_seconds(arrival_time) - _hhmmss_to_seconds(departure_time)
            ) % 86400

            key = (from_point['id'], to_point['id'])
            best = fastest.get(key)
            if best is None or duration < best['weight']:
                fastest[key] = {
                    'weight': duration,  # weighted by travel time
                    'train': train_id,
                    'departure_time': departure_time,
                    'arrival_time': arrival_time,
                }

    for (from_stop_id, to_stop_id), attributes in fastest.items():
        G.add_edge(from_stop_id, to_stop_id, **attributes)

    return G

In [None]:
# Build the station graph and report its size.
G = build_graph_from_vehicle_journeys(df_vehicle_journeys)

print(f"Nombre de nœuds : {G.number_of_nodes()}")
print(f"Nombre d'arêtes : {G.number_of_edges()}")

In [None]:
# List the nodes (stop point ids) of the graph.
G.nodes