In [157]:
import requests
import pandas as pd
import datetime
from datetime import datetime, timedelta

In [None]:
API_KEY = ""

# Gares

In [11]:
def get_stop_points(base_url, api_key):
    """
    Fonction pour récupérer tous les stop_points 
    """
    all_stop_points = []
    page = 0

    #requêtes page par page
    while True:
        params = {
            "count": 1000, 
            "start_page": page}

        r = requests.get(base_url, params=params, auth=(api_key, ""))
        data = r.json()

        stop_points = data.get("stop_points", [])

        if not stop_points:
            break 

        all_stop_points.extend(stop_points)
        page += 1

    return pd.json_normalize(all_stop_points)


In [190]:
# récupérer les stop_points des longues distances et train régionaux
df_stations_1 = get_stop_points('https://api.navitia.io/v1/coverage/sncf/physical_modes/physical_mode:LongDistanceTrain/stop_points?', API_KEY)
df_stations_2 = get_stop_points('https://api.navitia.io/v1/coverage/sncf/physical_modes/physical_mode:Train/stop_points?', API_KEY)

In [191]:
def process_station_data(df):
    """
    Fonction pour traiter les données des stations :
    - Extraction des informations administratives
    - Filtrage des données (stations non françaises, non ferroviaires)
    """
    
    # Extraction des informations administratives
    df["admin"] = df["administrative_regions"].apply(
        lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None
    )
    df["admin_id"] = df["admin"].apply(lambda x: x.get("id") if isinstance(x, dict) else None)
    df["admin_zip"] = df["admin"].apply(lambda x: x.get("zip_code") if isinstance(x, dict) else None)
    df["admin_label"] = df["admin"].apply(lambda x: x.get("label") if isinstance(x, dict) else None)
    df["code_insee"] = df["admin"].apply(lambda x: x.get("insee") if isinstance(x, dict) else None)

    # Suppression des colonnes inutiles
    df = df.drop(columns=["administrative_regions", "admin", "links", "equipments"])

    # Retirer la première ligne si elle est vide
    df = df.drop(0, errors='ignore')

    # Retirer les stations non françaises
    df = df.loc[lambda x: x['admin_id'].str.contains('fr', na=False)]

    # Retirer les lignes non ferroviaires (bus)
    df = df[df['id'].str.contains("Train", na=False)]

    return df


In [193]:
df_stations_2 = process_station_data(df_stations_2)
df_stations_1 = process_station_data(df_stations_1)

In [194]:
#il y a des stations TGV qui ne sont pas des LongDistanceTrain, on les retire
df_stations_1 = df_stations_1.loc[df_stations_1['id'].str.contains("LongDistanceTrain", na=False)]

In [195]:
df_stations_2.loc[lambda x: x['id'].str.contains('LongDistanceTrain', na=False)]

Unnamed: 0,id,name,label,coord.lon,coord.lat,stop_area.id,stop_area.name,stop_area.codes,stop_area.timezone,stop_area.label,stop_area.coord.lon,stop_area.coord.lat,stop_area.links,admin_id,admin_zip,admin_label,code_insee
3,stop_point:SNCF:87342048:LongDistanceTrain,Achiet,Achiet (Achiet-le-Grand),2.780112,50.131919,stop_area:SNCF:87342048,Achiet,"[{'type': 'source', 'value': '87342048'}, {'ty...",Europe/Paris,Achiet (Achiet-le-Grand),2.780112,50.131919,[],admin:fr:62005,62121,Achiet-le-Grand (62121),62005
560,stop_point:SNCF:87781161:LongDistanceTrain,Coursan,Coursan (Coursan),3.050946,43.233731,stop_area:SNCF:87781161,Coursan,"[{'type': 'source', 'value': '87781161'}, {'ty...",Europe/Paris,Coursan (Coursan),3.050946,43.233731,[],admin:fr:11106,11110,Coursan (11110),11106
1362,stop_point:SNCF:87781294:LongDistanceTrain,Marseillan Plage,Marseillan Plage (Marseillan),3.535629,43.318258,stop_area:SNCF:87781294,Marseillan Plage,"[{'type': 'source', 'value': '87781294'}, {'ty...",Europe/Paris,Marseillan Plage (Marseillan),3.535629,43.318258,[],admin:fr:34150,34340,Marseillan (34340),34150
1435,stop_point:SNCF:87747329:LongDistanceTrain,Moirans,Moirans (Moirans),5.581934,45.32216,stop_area:SNCF:87747329,Moirans,"[{'type': 'source', 'value': '87747329'}, {'ty...",Europe/Paris,Moirans (Moirans),5.581934,45.32216,[],admin:fr:38239,38430,Moirans (38430),38239
2309,stop_point:SNCF:87781260:LongDistanceTrain,Vias,Vias (Vias),3.425356,43.315653,stop_area:SNCF:87781260,Vias,"[{'type': 'source', 'value': '87781260'}, {'ty...",Europe/Paris,Vias (Vias),3.425356,43.315653,[],admin:fr:34332,34450,Vias (34450),34332


In [197]:
df_stations_1.head()

Unnamed: 0,id,name,label,coord.lon,coord.lat,stop_area.id,stop_area.name,stop_area.codes,stop_area.timezone,stop_area.label,stop_area.coord.lon,stop_area.coord.lat,stop_area.links,admin_id,admin_zip,admin_label,code_insee
3,stop_point:SNCF:87313759:LongDistanceTrain,Abancourt,Abancourt (Abancourt),1.774297,49.685621,stop_area:SNCF:87313759,Abancourt,"[{'type': 'source', 'value': '87313759'}, {'ty...",Europe/Paris,Abancourt (Abancourt),1.774297,49.685621,[],admin:fr:60001,60220,Abancourt (60220),60001
4,stop_point:SNCF:87342048:LongDistanceTrain,Achiet,Achiet (Achiet-le-Grand),2.780112,50.131919,stop_area:SNCF:87342048,Achiet,"[{'type': 'source', 'value': '87342048'}, {'ty...",Europe/Paris,Achiet (Achiet-le-Grand),2.780112,50.131919,[],admin:fr:62005,62121,Achiet-le-Grand (62121),62005
5,stop_point:SNCF:87271494:LongDistanceTrain,Aéroport Charles de Gaulle 2 TGV,Aéroport Charles de Gaulle 2 TGV (Tremblay-en-...,2.570812,49.003618,stop_area:SNCF:87271494,Aéroport Charles de Gaulle 2 TGV,"[{'type': 'source', 'value': '87271494'}, {'ty...",Europe/Paris,Aéroport Charles de Gaulle 2 TGV (Tremblay-en-...,2.570812,49.003618,[],admin:fr:93073,93290,Tremblay-en-France (93290),93073
6,stop_point:SNCF:87781278:LongDistanceTrain,Agde,Agde (Agde),3.466203,43.31728,stop_area:SNCF:87781278,Agde,"[{'type': 'source', 'value': '87781278'}, {'ty...",Europe/Paris,Agde (Agde),3.466203,43.31728,[],admin:fr:34003,34300,Agde (34300),34003
8,stop_point:SNCF:87586008:LongDistanceTrain,Agen,Agen (Agen),0.620867,44.207967,stop_area:SNCF:87586008,Agen,"[{'type': 'source', 'value': '87586008'}, {'ty...",Europe/Paris,Agen (Agen),0.620867,44.207967,[],admin:fr:47001,47000,Agen (47000),47001


# Lignes 

In [63]:
url_lines = "https://api.navitia.io/v1/coverage/sncf/physical_modes/physical_mode:LongDistanceTrain/lines"

In [64]:
def get_all_lines(url, API_KEY):
    """
    Fonction pour récupérer toutes les lignes
    """
    all_lines = []
    page = 0
    
    while True:
        params = {"count": 1000, "start_page": page}
        response = requests.get(url, params=params, auth=(API_KEY, ""))
                
        data = response.json()

        lines = data.get("lines", [])

        if not lines:
            break

        all_lines.extend(lines)
        page += 1 

    return all_lines

In [65]:
lignes = get_all_lines(url_lines, API_KEY)
df_lignes = pd.json_normalize(get_all_lines(url_lines, API_KEY))

In [66]:
df_lignes['commercial_mode.name'].unique()

array(['Aléop', 'BreizhGo', 'DB SNCF', 'Eurostar', 'FLUO', 'Intercités',
       'Intercités de nuit', 'LEX', 'MOBIGO', 'NOMAD', 'OUIGO',
       'OUIGO Train Classique', 'REGIONAURA', 'Rémi', 'Rémi Exp', 'SNCF',
       'TER', 'TER HDF', 'TER NA', 'TGV INOUI', 'TGV Lyria', 'ZOU !',
       'liO', 'additional service'], dtype=object)

In [71]:
def get_routes_of_line(line_id):
    '''
    fonction pour récupérer les routes d'une ligne donnée
    '''
    url = f"https://api.navitia.io/v1/coverage/sncf/lines/{line_id}/routes"
    data = requests.get(url, auth=(API_KEY, "")).json()
    return data.get("routes", [])

def get_stop_points_of_route(route_id):
    '''
    fonction pour récupérer les stop points d'une route donnée
    '''
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/stop_points"
    data = requests.get(url, auth=(API_KEY, "")).json()
    return data.get("stop_points", [])

In [None]:
def get_all_stop_points(all_lines):
    '''
    fonction qui récupère tous les arrêts de toutes les routes
    '''
    all_rows = []

    for line in all_lines:
        line_id = line["id"]
        line_name = line.get("name", "")
        routes = get_routes_of_line(line_id)

        for route in routes:
            route_id = route["id"]
            route_name = route.get("name", "")

            stop_points = get_stop_points_of_route(route_id)

            for sp in stop_points:
                all_rows.append({
                    "line_id": line_id,
                    "line_name": line_name,
                    "route_id": route_id,
                    "route_name": route_name,
                    "stop_point_id": sp["id"],
                    "stop_point_name": sp.get("name", ""),
                    "lon": float(sp["coord"]["lon"]),
                    "lat": float(sp["coord"]["lat"]),
                })

    return pd.DataFrame(all_rows)

In [72]:
arrets = get_all_stop_points(lignes)

In [112]:
#retirer les bus et autres non ferroviaires
arrets = arrets.loc[lambda x: (x.stop_point_id.str.contains('Train'))] 

In [None]:
arrets.loc[lambda x: x.line_name == 'Paris Est - Strasbourg']

Unnamed: 0,line_id,line_name,route_id,route_name,stop_point_id,stop_point_name,lon,lat
992,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87192039:LongDistanceTrain,Metz,6.177052,49.109466
993,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87113001:LongDistanceTrain,Paris Est,2.359296,48.876793
994,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87212027:LongDistanceTrain,Strasbourg,7.734793,48.584532


In [None]:
#tous les arrêts des lignes Paris - Strasbourg
arrets.loc[lambda x: x.line_name.str.contains('Paris') & x.line_name.str.contains('Strasbourg')]

Unnamed: 0,line_id,line_name,route_id,route_name,stop_point_id,stop_point_name,lon,lat
992,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87192039:LongDistanceTrain,Metz,6.177052,49.109466
993,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87113001:LongDistanceTrain,Paris Est,2.359296,48.876793
994,line:SNCF:CSR:333100,Paris Est - Strasbourg,route:SNCF:CSR:333100,Paris Est - Strasbourg,stop_point:SNCF:87212027:LongDistanceTrain,Strasbourg,7.734793,48.584532
2239,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:87171926:LongDistanceTrain,Champagne-Ardenne TGV,3.994523,49.214769
2240,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:87182014:LongDistanceTrain,Colmar,7.347756,48.073527
2241,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:80143503:LongDistanceTrain,Freiburg (Breisgau) Hbf,7.8416,47.9979
2242,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:80143198:LongDistanceTrain,Lahr (Schwarzw),7.8353,48.3406
2243,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:87142109:LongDistanceTrain,Lorraine TGV,6.169778,48.947713
2244,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:87147322:LongDistanceTrain,Meuse TGV,5.271024,48.978583
2245,line:SNCF:CSR:807100,Strasbourg - Paris Est,route:SNCF:CSR:807100,Strasbourg - Paris Est,stop_point:SNCF:80143099:LongDistanceTrain,Offenburg,7.9468,48.4765


# Trains et horaires

on cherche à récupérer les trains pour une route donnée (Paris -> Strasbourg par exemple) \
pour ce faire on requête l'objet vehicle_journeys 

In [137]:
def get_vehicle_journeys(date_start, route_id):
    '''
    fonction pour récupérer les vehicle_journeys d'une route donnée à une date donnée
    '''
    url = f"https://api.navitia.io/v1/coverage/sncf/routes/{route_id}/vehicle_journeys"
    params = {
        "from_datetime": date_start.strftime("%Y%m%dT%H%M%S"),
        "to_datetime": (date_start + pd.Timedelta(days=1)).strftime("%Y%m%dT%H%M%S"),
    }
    data = requests.get(url, params=params, auth=(API_KEY, "")).json()
    return data.get("vehicle_journeys", [])

In [149]:
trains = pd.json_normalize(get_vehicle_journeys(pd.to_datetime("2025-11-29 00:00:00"), 'route:SNCF:CSR:333100'))

In [144]:
trains['stop_times'] = trains['stop_times'].apply(lambda x: x[0] if isinstance(x, list) else [])

In [205]:
trains.head()

Unnamed: 0,id,name,stop_times,codes,calendars,disruptions,headsign,journey_pattern.id,journey_pattern.name,validity_pattern.beginning_date,validity_pattern.days,trip.id,trip.name
0,vehicle_journey:SNCF:2025-11-28:7691:1187:Long...,7691,"[{'arrival_time': '100800', 'utc_arrival_time'...","[{'type': 'rt_piv', 'value': '2025-11-28:7691:...","[{'week_pattern': {'monday': False, 'tuesday':...",[],7691,journey_pattern:2850,journey_pattern:2850,20251127,0000000000000000000000000000000000000000000000...,SNCF:2025-11-28:7691:1187:LongDistanceTrain,7691
1,vehicle_journey:SNCF:2025-11-28:7692:1187:Long...,7692,"[{'arrival_time': '125000', 'utc_arrival_time'...","[{'type': 'rt_piv', 'value': '2025-11-28:7692:...","[{'week_pattern': {'monday': False, 'tuesday':...","[{'templated': False, 'rel': 'disruptions', 'i...",7692,journey_pattern:2851,journey_pattern:2851,20251127,0000000000000000000000000000000000000000000000...,SNCF:2025-11-28:7692:1187:LongDistanceTrain,7692
2,vehicle_journey:SNCF:2025-11-28:7693:1187:Long...,7693,"[{'arrival_time': '161600', 'utc_arrival_time'...","[{'type': 'rt_piv', 'value': '2025-11-28:7693:...","[{'week_pattern': {'monday': False, 'tuesday':...","[{'templated': False, 'rel': 'disruptions', 'i...",7693,journey_pattern:2852,journey_pattern:2852,20251127,0000000000000000000000000000000000000000000000...,SNCF:2025-11-28:7693:1187:LongDistanceTrain,7693
3,vehicle_journey:SNCF:2025-11-28:7694:1187:Long...,7694,"[{'arrival_time': '195700', 'utc_arrival_time'...","[{'type': 'rt_piv', 'value': '2025-11-28:7694:...","[{'week_pattern': {'monday': False, 'tuesday':...",[],7694,journey_pattern:2851,journey_pattern:2851,20251127,0000000000000000000000000000000000000000000000...,SNCF:2025-11-28:7694:1187:LongDistanceTrain,7694
4,vehicle_journey:SNCF:2025-11-29:7691:1187:Long...,7691,"[{'arrival_time': '100800', 'utc_arrival_time'...","[{'type': 'rt_piv', 'value': '2025-11-29:7691:...","[{'week_pattern': {'monday': False, 'tuesday':...","[{'templated': False, 'rel': 'disruptions', 'i...",7691,journey_pattern:2850,journey_pattern:2850,20251127,0000000000000000000000000000000000000000000000...,SNCF:2025-11-29:7691:1187:LongDistanceTrain,7691


In [None]:
def decompose_time(hhmmss: str) -> tuple[int, int, int]:
    hh = int(hhmmss[0:2])
    mm = int(hhmmss[2:4])
    ss = int(hhmmss[4:6])
    return hh, mm, ss

def get_stop_times(trajet):
    date = trajet['id'].split(':')[2]
    date = datetime.strptime(date, "%Y-%m-%d")

    last_time_in_seconds = None
    stops = []

    for st in trajet["stop_times"]:
        raw_arr = st.get("arrival_time")
        raw_dep = st.get("departure_time")

        # convertir HHMMSS en (h, m, s)
        h_arr, m_arr, s_arr = decompose_time(raw_arr) if raw_arr else (0, 0, 0)
        h_dep, m_dep, s_dep = decompose_time(raw_dep) if raw_dep else (0, 0, 0)

        # calculer secondes depuis 00:00:00
        arr_seconds = h_arr * 3600 + m_arr * 60 + s_arr

        # si on détecte que l'heure "recule", c’est qu’on a traversé minuit
        if last_time_in_seconds is not None and arr_seconds < last_time_in_seconds:
            date += timedelta(days=1)

        # construire les datetime complets
        arrival_dt = datetime.combine(date, datetime.min.time()).replace(
            hour=h_arr, minute=m_arr, second=s_arr
        )
        departure_dt = datetime.combine(date, datetime.min.time()).replace(
            hour=h_dep, minute=m_dep, second=s_dep
        )

        stops.append({
            "stop_point_id": st["stop_point"]["id"],
            "stop_name": st["stop_point"]["name"],
            "arrival_dt": arrival_dt,
            "departure_dt": departure_dt,
        })

        # mettre à jour la dernière heure du dernier arrêt parcouru 
        last_time_in_seconds = arr_seconds
        
    return stops

In [187]:
get_stop_times(trains.iloc[3])

[{'stop_point_id': 'stop_point:SNCF:87212027:LongDistanceTrain',
  'stop_name': 'Strasbourg',
  'arrival_dt': datetime.datetime(2025, 11, 28, 19, 57),
  'departure_dt': datetime.datetime(2025, 11, 28, 19, 57)},
 {'stop_point_id': 'stop_point:SNCF:87192039:LongDistanceTrain',
  'stop_name': 'Metz',
  'arrival_dt': datetime.datetime(2025, 11, 28, 20, 47),
  'departure_dt': datetime.datetime(2025, 11, 28, 21, 1)},
 {'stop_point_id': 'stop_point:SNCF:87113001:LongDistanceTrain',
  'stop_name': 'Paris Est',
  'arrival_dt': datetime.datetime(2025, 11, 28, 22, 24),
  'departure_dt': datetime.datetime(2025, 11, 28, 22, 24)}]