🚌 Projet MDM - Mobilité Durable en Montagne ⛰️

*Author : Laurent Sorba*

*Date : 04/07/2025*

**Description :**

This notebook filters and extracts public transportation data (GTFS format) for a given department number. In our case, that is the Isère department (code 38) in France.
The notebook downloads GTFS (General Transit Feed Specification) data from various transportation providers and uses precise administrative boundaries to spatially filter bus stops and routes that operate within the department boundaries.

See https://github.com/data-for-good-grenoble/mobilite_durable/issues/13

## Définition des fonctions

In [None]:
import io
import json
import zipfile
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import Point


# 1. Download and extract the GTFS data
def download_gtfs_data(gtfs_url):
    """Download a GTFS zip archive and load its main tables.

    Args:
        gtfs_url: URL of a GTFS feed packaged as a zip archive.

    Returns:
        A tuple ``(stops_df, routes_df, stop_times_df, trips_df)`` of pandas
        DataFrames. ``stop_times_df`` and ``trips_df`` are both ``None`` as
        soon as either ``stop_times.txt`` or ``trips.txt`` is missing from
        the archive.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an HTTP error status.
        zipfile.BadZipFile: If the downloaded payload is not a zip archive.
        KeyError: If ``stops.txt`` or ``routes.txt`` is missing.
    """
    print(f"=== T√©l√©charge les donn√©es GTFS depuis {gtfs_url} ===")
    # Bound the wait and fail on HTTP errors instead of handing an error page
    # to zipfile.
    response = requests.get(gtfs_url, timeout=30)
    response.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:

        def read_table(member_name):
            # Close each archive member as soon as pandas has parsed it.
            with zip_file.open(member_name) as member:
                return pd.read_csv(member)

        # Mandatory tables: a missing file raises KeyError to the caller.
        stops_df = read_table("stops.txt")
        print(f"Charg√© {len(stops_df)} arr√™ts")
        routes_df = read_table("routes.txt")
        print(f"Charg√© {len(routes_df)} lignes")

        # Linking tables: treated as an all-or-nothing pair, because routes
        # can only be matched to stops when both are available.
        try:
            stop_times_df = read_table("stop_times.txt")
            print(f"(Charg√© {len(stop_times_df)} stop_times)")
            trips_df = read_table("trips.txt")
            print(f"(Charg√© {len(trips_df)} trips)")
        except KeyError:
            stop_times_df = None
            trips_df = None

    return stops_df, routes_df, stop_times_df, trips_df


# 2. Filter the stops by department (Isère, code 38, by default)
def filter_stops_by_department(stops_df, department_code="38"):
    """Return the stops that belong to the given department.

    Thin wrapper that delegates to ``filter_with_administrative_boundaries``;
    the department code defaults to "38".
    """
    stops_in_department = filter_with_administrative_boundaries(stops_df, department_code)
    return stops_in_department


# 3. Filter the transit routes that serve those stops
def filter_routes_by_stops(routes_df, filtered_stops, trips_df=None, stop_times_df=None):
    """Return the routes that serve at least one of the filtered stops.

    The link is resolved as stop -> stop_times -> trips -> routes. When either
    ``trips_df`` or ``stop_times_df`` is ``None`` that chain cannot be
    followed, and ``routes_df`` is returned unchanged.
    """
    # Without both linking tables there is nothing to filter on.
    if trips_df is None or stop_times_df is None:
        return routes_df

    # Trips calling at one of the retained stops.
    kept_stop_ids = filtered_stops["stop_id"].unique()
    calls_at_kept_stop = stop_times_df["stop_id"].isin(kept_stop_ids)
    kept_trip_ids = stop_times_df.loc[calls_at_kept_stop, "trip_id"].unique()

    # Routes those trips belong to.
    is_kept_trip = trips_df["trip_id"].isin(kept_trip_ids)
    kept_route_ids = trips_df.loc[is_kept_trip, "route_id"].unique()

    return routes_df[routes_df["route_id"].isin(kept_route_ids)]


def filter_with_administrative_boundaries(stops_df, department_code):
    """Keep only the stops located inside a department's boundary.

    Args:
        stops_df: Stops table, passed to ``create_stops_geodataframe`` to be
            turned into point geometries.
        department_code: Department code forwarded to
            ``download_department_boundary``.

    Returns:
        The result of ``spatial_filter_stops`` for the stops and the
        department boundary.

    Raises:
        RuntimeError: If no boundary could be obtained for the department.
    """
    # 1. Load the administrative boundary of the department
    department_boundary = download_department_boundary(department_code)

    if department_boundary is None:
        # There is nothing to filter against: stop explicitly rather than
        # letting the spatial join run on a None geometry.
        raise RuntimeError("Impossible de t√©l√©charger les limites administratives.")

    # 2. Convert the stops into a GeoDataFrame
    stops_gdf = create_stops_geodataframe(stops_df)

    # 3. Keep the stops that fall inside the department
    filtered_stops = spatial_filter_stops(stops_gdf, department_boundary)

    return filtered_stops


def download_department_boundary(department_code="38"):
    """Return the boundary geometry of a department, or ``None``.

    The contour is first requested from ``load_department_geometry``. If that
    yields nothing, the hard-coded rectangle built by
    ``create_isere_boundary_from_coords`` is used as a fallback, but only for
    department "38": that rectangle describes Isère and would silently give
    wrong results for any other department.

    Args:
        department_code: Department code to look up (defaults to "38").

    Returns:
        The boundary geometry, or ``None`` when neither source can provide one.
    """
    # Preferred source: the department contours file
    try:
        boundary = load_department_geometry(department_code)
        if boundary is not None:
            return boundary
    except Exception as e:
        print(f"Erreur avec data.gouv.fr: {e}")

    # The predefined coordinates only make sense for Isère.
    if str(department_code) != "38":
        return None

    # Fallback: predefined coordinates
    try:
        return create_isere_boundary_from_coords()
    except Exception as e:
        print(f"Erreur avec coordonn√©es pr√©d√©finies: {e}")

    return None


def download_from_data_gouv(file_path):
    """Download the French department contours GeoJSON and save it locally.

    The payload is checked for a ``features`` entry before anything is
    written, and the file is written under a temporary name and then renamed.
    A malformed response or an interrupted write therefore never leaves a
    broken file at ``file_path``.

    Args:
        file_path: Destination of the GeoJSON file (``str`` or ``Path``).
            Missing parent directories are created.

    Returns:
        The destination as a ``Path`` on success, ``None`` on any failure
        (the error is printed, not raised).
    """
    # URL of the French department contours
    url = "https://www.data.gouv.fr/fr/datasets/r/90b9341a-e1f7-4d75-a73c-bbc010c7feeb"
    print(f"=== T√©l√©charge les contours des d√©partements fran√ßais {url} ===")
    try:
        # Download and decode the GeoJSON
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        geojson_data = response.json()

        # Validate before touching the disk
        department_count = len(geojson_data["features"])

        # Create the target directory if needed
        file_path = Path(file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Write next to the target, then rename into place
        temp_path = file_path.with_name(file_path.name + ".tmp")
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(geojson_data, f, ensure_ascii=False, indent=2)
        temp_path.replace(file_path)

        print(f"Fichier sauvegard√© avec succ√®s : {file_path}")
        print(f"Nombre de d√©partements : {department_count}")
        return file_path

    except Exception as e:
        # Best effort: report the problem and let the caller handle None.
        print(f"Erreur lors du t√©l√©chargement depuis data.gouv.fr: {e}")
        return None


def load_department_geometry(department_code="38"):
    """Load the geometry of one department from the local contours file.

    The GeoJSON file is read from a path relative to the current working
    directory; when it does not exist yet it is first fetched with
    ``download_from_data_gouv``.

    Args:
        department_code: Department code to look up (defaults to "38").
            Numeric values are accepted and zero-padded to two characters,
            so ``38`` and ``1`` are looked up as "38" and "01".

    Returns:
        The geometry of the first matching row, or ``None`` when the file
        cannot be obtained or read, or when the code is not found.
    """
    # Local copy of the department contours
    file = "../data/transportdatagouv/contour-des-departements.geojson"
    print(f"=== Charge les contours des d√©partements fran√ßais depuis le fichier {file} ===")

    try:
        # Download the file on first use
        if not Path(file).is_file():
            print("-> Le fichier n'existe pas, t√©l√©chargement n√©cessaire.")
            download_from_data_gouv(file)
            if not Path(file).is_file():
                # The download already reported its own error.
                return None

        # Load the GeoJSON
        france_departments = gpd.read_file(file)

        # The "code" column is compared as text (presumably values such as
        # "01", "2A", "38" - confirm against the dataset), so normalise
        # numeric input instead of silently matching nothing.
        wanted_code = str(department_code).zfill(2)
        department_boundary = france_departments[france_departments["code"] == wanted_code]

        if department_boundary.empty:
            print(f"D√©partement {department_code} non trouv√© dans les donn√©es")
            return None

        return department_boundary.iloc[0].geometry

    except Exception as e:
        print(f"Erreur lors du chargement des contours des d√©partements: {e}")
        return None


def create_isere_boundary_from_coords():
    """Build a rough rectangular stand-in for the Isère boundary.

    Returns:
        A shapely ``Polygon`` whose corners are (longitude, latitude) pairs
        spanning 5.2 to 6.3 in longitude and 44.8 to 45.9 in latitude.
    """
    from shapely.geometry import Polygon

    print("=== Fallback: Utiliser des coordonn√©es pr√©d√©finies de l'Is√®re ===")

    # Approximate extent of the department
    # NOTE(review): this is only a bounding rectangle; parts of the real
    # department may lie outside it - confirm before relying on it.
    west, east = 5.2, 6.3
    south, north = 44.8, 45.9

    corners = [
        (west, south),  # South-West
        (east, south),  # South-East
        (east, north),  # North-East
        (west, north),  # North-West
        (west, south),  # Back to the start to close the ring
    ]
    return Polygon(corners)


def create_stops_geodataframe(stops_df):
    """Turn the stops table into a GeoDataFrame of points.

    One point is built per row from the ``stop_lon`` / ``stop_lat`` columns,
    and the result is tagged with the EPSG:4326 (WGS84) coordinate system.
    """
    longitudes = stops_df["stop_lon"]
    latitudes = stops_df["stop_lat"]

    # Longitude is x and latitude is y
    stop_points = [Point(lon, lat) for lon, lat in zip(longitudes, latitudes)]

    return gpd.GeoDataFrame(stops_df, geometry=stop_points, crs="EPSG:4326")


def spatial_filter_stops(stops_gdf, department_boundary):
    """Keep the stops that lie within the department boundary.

    Args:
        stops_gdf: GeoDataFrame of stops with point geometries.
        department_boundary: Boundary geometry. If the object exposes a
            non-``None`` ``crs`` attribute that CRS is used, otherwise
            EPSG:4326 is assumed.

    Returns:
        The rows of ``stops_gdf`` whose geometry is within the boundary, with
        the same columns as the input (reprojected to the boundary CRS when
        the two differ).
    """
    print(
        "=== Filtre spatialement les arr√™ts qui se trouvent dans les limites du d√©partement ==="
    )
    # Wrap the boundary in a geometry-only GeoDataFrame: any data column here
    # would be copied onto every matched stop by the join.
    boundary_crs = getattr(department_boundary, "crs", None)
    if boundary_crs is None:
        boundary_crs = "EPSG:4326"
    boundary_gdf = gpd.GeoDataFrame(geometry=[department_boundary], crs=boundary_crs)

    # Make sure both GeoDataFrames share the same CRS
    if stops_gdf.crs != boundary_gdf.crs:
        stops_gdf = stops_gdf.to_crs(boundary_gdf.crs)

    # Spatial filter: keep the stops that are within the boundary
    filtered_stops = gpd.sjoin(stops_gdf, boundary_gdf, how="inner", predicate="within")

    # Drop the bookkeeping column added by sjoin
    filtered_stops = filtered_stops.drop(columns=["index_right"], errors="ignore")

    return filtered_stops

## Définition des variables

In [None]:
# Show every column when a DataFrame is printed
pd.set_option("display.max_columns", None)

# Department to extract and the feed to extract it from
department_code = "38"
data_set = "agregat-oura"

# GTFS feeds covering Isère, keyed by dataset name; the comment above each
# entry links to its dataset page
gtfs_sources = {
    # https://transport.data.gouv.fr/datasets/agregat-oura
    "agregat-oura": "https://api.oura3.cityway.fr/dataflow/offre-tc/download?provider=OURA&dataFormat=GTFS&dataProfil=OPENDATA",
    # https://transport.data.gouv.fr/datasets/reseau-cars-region-isere-38
    "reseau-cars-region-isere-38": "https://www.itinisere.fr/fr/donnees-open-data/169/OpenData/Download?fileName=CG38.GTFS.zip",
    # https://transport.data.gouv.fr/datasets/horaires-theoriques-du-reseau-tag
    "TAG_Grenoble": "https://data.mobilites-m.fr/api/gtfs/SEM",
}

# Download URL of the selected feed
gtfs_url = gtfs_sources[data_set]

In [None]:
# Banner naming the department code and the dataset used by the cells below.
print(f"=== Filtrage avec limites administratives du '{department_code}' sur '{data_set}' ===")

### Télécharger les données GTFS

In [None]:
# Fetch the selected feed. stop_times_df and trips_df are both None when the
# archive lacks stop_times.txt or trips.txt.
stops_df, routes_df, stop_times_df, trips_df = download_gtfs_data(gtfs_url)

### Filtrer les arrêts

In [None]:
# Keep only the stops located inside the department, then report how many
# remain out of the whole feed.
filtered_stops = filter_stops_by_department(stops_df, department_code)
print(
    f"Nombre d'arr√™ts dans le {department_code}: {len(filtered_stops)} sur un total de {len(stops_df)}"
)

### Filtrer les lignes de transport

In [None]:
# Keep the routes serving at least one retained stop. If trips_df or
# stop_times_df is None, filter_routes_by_stops returns every route.
filtered_routes = filter_routes_by_stops(routes_df, filtered_stops, trips_df, stop_times_df)
print(
    f"Nombre de lignes dans le {department_code}: {len(filtered_routes)} sur un total de {len(routes_df)}"
)

## Résultats

In [None]:
# Sample of the filtered stops
print("\nPremiers arr√™ts filtr√©s:")
# Some of these columns (e.g. stop_code) are optional in GTFS, so show only
# the ones this feed provides instead of raising KeyError on a missing one.
print(
    filtered_stops.filter(
        items=["stop_id", "stop_code", "stop_name", "stop_lat", "stop_lon"]
    ).head(10)
)

In [None]:
# Sample of the filtered routes
print("\nPremi√®res lignes filtr√©es:")
# A feed may omit route_short_name or route_long_name, so show only the
# columns that are present instead of raising KeyError on a missing one.
print(
    filtered_routes.filter(
        items=["route_id", "route_short_name", "route_long_name"]
    ).head(10)
)