In [5]:
import json
import pandas as pd
from pathlib import Path

# Everything is located relative to the notebook's working directory, which
# is expected to be PROJECT/.../zfe-scm; the data folder sits beside it.
HERE = Path().resolve()
print("cwd =", HERE)

PROJECT_ROOT = HERE.parent
DATA_DIR = PROJECT_ROOT / "data"
print("data dir =", DATA_DIR)

# --- full paths of the three inputs ---
aires_path, voies_path, zfe_ids_path = (
    DATA_DIR / file_name
    for file_name in ("aires.geojson", "voies.geojson", "zfe_ids.csv")
)

# Quick visual check that each input file is present.
print(*(path.exists() for path in (aires_path, voies_path, zfe_ids_path)))

# --- load the files ---
# Both GeoJSON files are read as UTF-8 text and parsed into plain dicts.
aires_gj = json.loads(aires_path.read_text(encoding="utf-8"))
voies_gj = json.loads(voies_path.read_text(encoding="utf-8"))

# The ZFE identifier table is a semicolon-separated CSV.
zfe_ids = pd.read_csv(zfe_ids_path, sep=";")

print("aires keys:", aires_gj.keys())
print("voies keys:", voies_gj.keys())
print("zfe_ids shape:", zfe_ids.shape)
zfe_ids.head()


cwd = C:\Users\Pierre\Desktop\Projet Python pour la Data Science\zfe-scm
data dir = C:\Users\Pierre\Desktop\Projet Python pour la Data Science\data
True True True
aires keys: dict_keys(['type', 'features'])
voies keys: dict_keys(['type', 'features'])
zfe_ids shape: (43, 5)


Unnamed: 0,siren,code,epci_principal,autres_siren,forme_juridique
0,248000531,AMIENS,Amiens Métropole,,Métropole
1,244900015,ANGERS,Angers Loire Métropole,,Métropole
2,200066793,ANNECY,Grand Annecy,,Communauté d'agglomération
3,200011773,ANNEMASSE,Annemasse agglo,,Communauté d'agglomération
4,248400251,AVIGNON,Grand Avignon,,Métropole


In [6]:
def flatten_geojson(gj):
    """Flatten a parsed GeoJSON collection into one table row per feature.

    Each row contains the feature's ``properties`` plus every entry of its
    ``publisher`` object, the latter stored under ``publisher_<key>``
    columns. Geometry is not included.

    Parameters
    ----------
    gj : dict
        Parsed GeoJSON with a ``"features"`` list of feature dicts.

    Returns
    -------
    pandas.DataFrame
        One row per feature, in input order. A feature whose ``properties``
        or ``publisher`` member is missing or null contributes no columns
        for that member.
    """
    rows = []
    for feat in gj["features"]:
        # `.get(key, {})` only covers an absent key; a member that is present
        # but null comes back as None, so `or {}` normalises both cases.
        # dict(...) makes a shallow copy so the input feature is not mutated.
        props = dict(feat.get("properties") or {})
        pub = feat.get("publisher") or {}
        props.update({f"publisher_{k}": v for k, v in pub.items()})
        rows.append(props)
    return pd.DataFrame(rows)

# Flatten both collections with the same helper.
aires_df, voies_df = map(flatten_geojson, (aires_gj, voies_gj))

# Cell output: an (aires, voies) pair of head() previews.
(aires_df.head(), voies_df.head())


(                                 Name autobus_autocars_critair  \
 0  Montpellier Méditerranée Métropole                       V2   
 1                                 NaN                     None   
 2                                 NaN                       V4   
 3                                 NaN                       V5   
 4                                 NaN                     None   
 
   autobus_autocars_horaires  date_debut    date_fin deux_rm_critair  \
 0                      24/7  2022-07-01        None            None   
 1                      None  2025-01-01  2026-12-31              NC   
 2                      24/7  2022-09-01  2032-09-01              V4   
 3                      24/7  2025-01-01  2034-12-31              V5   
 4                      None  2022-03-01         NaN            None   
 
   deux_rm_horaires                  id pl_critair pl_horaires  ...  \
 0             24/4   243400017-ZFE-001         V3        24/7  ...   
 1             None 

In [7]:
# Write each flattened table to the data folder (no index column),
# then report the table sizes.
for csv_name, table in (("aires_flat.csv", aires_df), ("voies_flat.csv", voies_df)):
    table.to_csv(DATA_DIR / csv_name, index=False)

print("aires_flat :", aires_df.shape)
print("voies_flat :", voies_df.shape)

aires_flat : (37, 47)
voies_flat : (8072, 32)


In [9]:
import pandas as pd
from pathlib import Path

ROOT = Path().resolve().parent   # the notebook runs from zfe-scm; go up one level
DATA = ROOT / "data"

aires = pd.read_csv(DATA / "aires_flat.csv")
voies = pd.read_csv(DATA / "voies_flat.csv")
zfe_ids = pd.read_csv(DATA / "zfe_ids.csv", sep=";")

print("aires:", aires.shape, "voies:", voies.shape)

# 1) Columns to keep.
# The publisher, validity-period and per-vehicle restriction columns are
# common to both tables; they are written once so the two selections
# cannot drift apart.
publisher_cols = [
    "publisher_zfe_id",
    "publisher_nom",
    "publisher_siren",
    "publisher_forme_juridique",
]
period_cols = ["date_debut", "date_fin"]
restriction_cols = [
    "vp_critair", "vp_horaires",
    "vul_critair", "vul_horaires",
    "pl_critair", "pl_horaires",
    "autobus_autocars_critair", "autobus_autocars_horaires",
    "deux_rm_critair", "deux_rm_horaires",
]

aires_keep = [
    *publisher_cols,
    "id",
    *period_cols,
    *restriction_cols,
    "url_arrete",
    "url_site_information",
]

voies_keep = [
    *publisher_cols,
    "id",
    "osm_id",
    "ref",
    "one_way",
    *period_cols,
    *restriction_cols,
    "zfe_derogation",
    "url_arrete",
    "url_site",
    "url_site_information",
]

aires_clean = aires[aires_keep].copy()
voies_clean = voies[voies_keep].copy()

# 2) Parse the date columns; values that cannot be parsed become NaT
#    because of errors="coerce".
for df in (aires_clean, voies_clean):
    for col in period_cols:
        df[col] = pd.to_datetime(df[col], errors="coerce")

# 3) ZFE summary table: one row per (zfe_id, siren, nom) group.
zfe_meta = (
    aires_clean
    .groupby(["publisher_zfe_id", "publisher_siren", "publisher_nom"], as_index=False)
    .agg(
        first_date_debut=("date_debut", "min"),
        last_date_debut=("date_debut", "max"),
        # The built-in "min" already skips NaT and yields NaT for a group
        # with no end date at all, so no custom lambda is needed.
        first_date_fin=("date_fin", "min"),
        n_aires=("id", "nunique"),
        has_vp_restriction=("vp_critair", lambda s: s.notna().any()),
    )
)

# 4) Join with zfe_ids to bring in epci_principal & forme_juridique.
# The key is cast to an explicit 64-bit integer: plain `int` maps to a
# 32-bit type on Windows with NumPy 1.x. assign() returns a new frame
# instead of mutating the one just read.
zfe_ids = zfe_ids.assign(siren=zfe_ids["siren"].astype("int64"))
zfe_meta = zfe_meta.merge(
    zfe_ids,
    left_on="publisher_siren",
    right_on="siren",
    how="left",
)
# NOTE(review): with how="left", a ZFE whose publisher SIREN is not listed in
# zfe_ids["siren"] keeps NaN in the joined columns — confirm this is intended
# (zfe_ids also has `code` and `autres_siren` columns that are not used here).

# 5) Save
aires_clean.to_csv(DATA / "aires_clean.csv", index=False)
voies_clean.to_csv(DATA / "voies_clean.csv", index=False)
zfe_meta.to_csv(DATA / "zfe_meta.csv", index=False)

print("aires_clean:", aires_clean.shape)
print("voies_clean:", voies_clean.shape)
print("zfe_meta:", zfe_meta.shape)

zfe_meta.sort_values("first_date_debut")


aires: (37, 47) voies: (8072, 32)
aires_clean: (37, 19)
voies_clean: (8072, 24)
zfe_meta: (19, 13)


Unnamed: 0,publisher_zfe_id,publisher_siren,publisher_nom,first_date_debut,last_date_debut,first_date_fin,n_aires,has_vp_restriction,siren,code,epci_principal,autres_siren,forme_juridique
6,GRENOBLE,253800825,Syndicat Mixte des Mobilités de l'Aire Grenobl...,2019-05-02,2019-05-02,NaT,1,False,253800825.0,GRENOBLE,Grenoble Alpes métropole,,Métropole
12,PARIS,217500016,Ville de Paris,2021-06-01,2021-06-01,NaT,1,True,217500016.0,PARIS,Ville de Paris,,Autre collectivité territoriale
13,REIMS,200067213,CU du Grand Reims,2022-01-01,2023-01-01,2022-12-31,2,True,200067213.0,REIMS,Grand Reims,,Métropole
16,SAINT-ETIENNE,244200770,Saint-Étienne Métropole,2022-01-31,2027-01-01,2024-12-31,3,False,244200770.0,SAINT-ETIENNE,Saint-Étienne Métropole,,Métropole
11,NICE,200030195,Métropole Nice Côte d'Azur,2022-01-31,2024-01-31,2022-12-31,3,True,200030195.0,NICE,Métropole Nice Côte d'Azur,,Métropole
18,TOULOUSE,243100518,Toulouse Métropole,2022-03-01,2024-01-01,NaT,4,True,243100518.0,TOULOUSE,Toulouse Métropole,253100986.0,Métropole
9,MONTPELLIER,243400017,Montpellier Méditerranée Métropole,2022-07-01,2022-07-01,NaT,1,True,243400017.0,MONTPELLIER,Montpellier Méditerranée Métropole,,Métropole
8,MARSEILLE-AIX EN PROVENCE,200054807,Métropole d'Aix-Marseille-Provence,2022-09-01,2023-09-01,2032-08-31,2,True,200054807.0,MARSEILLE-AIX EN PROVENCE,Métropole Aix-Marseille-Provence,,Métropole
15,ROUEN,200023414,Métropole Rouen Normandie,2022-09-01,2022-09-01,2032-09-01,1,True,200023414.0,ROUEN,Métropole Rouen Normandie,,Métropole
17,STRASBOURG,246700488,Eurométropole de Strasbourg,2023-01-01,2028-01-01,NaT,7,True,246700488.0,STRASBOURG,Eurométropole de Strasbourg,,Métropole


In [10]:
import json
from pathlib import Path

import pandas as pd
from shapely.geometry import shape, Point

# Paths: the notebook runs from zfe-scm/, and data/ sits beside that folder.
HERE = Path().resolve()
ROOT = HERE.parent
DATA = ROOT / "data"

# 1) Load aires.geojson and keep the features whose publisher is the
#    Grenoble ZFE.
gj = json.loads((DATA / "aires.geojson").read_text(encoding="utf-8"))

grenoble_feats = list(
    filter(
        lambda feature: feature["publisher"]["zfe_id"] == "GRENOBLE",
        gj["features"],
    )
)

# Cell output: number of matching features and the first one's publisher name.
(len(grenoble_feats), grenoble_feats[0]["publisher"]["nom"])


(1, "Syndicat Mixte des Mobilités de l'Aire Grenobloise")

In [1]:
from pathlib import Path
import pandas as pd
import json
from shapely.geometry import shape, Point

from shapely.ops import unary_union  # used in step 2 to merge the ZFE polygons

# ---------- paths ----------
HERE = Path().resolve()          # zfe-scm folder
ROOT = HERE.parent               # project root
DATA = ROOT / "data"

# adjust the file name if the export was renamed
poll_path = DATA / "Export Moy. journalière - 20251204215149 - 2016-02-05 00_00 - 2024-02-05 21_00.csv"
aires_path = DATA / "aires.geojson"

# ---------- 1) load the CSV and keep NO2 ----------
df_raw = pd.read_csv(poll_path, sep=";", engine="python")

# date column: located by substring match on the header, then parsed
# into a new "date" column
date_debut_col = [c for c in df_raw.columns if "Date de début" in c][0]
df_raw["date"] = pd.to_datetime(df_raw[date_debut_col])

# keep NO2 rows only
df_no2 = df_raw[df_raw["Polluant"] == "NO2"].copy()

# build a clean dataset: useful columns, renamed, sorted by station and date
no2 = (
    df_no2[[
        "date",
        "code site",
        "nom site",
        "type d'implantation",
        "type d'influence",
        "valeur",
        "Latitude",
        "Longitude",
    ]]
    .rename(columns={
        "code site": "station_id",
        "nom site": "station_name",
        "type d'implantation": "station_env",
        "type d'influence": "station_influence",
        "valeur": "no2_ug_m3",
        "Latitude": "lat",
        "Longitude": "lon",
    })
    .sort_values(["station_id", "date"])
)

no2["zone"] = "GRENOBLE"

print("Aperçu NO2 :")
display(no2.head())
print("Stations :", no2["station_id"].unique())

# ---------- 2) load the Grenoble ZFE ----------
with aires_path.open(encoding="utf-8") as f:
    gj = json.load(f)

grenoble_feats = [feat for feat in gj["features"]
                  if feat["publisher"]["zfe_id"] == "GRENOBLE"]
if not grenoble_feats:
    # Fail with an explicit message rather than an IndexError further down.
    raise ValueError("No feature with publisher zfe_id 'GRENOBLE' in aires.geojson")

# The ZFE may be described by several features: merge all of their
# geometries instead of keeping only the first one.
zfe_geom = unary_union([shape(feat["geometry"]) for feat in grenoble_feats])

# ---------- 3) check whether each station lies inside the ZFE ----------
# One (lat, lon) pair per station: the first one seen for each id/name.
coords = (
    no2.groupby(["station_id", "station_name"])[["lat", "lon"]]
    .first()
    .reset_index()
)

# The row index is named explicitly so the loop does not rebind `_`,
# which IPython uses for the last cell output.
for _row_idx, row in coords.iterrows():
    pt = Point(row["lon"], row["lat"])  # Point takes (x, y), i.e. (lon, lat)
    inside = zfe_geom.contains(pt)
    print(f"{row['station_id']} ({row['station_name']}) dans la ZFE GRENOBLE ? -> {inside}")

# (optional) save the clean dataset for later use
out_path = DATA / "pollution_grenoble_no2_daily_clean.csv"
no2.to_csv(out_path, index=False)
print("✅ Dataset NO2 propre sauvegardé ->", out_path)


Aperçu NO2 :


Unnamed: 0,date,station_id,station_name,station_env,station_influence,no2_ug_m3,lat,lon,zone
0,2016-02-05,FR15043,Grenoble Les Frenes,Urbaine,Fond,27.0,45.1619,5.7356,GRENOBLE
2,2016-02-06,FR15043,Grenoble Les Frenes,Urbaine,Fond,17.0,45.1619,5.7356,GRENOBLE
4,2016-02-07,FR15043,Grenoble Les Frenes,Urbaine,Fond,15.0,45.1619,5.7356,GRENOBLE
6,2016-02-08,FR15043,Grenoble Les Frenes,Urbaine,Fond,26.0,45.1619,5.7356,GRENOBLE
8,2016-02-09,FR15043,Grenoble Les Frenes,Urbaine,Fond,15.0,45.1619,5.7356,GRENOBLE


Stations : ['FR15043' 'FR15046']
FR15043 (Grenoble Les Frenes) dans la ZFE GRENOBLE ? -> True
FR15046 (Grenoble Boulevards) dans la ZFE GRENOBLE ? -> True
✅ Dataset NO2 propre sauvegardé -> C:\Users\Pierre\Desktop\Projet Python pour la Data Science\data\pollution_grenoble_no2_daily_clean.csv


In [1]:
from pathlib import Path
import pandas as pd
import json
from shapely.geometry import shape, Point
from shapely.ops import unary_union

# ---------- paths ----------
HERE = Path().resolve()      # zfe-scm folder
ROOT = HERE.parent           # project root
DATA = ROOT / "data"

poll_path = DATA / "Export Moy. journalière - 20251205011655 - 2016-02-05 00_00 - 2024-02-05 00_00.csv"
aires_path = DATA / "aires.geojson"

print("Fichier pollution :", poll_path.exists(), poll_path)

# ---------- 1) load the CSV and keep NO2 ----------
df_raw = pd.read_csv(poll_path, sep=";", engine="python")

# Locate the "Date de début" column by substring (the header may carry a
# hidden character), then parse it into a new "date" column.
date_debut_col = [name for name in df_raw.columns if "Date de début" in name][0]
df_raw["date"] = pd.to_datetime(df_raw[date_debut_col])

# NO2 rows only
is_no2 = df_raw["Polluant"] == "NO2"
df_no2 = df_raw[is_no2].copy()

# "Clean" dataset: useful columns, renamed, ordered by station then date.
# The mapping's insertion order also fixes the column order of the selection.
column_names = {
    "code site": "station_id",
    "nom site": "station_name",
    "type d'implantation": "station_env",
    "type d'influence": "station_influence",
    "valeur": "no2_ug_m3",
    "Latitude": "lat",
    "Longitude": "lon",
}
no2 = df_no2[["date", *column_names]]
no2 = no2.rename(columns=column_names)
no2 = no2.sort_values(["station_id", "date"])

print("Aperçu NO2 propre :")
display(no2.head())
print("Stations dans le fichier :", no2["station_id"].unique())

# ---------- 2) station metadata table ----------
# One row per distinct station description, with the first (lat, lon) seen.
station_keys = ["station_id", "station_name", "station_env", "station_influence"]
first_coords = no2.groupby(station_keys)[["lat", "lon"]].first()
stations_meta = first_coords.reset_index()

print("Table stations_meta :")
display(stations_meta)

# ---------- 3) load the Grenoble ZFE ----------
gj = json.loads(aires_path.read_text(encoding="utf-8"))

grenoble_feats = list(
    filter(
        lambda feature: feature["publisher"]["zfe_id"] == "GRENOBLE",
        gj["features"],
    )
)

# Merge the geometries of all matching features into a single shape.
geoms = list(map(lambda feature: shape(feature["geometry"]), grenoble_feats))
zfe_grenoble = unary_union(geoms)

# ---------- 4) vérifier pour chaque station si elle est dans la ZFE ----------
def is_in_zfe(row):
    """Tell whether the station described by ``row`` is inside the Grenoble ZFE.

    ``row`` must provide ``"lon"`` and ``"lat"`` entries. The point is built
    in (lon, lat) order and tested with ``zfe_grenoble.contains``.
    """
    longitude, latitude = row["lon"], row["lat"]
    station_point = Point(longitude, latitude)
    return zfe_grenoble.contains(station_point)

# Evaluate the membership test row by row and store it as a new column.
in_zfe_flags = stations_meta.apply(is_in_zfe, axis=1)
stations_meta["in_zfe_grenoble"] = in_zfe_flags

print("Stations et appartenance à la ZFE Grenoble :")
display(stations_meta)

# ---------- 5) save the clean datasets ----------
no2_out = DATA / "no2_all_stations_daily_clean.csv"
stations_out = DATA / "no2_stations_meta.csv"

for frame, target in ((no2, no2_out), (stations_meta, stations_out)):
    frame.to_csv(target, index=False)

print("✅ NO2 daily propre sauvegardé ->", no2_out)
print("✅ Méta stations sauvegardée ->", stations_out)


Fichier pollution : True C:\Users\Pierre\Desktop\Projet Python pour la Data Science\data\Export Moy. journalière - 20251205011655 - 2016-02-05 00_00 - 2024-02-05 00_00.csv
Aperçu NO2 propre :


Unnamed: 0,date,station_id,station_name,station_env,station_influence,no2_ug_m3,lat,lon
0,2016-02-05,FR15018,Voiron Urbain,Urbaine,Fond,30.0,45.360176,5.589419
10,2016-02-06,FR15018,Voiron Urbain,Urbaine,Fond,16.0,45.360176,5.589419
20,2016-02-07,FR15018,Voiron Urbain,Urbaine,Fond,16.0,45.360176,5.589419
30,2016-02-08,FR15018,Voiron Urbain,Urbaine,Fond,32.0,45.360176,5.589419
40,2016-02-09,FR15018,Voiron Urbain,Urbaine,Fond,13.0,45.360176,5.589419


Stations dans le fichier : ['FR15018' 'FR15045' 'FR15048' 'FR24038' 'FR27007' 'FR33102' 'FR33111'
 'FR33203' 'FR36002' 'FR36019']
Table stations_meta :


Unnamed: 0,station_id,station_name,station_env,station_influence,lat,lon
0,FR15018,Voiron Urbain,Urbaine,Fond,45.360176,5.589419
1,FR15045,Grenoble PeriurbSud,Périurbaine,Fond,45.0578,5.6767
2,FR15048,Gresivaudan Periurb,Périurbaine,Fond,45.2797,5.8822
3,FR24038,GAP JEAN JAURES,Urbaine,Trafic,44.548647,6.067237
4,FR27007,Bourgoin-Jallieu,Urbaine,Fond,45.60822,5.270872
5,FR33102,CHAMBERY LE HAUT,Urbaine,Fond,45.59667,5.918611
6,FR33111,SAINT JEAN,Urbaine,Fond,45.274723,6.349722
7,FR33203,ANNECY Rocade,Urbaine,Trafic,45.9097,6.11825
8,FR36002,Valence Urb. Centre,Urbaine,Fond,44.933514,4.893481
9,FR36019,Romans-sur-Isère,Urbaine,Fond,45.0425,5.0443


Stations et appartenance à la ZFE Grenoble :


Unnamed: 0,station_id,station_name,station_env,station_influence,lat,lon,in_zfe_grenoble
0,FR15018,Voiron Urbain,Urbaine,Fond,45.360176,5.589419,False
1,FR15045,Grenoble PeriurbSud,Périurbaine,Fond,45.0578,5.6767,False
2,FR15048,Gresivaudan Periurb,Périurbaine,Fond,45.2797,5.8822,False
3,FR24038,GAP JEAN JAURES,Urbaine,Trafic,44.548647,6.067237,False
4,FR27007,Bourgoin-Jallieu,Urbaine,Fond,45.60822,5.270872,False
5,FR33102,CHAMBERY LE HAUT,Urbaine,Fond,45.59667,5.918611,False
6,FR33111,SAINT JEAN,Urbaine,Fond,45.274723,6.349722,False
7,FR33203,ANNECY Rocade,Urbaine,Trafic,45.9097,6.11825,False
8,FR36002,Valence Urb. Centre,Urbaine,Fond,44.933514,4.893481,False
9,FR36019,Romans-sur-Isère,Urbaine,Fond,45.0425,5.0443,False


✅ NO2 daily propre sauvegardé -> C:\Users\Pierre\Desktop\Projet Python pour la Data Science\data\no2_all_stations_daily_clean.csv
✅ Méta stations sauvegardée -> C:\Users\Pierre\Desktop\Projet Python pour la Data Science\data\no2_stations_meta.csv
