In [None]:
import json
import pandas as pd
from pathlib import Path

# Start from the folder the notebook runs in: PROJECT/.../zfe-scm
HERE = Path().resolve()
print("cwd =", HERE)

# The project root is the parent of zfe-scm; the data folder hangs off it.
PROJECT_ROOT = HERE.parent
DATA_DIR = PROJECT_ROOT.joinpath("data")
print("data dir =", DATA_DIR)

# --- full paths of the three input files ---
aires_path, voies_path, zfe_ids_path = (
    DATA_DIR / filename
    for filename in ("aires.geojson", "voies.geojson", "zfe_ids.csv")
)

# Print, in order, whether each input file exists.
print(*(path.exists() for path in (aires_path, voies_path, zfe_ids_path)))

# --- load the files ---
# Both GeoJSON files are read as UTF-8 text and parsed with the json module.
aires_gj = json.loads(aires_path.read_text(encoding="utf-8"))
voies_gj = json.loads(voies_path.read_text(encoding="utf-8"))

# The id table is a semicolon-separated CSV.
zfe_ids = pd.read_csv(zfe_ids_path, sep=";")

print("aires keys:", aires_gj.keys())
print("voies keys:", voies_gj.keys())
print("zfe_ids shape:", zfe_ids.shape)
zfe_ids.head()


In [None]:
def flatten_geojson(gj):
    """Flatten a GeoJSON FeatureCollection into a DataFrame, one row per feature.

    Each row holds the feature's ``properties`` plus every entry of its
    ``publisher`` member, stored under a ``publisher_``-prefixed column name.
    Geometries are not carried over.

    Parameters
    ----------
    gj : dict
        Parsed GeoJSON object exposing a ``"features"`` list of dicts.

    Returns
    -------
    pandas.DataFrame
        One row per feature. A feature with no properties and no publisher
        yields a row of missing values.
    """
    rows = []
    for feat in gj["features"]:
        # GeoJSON permits "properties": null. ``.get(key, {})`` would return
        # that None and crash on the copy, so fall back to an empty dict for
        # both an absent and a null member.
        props = dict(feat.get("properties") or {})
        pub = feat.get("publisher") or {}
        props.update({f"publisher_{k}": v for k, v in pub.items()})
        rows.append(props)
    return pd.DataFrame(rows)

# Flatten both collections with the same helper, then preview the first rows.
aires_df, voies_df = map(flatten_geojson, (aires_gj, voies_gj))

aires_df.head(), voies_df.head()


In [None]:
# Write each flattened table to the data folder (no index column), then
# report the table sizes.
for frame, filename in ((aires_df, "aires_flat.csv"), (voies_df, "voies_flat.csv")):
    frame.to_csv(DATA_DIR / filename, index=False)

print("aires_flat :", aires_df.shape)
print("voies_flat :", voies_df.shape)

In [None]:
import pandas as pd
from pathlib import Path

# The notebook runs inside zfe-scm, so the project root is one level up.
ROOT = Path().resolve().parent
DATA = ROOT.joinpath("data")

# Read the flattened tables and the semicolon-separated ZFE id table.
aires = pd.read_csv(DATA.joinpath("aires_flat.csv"))
voies = pd.read_csv(DATA.joinpath("voies_flat.csv"))
zfe_ids = pd.read_csv(DATA.joinpath("zfe_ids.csv"), sep=";")

print("aires:", aires.shape, "voies:", voies.shape)

# 1) Columns to keep
# Column groups common to both tables are declared once and spliced into
# each list, so the two selections cannot drift apart.
_publisher_cols = [
    "publisher_zfe_id",
    "publisher_nom",
    "publisher_siren",
    "publisher_forme_juridique",
]
_date_cols = ["date_debut", "date_fin"]
_restriction_cols = [
    "vp_critair", "vp_horaires",
    "vul_critair", "vul_horaires",
    "pl_critair", "pl_horaires",
    "autobus_autocars_critair", "autobus_autocars_horaires",
    "deux_rm_critair", "deux_rm_horaires",
]

aires_keep = [
    *_publisher_cols,
    "id",
    *_date_cols,
    *_restriction_cols,
    "url_arrete",
    "url_site_information",
]

voies_keep = [
    *_publisher_cols,
    "id",
    "osm_id",
    "ref",
    "one_way",
    *_date_cols,
    *_restriction_cols,
    "zfe_derogation",
    "url_arrete",
    "url_site",
    "url_site_information",
]

# Independent copies: later column assignments must not touch the loaded frames.
aires_clean = aires.loc[:, aires_keep].copy()
voies_clean = voies.loc[:, voies_keep].copy()

# 2) Parse the date columns as datetimes; values that cannot be parsed
#    become NaT because of errors="coerce".
for frame in (aires_clean, voies_clean):
    for date_col in ("date_debut", "date_fin"):
        parsed = pd.to_datetime(frame[date_col], errors="coerce")
        frame[date_col] = parsed

# 3) ZFE metadata table (one row per ZFE)
def _earliest_or_nat(dates):
    """Return the smallest non-missing value of ``dates``, or NaT when all are missing."""
    if dates.notna().any():
        return dates.dropna().min()
    return pd.NaT


def _has_any_value(values):
    """Tell whether ``values`` holds at least one non-missing entry."""
    return values.notna().any()


_zfe_keys = ["publisher_zfe_id", "publisher_siren", "publisher_nom"]
zfe_meta = aires_clean.groupby(_zfe_keys, as_index=False).agg(
    first_date_debut=("date_debut", "min"),
    last_date_debut=("date_debut", "max"),
    first_date_fin=("date_fin", _earliest_or_nat),
    n_aires=("id", "nunique"),
    has_vp_restriction=("vp_critair", _has_any_value),
)

# 4) Left-join zfe_ids onto the metadata table on the SIREN number.
#    Intended to bring in epci_principal and forme_juridique — NOTE(review):
#    those column names are not visible in this cell; confirm against zfe_ids.csv.
# The right-hand key is cast to int before joining.
zfe_ids["siren"] = zfe_ids["siren"].astype(int)
zfe_meta = pd.merge(
    zfe_meta,
    zfe_ids,
    how="left",
    left_on="publisher_siren",
    right_on="siren",
)

# 5) Save the three tables as CSV (no index column)
for frame, filename in (
    (aires_clean, "aires_clean.csv"),
    (voies_clean, "voies_clean.csv"),
    (zfe_meta, "zfe_meta.csv"),
):
    frame.to_csv(DATA / filename, index=False)

print("aires_clean:", aires_clean.shape)
print("voies_clean:", voies_clean.shape)
print("zfe_meta:", zfe_meta.shape)

# Cell output: the metadata table ordered by earliest start date.
zfe_meta.sort_values("first_date_debut")


In [None]:
import json
from pathlib import Path

import pandas as pd
from shapely.geometry import shape, Point

# paths
HERE = Path().resolve()          # zfe-scm/
ROOT = HERE.parent               # project root
DATA = ROOT / "data"

# 1) Load the Grenoble ZFE from aires.geojson
with open(DATA / "aires.geojson", encoding="utf-8") as f:
    gj = json.load(f)

# "publisher" is looked up defensively: a feature that lacks the member (or
# carries a null one) is skipped instead of raising KeyError/TypeError.
grenoble_feats = [
    feat for feat in gj["features"]
    if (feat.get("publisher") or {}).get("zfe_id") == "GRENOBLE"
]

# Cell output: number of matching features and the publisher name of the first
# one. The name is None when nothing matched, instead of an IndexError.
len(grenoble_feats), (grenoble_feats[0]["publisher"].get("nom") if grenoble_feats else None)


In [None]:
from pathlib import Path
import pandas as pd
import json
from shapely.geometry import shape, Point

# ---------- paths ----------
HERE = Path().resolve()          # zfe-scm folder
ROOT = HERE.parent               # project root
DATA = ROOT.joinpath("data")

# adjust the file name if you renamed the export
poll_path = DATA.joinpath("Export Moy. journalière - 20251204215149 - 2016-02-05 00_00 - 2024-02-05 21_00.csv")
aires_path = DATA.joinpath("aires.geojson")

# ---------- 1) load the CSV and filter NO2 ----------
df_raw = pd.read_csv(poll_path, sep=";", engine="python")

# date column: the first header whose text contains "Date de début"
matching_cols = [name for name in df_raw.columns if "Date de début" in name]
date_debut_col = matching_cols[0]
df_raw["date"] = pd.to_datetime(df_raw[date_debut_col])

# keep the NO2 rows only, as an independent copy
is_no2 = df_raw["Polluant"] == "NO2"
df_no2 = df_raw.loc[is_no2].copy()

# Build a tidy dataset: keep "date" plus the columns listed below, give them
# short names, order by station then date, and tag every row with the zone.
_column_renames = {
    "code site": "station_id",
    "nom site": "station_name",
    "type d'implantation": "station_env",
    "type d'influence": "station_influence",
    "valeur": "no2_ug_m3",
    "Latitude": "lat",
    "Longitude": "lon",
}

no2 = (
    df_no2[["date", *_column_renames]]
    .rename(columns=_column_renames)
    .sort_values(["station_id", "date"])
    .assign(zone="GRENOBLE")
)

print("Aperçu NO2 :")
display(no2.head())
print("Stations :", no2["station_id"].unique())

# ---------- 2) load the Grenoble ZFE ----------
with aires_path.open(encoding="utf-8") as f:
    gj = json.load(f)

# Features without a "publisher" member (or with a null one) are skipped
# rather than raising KeyError/TypeError.
grenoble_feats = [
    feat for feat in gj["features"]
    if (feat.get("publisher") or {}).get("zfe_id") == "GRENOBLE"
]
if not grenoble_feats:
    # Fail with an explicit message instead of a bare IndexError below.
    raise ValueError(f"Aucune aire avec zfe_id == 'GRENOBLE' dans {aires_path}")

# In case there are several polygons, union all of them into one geometry.
# Using only the first feature would leave out any other Grenoble polygon.
zfe_geom = shape(grenoble_feats[0]["geometry"])
for feat in grenoble_feats[1:]:
    zfe_geom = zfe_geom.union(shape(feat["geometry"]))

# ---------- 3) check that Les Frênes and Boulevards are inside the ZFE ----------
# One (lat, lon) pair per station id/name, taken with GroupBy.first().
station_groups = no2.groupby(["station_id", "station_name"])
coords = station_groups[["lat", "lon"]].first().reset_index()

# Shapely points are (x, y), i.e. (lon, lat).
for row in coords.to_dict("records"):
    inside = zfe_geom.contains(Point(row["lon"], row["lat"]))
    print(f"{row['station_id']} ({row['station_name']}) dans la ZFE GRENOBLE ? -> {inside}")

# (optional) save the tidy dataset for later use, without the index column
out_path = DATA.joinpath("pollution_grenoble_no2_daily_clean.csv")
no2.to_csv(path_or_buf=out_path, index=False)
print("✅ Dataset NO2 propre sauvegardé ->", out_path)


In [None]:
from pathlib import Path
import pandas as pd
import json
from shapely.geometry import shape, Point
from shapely.ops import unary_union

# ---------- paths ----------
HERE = Path().resolve()      # zfe-scm folder
ROOT = HERE.parent           # project root
DATA = ROOT.joinpath("data")

poll_path = DATA.joinpath("Export Moy. journalière - 20251205011655 - 2016-02-05 00_00 - 2024-02-05 00_00.csv")
aires_path = DATA.joinpath("aires.geojson")

# Show whether the pollution export is present, and where it is expected.
print("Fichier pollution :", poll_path.exists(), poll_path)

# ---------- 1) load the CSV and filter NO2 ----------
df_raw = pd.read_csv(poll_path, sep=";", engine="python")

# Locate the "Date de début" column by substring match, because the header
# may carry a hidden character; the first matching header is used.
matching_cols = [name for name in df_raw.columns if "Date de début" in name]
date_debut_col = matching_cols[0]
df_raw["date"] = pd.to_datetime(df_raw[date_debut_col])

# keep the NO2 rows only, as an independent copy
is_no2 = df_raw["Polluant"] == "NO2"
df_no2 = df_raw.loc[is_no2].copy()

# Tidy dataset: keep "date" plus the columns listed below, rename them to
# short names, and order the rows by station then date.
_column_renames = {
    "code site": "station_id",
    "nom site": "station_name",
    "type d'implantation": "station_env",
    "type d'influence": "station_influence",
    "valeur": "no2_ug_m3",
    "Latitude": "lat",
    "Longitude": "lon",
}

no2 = (
    df_no2[["date", *_column_renames]]
    .rename(columns=_column_renames)
    .sort_values(["station_id", "date"])
)

print("Aperçu NO2 propre :")
display(no2.head())
print("Stations dans le fichier :", no2["station_id"].unique())

# ---------- 2) station metadata table ----------
# One row per (id, name, environment, influence) combination, with the
# lat/lon picked by GroupBy.first().
_station_keys = ["station_id", "station_name", "station_env", "station_influence"]
station_groups = no2.groupby(_station_keys)
stations_meta = station_groups[["lat", "lon"]].first().reset_index()

print("Table stations_meta :")
display(stations_meta)

# ---------- 3) load the Grenoble ZFE ----------
with aires_path.open(encoding="utf-8") as f:
    gj = json.load(f)

# Features without a "publisher" member (or with a null one) are skipped
# rather than raising KeyError/TypeError.
grenoble_feats = [
    feat for feat in gj["features"]
    if (feat.get("publisher") or {}).get("zfe_id") == "GRENOBLE"
]
if not grenoble_feats:
    # Without this guard the union below is an empty geometry and every
    # station would silently be flagged as outside the ZFE.
    raise ValueError(f"Aucune aire avec zfe_id == 'GRENOBLE' dans {aires_path}")

# Union all matching geometries into a single shape.
geoms = [shape(feat["geometry"]) for feat in grenoble_feats]
zfe_grenoble = unary_union(geoms)

# ---------- 4) check for each station whether it is inside the ZFE ----------
def is_in_zfe(row):
    """Return whether the point at (row["lon"], row["lat"]) is contained in ``zfe_grenoble``.

    ``row`` is any object indexable by "lon" and "lat". The point is built
    with x = lon and y = lat and tested against the module-level geometry.
    """
    return zfe_grenoble.contains(Point(row["lon"], row["lat"]))

# Evaluate the containment test row by row and store it as a new column.
in_zfe_flags = stations_meta.apply(is_in_zfe, axis=1)
stations_meta["in_zfe_grenoble"] = in_zfe_flags

print("Stations et appartenance à la ZFE Grenoble :")
display(stations_meta)

# ---------- 5) save the tidy datasets ----------
no2_out = DATA.joinpath("no2_all_stations_daily_clean.csv")
stations_out = DATA.joinpath("no2_stations_meta.csv")

# Both tables are written without the index column.
for frame, target in ((no2, no2_out), (stations_meta, stations_out)):
    frame.to_csv(target, index=False)

print("✅ NO2 daily propre sauvegardé ->", no2_out)
print("✅ Méta stations sauvegardée ->", stations_out)
