In [None]:
from pathlib import Path
import pandas as pd
import json
from shapely.geometry import shape, Point
from shapely.ops import unary_union

# ---------- paths ----------
# HERE is the resolved current working directory (zfe-scm/ according to the
# original note); the project root is its parent and contains data/.
HERE = Path(".").resolve()
ROOT = HERE.parent
DATA = ROOT.joinpath("data")

# NOTE: update this file name if the export is renamed.
CSV_FILE = "Export Moy. journali√®re - 20251205114649 - 2017-08-17 00_00 - 2025-12-04 11_00.csv"
csv_path = DATA.joinpath(CSV_FILE)

# ---------- 1) load & clean the Paris NO2 CSV ----------
# Semicolon-separated export, parsed with pandas' Python engine.
df_raw = pd.read_csv(csv_path, sep=";", engine="python")

# Keep only the NO2 rows; .copy() so the columns added below are written to
# an independent frame rather than to a slice of df_raw.
df_no2 = df_raw[df_raw["Polluant"] == "NO2"].copy()

# Start-date column, located by substring rather than by exact name.
# The needle is the ASCII-only prefix of the header so that the lookup does
# not depend on how the accented character that follows is encoded or
# normalised in either this source file or the CSV header.
date_candidates = [col for col in df_no2.columns if "Date de d" in col]
if not date_candidates:
    # Fail with an explicit message instead of a bare IndexError.
    raise ValueError(
        "Colonne de date introuvable dans le CSV ; "
        f"colonnes disponibles : {list(df_no2.columns)}"
    )
date_col = date_candidates[0]
df_no2["date"] = pd.to_datetime(df_no2[date_col])

# Latitude / longitude as floats.
df_no2["lat"] = df_no2["Latitude"].astype(float)
df_no2["lon"] = df_no2["Longitude"].astype(float)

# Clean dataframe in the in-house format: export columns renamed, only the
# columns of interest kept, rows ordered by station then date.
no2_paris_daily = (
    df_no2.rename(columns={
        "code site": "station_id",
        "nom site": "station_name",
        "type d'implantation": "station_env",
        "type d'influence": "station_influence",
        "valeur": "no2_ug_m3",
    })[
        ["date", "station_id", "station_name",
         "station_env", "station_influence",
         "no2_ug_m3", "lat", "lon"]
    ]
    .sort_values(["station_id", "date"])
    .reset_index(drop=True)
)

# Quick visual check: first rows, then the distinct stations found.
print("Aper√ßu NO‚ÇÇ Paris :")
display(no2_paris_daily.head())
print("Stations Paris dans ce fichier :")
display(no2_paris_daily[["station_id", "station_name"]].drop_duplicates())

# ---------- 2) load the Paris ZFE (low-emission zone) from aires.geojson ----------
with (DATA / "aires.geojson").open(encoding="utf-8") as f:
    gj = json.load(f)

# Keep the features whose feature-level "publisher" object has zfe_id "PARIS".
paris_feats = []
for feature in gj["features"]:
    publisher = feature.get("publisher", {})
    if publisher.get("zfe_id") == "PARIS":
        paris_feats.append(feature)

# Stop early when the file holds no Paris zone at all.
if len(paris_feats) == 0:
    raise ValueError("Impossible de trouver une ZFE avec publisher.zfe_id == 'PARIS' dans aires.geojson")

# Convert each matching feature to a shapely geometry and merge them into one.
paris_shapes = [shape(feature["geometry"]) for feature in paris_feats]
paris_geom = unary_union(paris_shapes)

# ---------- 3) build the station metadata + in_zfe_paris ----------
# One row per distinct (id, name, environment, influence) combination, with
# the first available lat/lon of that group.
# dropna=False keeps stations whose environment or influence is missing; with
# the default (dropna=True) groupby would silently drop them from the table.
stations_meta = (
    no2_paris_daily
    .groupby(["station_id", "station_name",
              "station_env", "station_influence"], dropna=False)[["lat", "lon"]]
    .first()
    .reset_index()
)

# Flag the stations located inside the Paris ZFE geometry.
# Points are built as (lon, lat), i.e. (x, y).
# An explicitly indexed boolean Series is used instead of
# DataFrame.apply(axis=1): on an empty station table apply() returns a
# DataFrame, which cannot be assigned to a single column.
stations_meta["in_zfe_paris"] = pd.Series(
    [
        paris_geom.contains(Point(lon, lat))
        for lon, lat in zip(stations_meta["lon"], stations_meta["lat"])
    ],
    index=stations_meta.index,
    dtype=bool,
)

print("Meta stations Paris (+ in_zfe_paris) :")
display(stations_meta)

# ---------- 4) save the clean CSV files ----------
# Output file name -> dataframe, written to the data folder in this order
# and without the index column.
clean_outputs = {
    "pollution_paris_no2_daily_clean.csv": no2_paris_daily,
    "no2_paris_stations_meta.csv": stations_meta,
}
for out_name, out_frame in clean_outputs.items():
    out_frame.to_csv(DATA / out_name, index=False)

# Summary of what was written.
print("Fichiers sauvegard√©s :")
for out_name in clean_outputs:
    print(" - " + out_name)
