In [1]:
# === Étape 1 — Exploration & nettoyage du fichier reseau_en_arbre.xlsx ===
# - Liste les onglets
# - Charge l’onglet (par défaut le 1er)
# - Normalise les noms de colonnes
# - Infère (si possible) les colonnes source/target/cost/status
# - Affiche un aperçu
# - Sauvegarde un CSV nettoyé

from pathlib import Path
import pandas as pd

# 0) Chemin du fichier (adapte si besoin)
XLSX_PATH = Path("reseau_en_arbre.xlsx")  # ex: Path("/mnt/data/reseau_en_arbre.xlsx")

assert XLSX_PATH.exists(), f"Fichier introuvable: {XLSX_PATH.resolve()}"

# 1) Lire les onglets et choisir celui à charger
xls = pd.ExcelFile(XLSX_PATH)
sheet_names = xls.sheet_names
SHEET_TO_READ = sheet_names[0]  # change si besoin: SHEET_TO_READ = "reseau_en_arbre"

# 2) Charger l’onglet
df_raw = pd.read_excel(XLSX_PATH, sheet_name=SHEET_TO_READ)

# 3) Normaliser les noms de colonnes
def normalize(col: str) -> str:
    return (
        str(col)
        .strip()
        .lower()
        .replace(" ", "_")
        .replace("-", "_")
        .replace(".", "_")
        .replace("/", "_")
    )

df = df_raw.copy()
df.columns = [normalize(c) for c in df.columns]

# 4) Inférence (facultative) de colonnes de graphe
def pick(cols, options):
    for o in options:
        if o in cols:
            return o
    return None

source_col = pick(df.columns, ["source","from","src","noeud_source","node_source","origin","arc_debut","noeud_debut","sommet_debut","u","depart"])
target_col = pick(df.columns, ["target","to","dst","noeud_cible","node_target","destination","arc_fin","noeud_fin","sommet_fin","v","arrivee","arrivée"])
cost_col   = pick(df.columns, ["cost","cout","coût","weight","poids","length","longueur","distance","prix"])
status_col = pick(df.columns, ["status","statut","etat","état","state","condition","type_etat","etat_arc"])

summary = {
    "sheet_names": sheet_names,
    "loaded_sheet": SHEET_TO_READ,
    "row_count": len(df),
    "columns": list(df.columns),
    "inferred_columns": {
        "source": source_col,
        "target": target_col,
        "cost_or_length": cost_col,
        "status": status_col,
    },
}

# 5) Afficher un aperçu
print("=== RÉSUMÉ ===")
print(summary)
print("\n=== APERÇU (30 premières lignes) ===")
display(df.head(30)) if "display" in globals() else print(df.head(30))

# 6) Sauvegarder un CSV nettoyé pour les prochaines étapes
CLEAN_CSV_PATH = XLSX_PATH.with_suffix("").with_name("reseau_en_arbre_clean.csv")
df.to_csv(CLEAN_CSV_PATH, index=False)
print(f"\nCSV nettoyé sauvegardé → {CLEAN_CSV_PATH.resolve()}")

# 7) Diagnostics simples (si source/target existent)
if source_col and target_col:
    unique_nodes = pd.unique(pd.concat([df[source_col], df[target_col]], ignore_index=True)).size
    print({"unique_nodes_estimate": unique_nodes, "edge_count": len(df)})
else:
    print("Colonnes source/target non trouvées (normal si le fichier ne les contient pas).")


=== RÉSUMÉ ===
{'sheet_names': ['reseau_en_arbre'], 'loaded_sheet': 'reseau_en_arbre', 'row_count': 6107, 'columns': ['id_batiment', 'nb_maisons', 'infra_id', 'infra_type', 'longueur'], 'inferred_columns': {'source': None, 'target': None, 'cost_or_length': 'longueur', 'status': None}}

=== APERÇU (30 premières lignes) ===
   id_batiment  nb_maisons infra_id     infra_type   longueur
0      E000001           4  P007111  infra_intacte  12.314461
1      E000001           4  P007983  infra_intacte  40.320929
2      E000001           4  P000308  infra_intacte  39.140799
3      E000001           4  P007819  infra_intacte  17.390464
4      E000002           1  P007111  infra_intacte  12.314461
5      E000002           1  P007983  infra_intacte  40.320929
6      E000002           1  P000308  infra_intacte  39.140799
7      E000002           1  P007240  infra_intacte  10.914626
8      E000003           1  P007111  infra_intacte  12.314461
9      E000003           1  P007111  infra_intacte  12.3

In [2]:
# === Étape 2 — Construire les graphes NetworkX ===
# - G_bip (biparti): noeuds={batiments, infras}, arêtes=(batiment <-> infra)
# - G_building (projection): noeuds=bâtiments, arêtes=lignes électriques (mutualisées)
#   Poids = coût du raccordement (par défaut: longueur ; option “per_house” possible)
#
# Entrée : reseau_en_arbre_clean.csv avec colonnes attendues :
#   id_batiment, nb_maisons, infra_id, infra_type, longueur
#
# Sorties :
#   - Diagnostics imprimés (taille, composantes)
#   - Deux fichiers CSV d’arêtes pour visualiser ou recharger plus tard:
#       edges_building_graph.csv  (bâtiments seulement)
#       edges_bipartite_graph.csv (biparti bâtiment<->infra)
#   - (optionnel) graphes GraphML :
#       G_building.graphml, G_bip.graphml

from pathlib import Path
import pandas as pd
import networkx as nx

# 0) Paramètres
CLEAN_CSV_PATH = Path("reseau_en_arbre_clean.csv")   # adapte si besoin (ex: /mnt/data/...)
assert CLEAN_CSV_PATH.exists(), f"Fichier introuvable: {CLEAN_CSV_PATH.resolve()}"

# Mode de coût (poids de l’arête dans le graphe Bâtiments)
#   "length"   -> coût = longueur totale, répartie équitablement sur (k-1) arêtes du groupe
#   "per_house"-> coût = (longueur / nb_maisons), répartie sur (k-1) arêtes
COST_MODE = "length"   # "length" ou "per_house"

df = pd.read_csv(CLEAN_CSV_PATH)

required_cols = {"id_batiment", "nb_maisons", "infra_id", "infra_type", "longueur"}
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"Colonnes manquantes: {missing}")

# ——————————————————————————————————————————————
# 1) Graphe biparti Bâtiment <-> Infrastructure
# ——————————————————————————————————————————————
G_bip = nx.Graph()

# Ajout des nœuds bâtiments + infras
# On met un attribut "bipartite" pour distinguer les deux ensembles
for bid in df["id_batiment"].unique():
    G_bip.add_node(("B", bid), kind="building", bipartite="building")

for iid, row in df.drop_duplicates("infra_id")[["infra_id","infra_type"]].itertuples(index=False):
    G_bip.add_node(("I", iid), kind="infra", infra_type=row, bipartite="infra")

# Ajout des arêtes bâtiment <-> infra
# Attributs utiles: longueur, nb_maisons, infra_type
for _, r in df.iterrows():
    b_node = ("B", r["id_batiment"])
    i_node = ("I", r["infra_id"])
    G_bip.add_edge(
        b_node, i_node,
        longueur=float(r["longueur"]),
        nb_maisons=int(r["nb_maisons"]) if pd.notna(r["nb_maisons"]) else None,
        infra_type=r["infra_type"]
    )

# Exporte l’edge list bipartite pour inspection
edges_bip = []
for u, v, d in G_bip.edges(data=True):
    edges_bip.append({
        "u": u[1], "u_kind": u[0],
        "v": v[1], "v_kind": v[0],
        "longueur": d.get("longueur"),
        "nb_maisons": d.get("nb_maisons"),
        "infra_type": d.get("infra_type")
    })
edges_bip_df = pd.DataFrame(edges_bip)
edges_bip_df.to_csv("edges_bipartite_graph.csv", index=False)

# ——————————————————————————————————————————————
# 2) Graphe Bâtiments-uniquement (projection)
#    Principe : pour chaque infra_id, relier entre eux les bâtiments qui la partagent.
#    Pour éviter des cliques denses, on crée une simple chaîne (k-1 arêtes) en triant les id_batiment.
#    Le coût de l’infra est réparti uniformément sur ces (k-1) arêtes.
# ——————————————————————————————————————————————
G_building = nx.Graph()

# Ajoute tous les bâtiments comme nœuds (avec attributs éventuels)
for bid in df["id_batiment"].unique():
    # On peut stocker le nb_maisons max observé pour ce bâtiment (si utile)
    nb_maisons_b = df.loc[df["id_batiment"] == bid, "nb_maisons"].max()
    G_building.add_node(bid, nb_maisons=int(nb_maisons_b) if pd.notna(nb_maisons_b) else None)

# Regroupe par infra
for infra_id, g in df.groupby("infra_id"):
    batts = sorted(g["id_batiment"].unique().tolist())
    k = len(batts)
    if k < 2:
        # Une infra liée à un seul bâtiment ne crée pas d’arête dans le graphe bâtiment-bâtiment
        continue

    # Coût (longueur) total de l’infra
    longueur_total = float(g["longueur"].sum())
    # Nombre de maisons max sur cette infra (proxy de mutualisation)
    nb_maisons_max = int(g["nb_maisons"].max()) if pd.notna(g["nb_maisons"].max()) else 1

    # Répartition du coût sur (k-1) arêtes formant une chaîne
    denom = max(1, k - 1)

    if COST_MODE == "length":
        cost_edge = longueur_total / denom
    elif COST_MODE == "per_house":
        # coût “par maison” réparti sur les arêtes
        cost_edge = (longueur_total / max(1, nb_maisons_max)) / denom
    else:
        raise ValueError("COST_MODE invalide. Utilise 'length' ou 'per_house'.")

    # Construire la chaîne
    for a, b in zip(batts[:-1], batts[1:]):
        # Si une arête existe déjà (via une autre infra), on cumule les coûts
        if G_building.has_edge(a, b):
            G_building[a][b]["cost"] += cost_edge
            G_building[a][b]["infras"].add(infra_id)
            G_building[a][b]["longueur_total"] += longueur_total / denom
        else:
            G_building.add_edge(
                a, b,
                cost=cost_edge,
                infras={infra_id},
                longueur_total=longueur_total / denom,
                k_group=k,
                nb_maisons_max=nb_maisons_max
            )

# ——————————————————————————————————————————————
# 3) Diagnostics & export
# ——————————————————————————————————————————————
print("=== Graphe biparti ===")
print(f"- Nœuds: {G_bip.number_of_nodes()} | Arêtes: {G_bip.number_of_edges()}")

print("\n=== Graphe bâtiments-uniquement ===")
print(f"- Nœuds: {G_building.number_of_nodes()} | Arêtes: {G_building.number_of_edges()}")

# Composantes connexes (bâtiments)
components = list(nx.connected_components(G_building))
print(f"- Composantes connexes: {len(components)}")
if components:
    largest = max(components, key=len)
    print(f"- Plus grande composante: {len(largest)} nœuds")

# Export edge list pour G_building
edges_building = []
for u, v, d in G_building.edges(data=True):
    edges_building.append({
        "u": u,
        "v": v,
        "cost": d.get("cost"),
        "longueur_total_share": d.get("longueur_total"),
        "nb_infras_partagees": len(d.get("infras", [])),
        "k_group": d.get("k_group"),
        "nb_maisons_max": d.get("nb_maisons_max"),
    })
edges_building_df = pd.DataFrame(edges_building).sort_values("cost", ascending=True)
edges_building_df.to_csv("edges_building_graph.csv", index=False)

# (Optionnel) Sauvegarde en GraphML pour Gephi / yEd / etc.
try:
    nx.write_graphml(G_building, "G_building.graphml")
    nx.write_graphml(G_bip, "G_bip.graphml")
    print("\nGraphML exportés: G_building.graphml, G_bip.graphml")
except Exception as e:
    print(f"Export GraphML (optionnel) ignoré: {e}")

print("\nFichiers générés:")
print("- edges_building_graph.csv")
print("- edges_bipartite_graph.csv")


=== Graphe biparti ===
- Nœuds: 1025 | Arêtes: 5586

=== Graphe bâtiments-uniquement ===
- Nœuds: 381 | Arêtes: 533
- Composantes connexes: 1
- Plus grande composante: 381 nœuds
Export GraphML (optionnel) ignoré: GraphML does not support type <class 'set'> as data values.

Fichiers générés:
- edges_building_graph.csv
- edges_bipartite_graph.csv


In [3]:
import pandas as pd
import matplotlib.pyplot as plt

infra = pd.read_csv("infrastructures_mutualisees.csv")

top_n = 20  # ajuste si besoin
top = infra.sort_values(["nb_batiments", "difficulty_infra"], ascending=[False, True]).head(top_n)

plt.figure()
plt.bar(top["infra_id"].astype(str), top["nb_batiments"])
plt.xticks(rotation=90)
plt.title(f"Top {top_n} infrastructures par nb de bâtiments desservis")
plt.xlabel("infra_id")
plt.ylabel("Nombre de bâtiments")
plt.tight_layout()
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'infrastructures_mutualisees.csv'

In [5]:
# ============================================
# Plan de raccordement — Version Orientée Objet
# ============================================
# Hypothèses de colonnes (df): id_batiment, nb_maisons, infra_id, infra_type, longueur
# Métriques (adaptables) :
#   - difficulté(infra) = longueur * (1 + α*replace) * (1 + β*complex) * (1 - γ*simple)
#   - difficulté(bâtiment) = somme des difficultés des infras non réparées qui le raccordent
# Algorithme (itératif, commence par le plus facile) :
#   - choisir le bâtiment avec difficulté min
#   - "réparer" ses infras (elles sortent du calcul)
#   - réévaluer difficultés des voisins
#   - répéter
#
# Dunder methods:
#   - __repr__/__str__ pour affichage
#   - __lt__ pour trier (ordre par difficulté puis tie-break)
#   - @classmethod from_dataframe(...) pour construire l'objet réseau

from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, Set, List, Tuple, Optional, Iterable
import pandas as pd
import numpy as np

# -------------------------
# Petites fonctions utilitaires
# -------------------------
def parse_flags(infra_type: str) -> Tuple[int,int,int]:
    """Détecte 'remplac', 'simple', 'complex/difficil' dans infra_type."""
    s = str(infra_type).lower() if pd.notna(infra_type) else ""
    replace_flag = int("remplac" in s)
    simple_flag  = int("simple" in s)
    complex_flag = int(("complex" in s) or ("difficil" in s))
    return replace_flag, simple_flag, complex_flag


@dataclass(order=False)
class Infrastructure:
    id: int | str
    longueur_total: float
    replace_any: int = 0
    simple_any: int = 0
    complex_any: int = 0
    batiments: Set[int] = field(default_factory=set)

    # paramètres de difficulté (globaux mais passés ici pour flexibilité)
    alpha_replace: float = 1.0   # pénalité si à remplacer
    beta_complex: float  = 0.5   # pénalité si complexe/difficile
    gamma_simple: float  = 0.3   # bonus si simple (réduit la difficulté)

    def difficulty(self) -> float:
        base = max(0.0, float(self.longueur_total))
        mult = (1.0 + self.alpha_replace*self.replace_any) \
             * (1.0 + self.beta_complex*self.complex_any) \
             * (1.0 - self.gamma_simple*self.simple_any)
        return max(0.0, base * mult)

    def __repr__(self):
        return (f"Infrastructure(id={self.id}, L={self.longueur_total:.2f}, "
                f"rep={self.replace_any}, simp={self.simple_any}, cmp={self.complex_any}, "
                f"bats={len(self.batiments)})")


@dataclass(order=False)
class Building:
    id: int
    nb_maisons: Optional[int] = None
    # références vers le réseau pour accéder aux infras non réparées
    _network: "Network" = field(default=None, repr=False, compare=False)

    # tie-break: favoriser zones avec bcp de remplacements / longueurs
    tb_w_replace: float = 1.0
    tb_w_length: float  = 0.2

    def current_infras(self) -> Iterable[Infrastructure]:
        for iid in self._network.b_to_i.get(self.id, set()):
            if iid in self._network.unrepaired:
                yield self._network.infras[iid]

    def difficulty(self) -> float:
        return float(sum(infra.difficulty() for infra in self.current_infras()))

    def tie_break(self) -> float:
        rep = 0.0
        length = 0.0
        for infra in self.current_infras():
            rep    += float(infra.replace_any)
            length += float(infra.longueur_total)
        return self.tb_w_replace * rep + self.tb_w_length * length

    # dunder pour tri : plus facile d'abord ; si égalité => tie-break décroissant
    def __lt__(self, other: "Building") -> bool:
        d1, d2 = self.difficulty(), other.difficulty()
        if not np.isclose(d1, d2):
            return d1 < d2
        return self.tie_break() > other.tie_break()

    def __repr__(self):
        return f"Building(id={self.id}, diff={self.difficulty():.3f}, tb={self.tie_break():.3f})"


class Network:
    def __init__(self,
                 buildings: Dict[int, Building],
                 infras: Dict[str | int, Infrastructure],
                 b_to_i: Dict[int, Set[str | int]]):
        self.buildings = buildings
        self.infras = infras
        self.b_to_i = b_to_i
        self.unrepaired: Set[str | int] = set(infras.keys())
        # back-reference
        for b in self.buildings.values():
            b._network = self

    @classmethod
    def from_dataframe(cls,
                       df: pd.DataFrame,
                       alpha_replace: float = 1.0,
                       beta_complex: float  = 0.5,
                       gamma_simple: float  = 0.3) -> "Network":
        required = {"id_batiment","infra_id","infra_type","longueur"}
        missing = [c for c in required if c not in df.columns]
        if missing:
            raise ValueError(f"Colonnes manquantes: {missing}")

        # flags & agrégation des infras
        flags = df["infra_type"].apply(parse_flags).apply(pd.Series)
        flags.columns = ["replace_flag","simple_flag","complex_flag"]
        dff = pd.concat([df.copy(), flags], axis=1)

        agg = (dff.groupby("infra_id", as_index=False)
                     .agg(longueur_total=("longueur","sum"),
                          replace_any=("replace_flag","max"),
                          simple_any=("simple_flag","max"),
                          complex_any=("complex_flag","max")))
        # créer objets Infrastructure
        infras: Dict[str | int, Infrastructure] = {}
        for row in agg.itertuples(index=False):
            infras[row.infra_id] = Infrastructure(
                id=row.infra_id,
                longueur_total=float(row.longueur_total),
                replace_any=int(row.replace_any),
                simple_any=int(row.simple_any),
                complex_any=int(row.complex_any),
                batiments=set(),
                alpha_replace=alpha_replace,
                beta_complex=beta_complex,
                gamma_simple=gamma_simple
            )

        # mapping bâtiment -> infras, et rattacher bâtiments aux infras
        b_to_i: Dict[int, Set[str | int]] = {}
        for r in dff[["id_batiment","infra_id"]].drop_duplicates().itertuples(index=False):
            b_to_i.setdefault(int(r.id_batiment), set()).add(r.infra_id)
            infras[r.infra_id].batiments.add(int(r.id_batiment))

        # créer objets Building
        buildings: Dict[int, Building] = {}
        nb_map = dff.groupby("id_batiment")["nb_maisons"].max().to_dict()
        for bid in sorted(b_to_i.keys()):
            buildings[bid] = Building(id=bid, nb_maisons=nb_map.get(bid))

        return cls(buildings=buildings, infras=infras, b_to_i=b_to_i)

    # opérations
    def repair_infras_of(self, bid: int) -> List[str | int]:
        """Marque comme réparées toutes les infras du bâtiment."""
        repaired = []
        for iid in list(self.b_to_i.get(bid, set())):
            if iid in self.unrepaired:
                self.unrepaired.remove(iid)
                repaired.append(iid)
        return repaired

    def building_order_plan(self) -> pd.DataFrame:
        """Algorithme itératif : commence par les plus faciles (somme des difficultés)."""
        remaining = set(self.buildings.keys())
        plan_rows = []
        cum_cost = 0.0
        step = 0

        while remaining:
            # tri grâce à __lt__ (difficulty asc, tie-break desc)
            sorted_bs = sorted((self.buildings[b] for b in remaining))
            best = sorted_bs[0]
            step += 1
            marginal = best.difficulty()
            cum_cost += marginal
            repaired = self.repair_infras_of(best.id)

            plan_rows.append({
                "ordre": step,
                "id_batiment": best.id,
                "cout_marginal": float(marginal),
                "cout_cumule": float(cum_cost),
                "nb_infras_reparees": len(repaired),
                "infras_reparees": ";".join(map(str, repaired)),
                "tie_break": float(best.tie_break())
            })
            remaining.remove(best.id)

        return pd.DataFrame(plan_rows)

    # représentation
    def __repr__(self):
        return f"Network(|B|={len(self.buildings)}, |I|={len(self.infras)})"


# ===========================
# Exécution sur ton fichier
# ===========================
# 1) Charger les données
#    -> adapte le chemin si besoin (ex: '/mnt/data/reseau_en_arbre_clean.csv')
CSV = "reseau_en_arbre_clean.csv"
df = pd.read_csv(CSV)

# 2) Construire le réseau (tu peux ajuster α, β, γ)
net = Network.from_dataframe(
    df,
    alpha_replace=1.2,   # pénalise plus les infras "à remplacer"
    beta_complex=0.6,    # pénalise un peu plus "complexe/difficile"
    gamma_simple=0.25    # bonus (réduction) si "simple"
)

print(net)  # __repr__

# 3) Générer le plan (POO)
plan = net.building_order_plan()

# 4) Résultats
print("\n=== APERÇU PLAN (top 20) ===")
print(plan.head(20))
print("\nCOUT CUMULÉ FINAL:", float(plan["cout_cumule"].iloc[-1]))
print("BÂTIMENTS COUVERTS:", len(plan))

# 5) Sauvegarde
plan_path = "/mnt/data/plan_raccordement_oo.csv"
plan.to_csv(plan_path, index=False)
print("\nExport:", plan_path)


ValueError: invalid literal for int() with base 10: 'E000001'

In [6]:
# analyse_topologie.py
# Requirements: pandas, networkx, matplotlib (optionnel pour la visualisation)

from __future__ import annotations
import pandas as pd
import networkx as nx
from dataclasses import dataclass
from typing import Optional, Dict, Any, List, Tuple
from pathlib import Path

@dataclass
class TopologySummary:
    n_nodes: int
    n_edges: int
    is_connected: bool
    n_components: int
    component_sizes: List[int]
    density: float
    avg_degree: float
    diameter_lcc: Optional[int]
    aspl_lcc: Optional[float]
    n_bridges: int
    n_articulation_points: int
    total_length: Optional[float]
    total_length_to_replace: Optional[float]
    total_length_intact: Optional[float]

class NetworkAnalyzer:
    """
    Analyse préliminaire de la topologie du réseau.
    Nœuds = bâtiments ; Arêtes = infrastructures.
    Attributs d'arêtes gérés si présents: length|longueur, cost|cout, status, houses_served|nb_maisons.
    """
    def __init__(self, csv_path: str | Path):
        self.csv_path = Path(csv_path)
        self.edges_df: Optional[pd.DataFrame] = None
        self.G: Optional[nx.Graph] = None

    # -------- Chargement & normalisation --------
    def load_data(self) -> pd.DataFrame:
        if not self.csv_path.exists():
            raise FileNotFoundError(f"CSV introuvable: {self.csv_path}")
        df = pd.read_csv(self.csv_path)

        # Vérif colonnes source/target
        possible_src = [c for c in df.columns if c.lower() in {"source","src","from","u"}]
        possible_tgt = [c for c in df.columns if c.lower() in {"target","tgt","to","v"}]
        if not possible_src or not possible_tgt:
            raise ValueError("Colonnes source/target non trouvées (attendues: source/target).")

        # Standardise les noms
        df = df.rename(columns={
            possible_src[0]: "source",
            possible_tgt[0]: "target"
        })

        # Normalise quelques attributs d'arêtes courants (optionnels)
        def pick(*names):
            for n in names:
                if n in df.columns: return n
            return None

        length_col = pick("length","longueur","len","distance","dist","cost","cout")
        status_col = pick("status","etat","state")
        houses_col = pick("houses_served","nb_maisons","houses","bâtiments desservis","batiments_servis","served")

        # Valeurs par défaut
        if length_col is None:
            df["length"] = 1.0
            length_col = "length"
        # status par défaut = "to_replace"
        if status_col is None:
            df["status"] = "to_replace"
            status_col = "status"

        # Nettoyage léger
        df["source"] = df["source"].astype(str)
        df["target"] = df["target"].astype(str)
        df[length_col] = pd.to_numeric(df[length_col], errors="coerce").fillna(1.0)
        df[status_col] = df[status_col].astype(str).str.lower().str.strip()
        if houses_col:
            df[houses_col] = pd.to_numeric(df[houses_col], errors="coerce").fillna(1).astype(int)

        # Conserve noms standardisés pour la suite
        self._length_col = length_col
        self._status_col = status_col
        self._houses_col = houses_col
        self.edges_df = df
        return df

    # -------- Construction du graphe --------
    def build_graph(self) -> nx.Graph:
        if self.edges_df is None:
            self.load_data()

        G = nx.Graph()
        attrs = ["source","target", self._length_col, self._status_col]
        if self._houses_col:
            attrs.append(self._houses_col)

        for _, row in self.edges_df.iterrows():
            u, v = row["source"], row["target"]
            data = {
                "length": float(row[self._length_col]),
                "status": row[self._status_col],
            }
            if self._houses_col:
                data["houses_served"] = int(row[self._houses_col])
            # Ajout arête
            G.add_edge(u, v, **data)

        self.G = G
        return G

    # -------- Métriques de topologie --------
    def compute_summary(self) -> TopologySummary:
        if self.G is None:
            self.build_graph()
        G = self.G

        n_nodes = G.number_of_nodes()
        n_edges = G.number_of_edges()
        is_connected = nx.is_connected(G) if n_nodes > 0 else False

        components = [len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)]
        n_components = len(components)
        component_sizes = components[:10]  # top 10 tailles

        density = nx.density(G) if n_nodes > 1 else 0.0
        avg_degree = (2*n_edges / n_nodes) if n_nodes > 0 else 0.0

        # LCC = plus grande composante
        diameter_lcc = None
        aspl_lcc = None
        if n_nodes > 0 and n_components >= 1:
            lcc_nodes = max(nx.connected_components(G), key=len)
            H = G.subgraph(lcc_nodes).copy()
            if H.number_of_nodes() > 1:
                # Diamètre et longueur moyenne des plus courts chemins
                try:
                    diameter_lcc = nx.diameter(H)
                except nx.NetworkXError:
                    diameter_lcc = None
                try:
                    aspl_lcc = nx.average_shortest_path_length(H, weight=None)
                except nx.NetworkXError:
                    aspl_lcc = None

        # Ponts & points d'articulation
        bridges = list(nx.bridges(G)) if n_edges > 0 else []
        apoints = list(nx.articulation_points(G)) if n_nodes > 0 else []

        # Stats d'attributs (si length/status)
        total_length = total_length_to_replace = total_length_intact = None
        if n_edges > 0:
            lengths = nx.get_edge_attributes(G, "length")
            statuses = nx.get_edge_attributes(G, "status")
            total_length = float(sum(lengths.values()))
            total_length_to_replace = float(sum(l for (e, l) in lengths.items() 
                                                if statuses.get(e, "to_replace") == "to_replace"))
            total_length_intact = total_length - total_length_to_replace

        return TopologySummary(
            n_nodes=n_nodes,
            n_edges=n_edges,
            is_connected=is_connected,
            n_components=n_components,
            component_sizes=component_sizes,
            density=density,
            avg_degree=avg_degree,
            diameter_lcc=diameter_lcc,
            aspl_lcc=aspl_lcc,
            n_bridges=len(bridges),
            n_articulation_points=len(apoints),
            total_length=total_length,
            total_length_to_replace=total_length_to_replace,
            total_length_intact=total_length_intact,
        )

    # -------- Rapports CSV optionnels --------
    def export_reports(self, out_dir: str | Path = "outputs") -> Dict[str, Path]:
        """
        Exporte: 
          - components.csv : composantes et leurs tailles
          - degree_stats.csv : degré des nœuds (degree, is_articulation)
          - bridges.csv : liste des ponts
        """
        if self.G is None:
            self.build_graph()
        G = self.G
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        artifacts: Dict[str, Path] = {}

        # Composantes
        comps = []
        for i, comp in enumerate(sorted(nx.connected_components(G), key=len, reverse=True), start=1):
            for n in comp:
                comps.append({"component_id": i, "node": n, "size": len(comp)})
        df_comps = pd.DataFrame(comps).sort_values(["component_id","node"])
        p1 = out_dir / "components.csv"
        df_comps.to_csv(p1, index=False)
        artifacts["components"] = p1

        # Degrés & articulation
        deg = dict(G.degree())
        ap = set(nx.articulation_points(G)) if G.number_of_nodes() > 0 else set()
        df_deg = pd.DataFrame(
            [{"node": n, "degree": d, "is_articulation": n in ap} for n, d in deg.items()]
        ).sort_values(["degree","node"], ascending=[False, True])
        p2 = out_dir / "degree_stats.csv"
        df_deg.to_csv(p2, index=False)
        artifacts["degree_stats"] = p2

        # Ponts
        bridges = list(nx.bridges(G)) if G.number_of_edges() > 0 else []
        df_br = pd.DataFrame([{"u": u, "v": v} for u, v in bridges])
        p3 = out_dir / "bridges.csv"
        df_br.to_csv(p3, index=False)
        artifacts["bridges"] = p3

        return artifacts

    # -------- Visualisation rapide (optionnelle) --------
    def quick_plot(self, figsize: Tuple[int,int]=(10,8)) -> None:
        """
        Aperçu du graphe avec spring_layout. 
        Les arêtes 'to_replace' sont plus épaisses (pour les repérer facilement).
        """
        import matplotlib.pyplot as plt

        if self.G is None:
            self.build_graph()
        G = self.G

        pos = nx.spring_layout(G, seed=42)  # layout générique (pas géographique)

        # Sépare arêtes par status
        ed_intact = [(u,v) for u,v,d in G.edges(data=True) if d.get("status","to_replace") != "to_replace"]
        ed_replace = [(u,v) for u,v,d in G.edges(data=True) if d.get("status","to_replace") == "to_replace"]

        plt.figure(figsize=figsize)
        nx.draw_networkx_nodes(G, pos, node_size=60)
        # On ne fixe pas de couleurs personnalisées (simples traits)
        nx.draw_networkx_edges(G, pos, edgelist=ed_intact, width=1.0)
        nx.draw_networkx_edges(G, pos, edgelist=ed_replace, width=2.0, style="dashed")
        nx.draw_networkx_labels(G, pos, font_size=7)
        plt.title("Aperçu topologique — continu: intact / pointillé: à remplacer")
        plt.axis("off")
        plt.tight_layout()
        plt.show()


# ------------- Exemple d'utilisation -------------
if __name__ == "__main__":
    csv_path = "reseau_en_arbre.csv"  # adapte si besoin

    analyzer = NetworkAnalyzer(csv_path)
    analyzer.load_data()
    analyzer.build_graph()
    summary = analyzer.compute_summary()

    print("\n=== RÉSUMÉ TOPOLOGIQUE ===")
    print(f"Nodes: {summary.n_nodes}")
    print(f"Edges: {summary.n_edges}")
    print(f"Connected: {summary.is_connected}")
    print(f"Components: {summary.n_components}  (top sizes: {summary.component_sizes})")
    print(f"Density: {summary.density:.6f}   AvgDegree: {summary.avg_degree:.3f}")
    print(f"Diameter (LCC): {summary.diameter_lcc}   ASPL (LCC): {summary.aspl_lcc}")
    print(f"Bridges: {summary.n_bridges}   Articulation points: {summary.n_articulation_points}")
    if summary.total_length is not None:
        print(f"Total length: {summary.total_length:.2f}")
        print(f"  - to_replace: {summary.total_length_to_replace:.2f}")
        print(f"  - intact    : {summary.total_length_intact:.2f}")

    #Exports (décommente si tu veux des CSV)
    artifacts = analyzer.export_reports("outputs")
    print("CSV écrits:", artifacts)

    #Visualisation (décommente pour un aperçu)
    analyzer.quick_plot()


FileNotFoundError: CSV introuvable: reseau_en_arbre.csv

In [None]:
from __future__ import annotations
import pandas as pd, networkx as nx
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Dict

# ---------- Config fichiers ----------
BASE = Path(".")
FP_BATS = BASE / "batiments.csv"
FP_INFRA = BASE / "infra.csv"
FP_LINKS = BASE / "reseau_en_arbre.csv"   # <<--- fournis ceci

OUT = BASE / "outputs"
OUT.mkdir(parents=True, exist_ok=True)

# ---------- Barèmes (annexe.docx) ----------
PRICE_PER_M = {"aerien": 500.0, "semi-aerien": 750.0, "fourreau": 900.0}
DUR_PER_M_H = {"aerien": 2.0,   "semi-aerien": 4.0,   "fourreau": 5.0}

def pick_col(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
    low = {c.lower(): c for c in df.columns}
    for cand in candidates:
        if cand in low:
            return low[cand]
    # tolérance accents/ponctuation
    import unicodedata, re
    norm = {re.sub(r"[^a-z0-9]","",unicodedata.normalize("NFD",c).encode("ascii","ignore").decode().lower()): c
            for c in df.columns}
    for cand in candidates:
        k = re.sub(r"[^a-z0-9]","",cand.lower())
        if k in norm: return norm[k]
    return None

def normalize_type(s: str) -> str:
    if not isinstance(s, str): return "aerien"
    import unicodedata
    s2 = unicodedata.normalize("NFD", s.strip().lower()).encode("ascii","ignore").decode()
    if "semi" in s2: return "semi-aerien"
    if "fourr" in s2 or "gaine" in s2 or "conduit" in s2: return "fourreau"
    return "aerien"

@dataclass
class TopologySummary:
    n_nodes: int
    n_edges: int
    is_connected: bool
    n_components: int
    component_sizes: List[int]
    density: float
    avg_degree: float
    diameter_lcc: Optional[int]
    aspl_lcc: Optional[float]
    n_bridges: int
    n_articulation_points: int
    total_length: float
    total_cost_to_replace: float
    total_duration_h: float

class Analyzer:
    def __init__(self, fp_bats: Path, fp_infra: Path, fp_links: Path):
        self.fp_bats, self.fp_infra, self.fp_links = fp_bats, fp_infra, fp_links
        self.bats = pd.read_csv(fp_bats)
        self.infra = pd.read_csv(fp_infra)
        self.links = pd.read_csv(fp_links)
        self.G: Optional[nx.Graph] = None
        self._prepare_tables()

    def _prepare_tables(self):
        # infra.csv: id_infra, type_infra
        id_infra = pick_col(self.infra, ["id_infra","infra_id","id"])
        type_infra = pick_col(self.infra, ["type_infra","type","categorie","pose","mode"])
        if not id_infra: raise ValueError("infra.csv doit contenir id_infra")
        if not type_infra: raise ValueError("infra.csv doit contenir type_infra")
        self.infra = self.infra.rename(columns={id_infra:"id_infra", type_infra:"type_infra"})
        self.infra["type_infra"] = self.infra["type_infra"].apply(normalize_type)

        # links: source, target, id_infra, length, status
        src = pick_col(self.links, ["source","from","u","src"])
        tgt = pick_col(self.links, ["target","to","v","tgt","dest"])
        lin = pick_col(self.links, ["length","longueur","len","distance","dist","metres","m"])
        status = pick_col(self.links, ["status","etat","state","a_remplacer","to_replace_flag"])
        link_infra = pick_col(self.links, ["id_infra","infra_id","id"])
        if not (src and tgt and link_infra):
            raise ValueError("La table des liaisons doit contenir au minimum: source, target, id_infra")
        if not lin:
            self.links["length"] = 1.0; lin = "length"
        if not status:
            self.links["status"] = "to_replace"; status = "status"

        self.links = self.links.rename(columns={src:"source", tgt:"target", lin:"length", status:"status", link_infra:"id_infra"})
        self.links["source"] = self.links["source"].astype(str)
        self.links["target"] = self.links["target"].astype(str)
        self.links["id_infra"] = self.links["id_infra"].astype(str)
        self.links["length"] = pd.to_numeric(self.links["length"], errors="coerce").fillna(1.0)
        self.links["status"] = self.links["status"].astype(str).str.lower().str.strip()

        # join pour obtenir le type
        self.links = self.links.merge(self.infra, on="id_infra", how="left")
        self.links["type_infra"] = self.links["type_infra"].fillna("aerien").apply(normalize_type)

        # coûts/durées par arête si à remplacer
        self.links["edge_cost"] = self.links.apply(
            lambda r: PRICE_PER_M.get(r["type_infra"], 500.0) * float(r["length"]) if r["status"] in ("to_replace","1","true","yes","y") else 0.0,
            axis=1
        )
        self.links["edge_duration_h"] = self.links.apply(
            lambda r: DUR_PER_M_H.get(r["type_infra"], 2.0) * float(r["length"]) if r["status"] in ("to_replace","1","true","yes","y") else 0.0,
            axis=1
        )

    def build_graph(self) -> nx.Graph:
        G = nx.Graph()
        for _, r in self.links.iterrows():
            G.add_edge(
                r["source"], r["target"],
                id_infra=r["id_infra"],
                length=float(r["length"]),
                status=r["status"],
                type=r["type_infra"],
                edge_cost=float(r["edge_cost"]),
                edge_duration_h=float(r["edge_duration_h"]),
            )
        # ajoute les bâtiments isolés éventuels
        id_b = pick_col(self.bats, ["id_batiment","building_id","id","node"])
        if id_b:
            for b in self.bats[id_b].astype(str).unique():
                if b not in G: G.add_node(b)
        self.G = G
        return G

    def summary(self) -> TopologySummary:
        if self.G is None: self.build_graph()
        G = self.G
        n_nodes = G.number_of_nodes()
        n_edges = G.number_of_edges()
        is_conn = nx.is_connected(G) if n_nodes > 0 else False
        comps = [len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)]
        dens = nx.density(G) if n_nodes > 1 else 0.0
        avg_deg = (2*n_edges / n_nodes) if n_nodes > 0 else 0.0

        # LCC
        diam = aspl = None
        if n_nodes > 1 and comps:
            lcc = max(nx.connected_components(G), key=len)
            H = G.subgraph(lcc).copy()
            if H.number_of_nodes() > 1 and H.number_of_edges() > 0:
                try: diam = nx.diameter(H)
                except: pass
                try: aspl = nx.average_shortest_path_length(H)
                except: pass

        bridges = list(nx.bridges(G)) if n_edges > 0 else []
        arts = list(nx.articulation_points(G)) if n_nodes > 0 else []
        lengths = nx.get_edge_attributes(G, "length")
        total_length = float(sum(lengths.values())) if lengths else 0.0
        total_cost = float(sum(nx.get_edge_attributes(G, "edge_cost").values() or [0.0]))
        total_dur = float(sum(nx.get_edge_attributes(G, "edge_duration_h").values() or [0.0]))

        return TopologySummary(
            n_nodes, n_edges, is_conn, len(comps), comps[:10], dens, avg_deg,
            diam, aspl, len(bridges), len(arts), total_length, total_cost, total_dur
        )

    def export_reports(self):
        if self.G is None: self.build_graph()
        G = self.G
        # composantes
        comp_rows = []
        for i, comp in enumerate(sorted(nx.connected_components(G), key=len, reverse=True), start=1):
            for n in comp:
                comp_rows.append({"component_id": i, "node": n, "size": len(comp)})
        pd.DataFrame(comp_rows).to_csv(OUT / "components.csv", index=False)
        # degrés + points d’articulation
        deg = dict(G.degree())
        arts = set(nx.articulation_points(G)) if G.number_of_nodes() > 0 else set()
        pd.DataFrame([{"node": n, "degree": d, "is_articulation": n in arts} for n,d in deg.items()])\
          .sort_values(["degree","node"], ascending=[False, True])\
          .to_csv(OUT / "degree_stats.csv", index=False)
        # ponts
        pd.DataFrame([{"u":u,"v":v} for (u,v) in (list(nx.bridges(G)) if G.number_of_edges()>0 else [])])\
          .to_csv(OUT / "bridges.csv", index=False)
        # arêtes enrichies
        self.links.to_csv(OUT / "edges_with_costs.csv", index=False)

if __name__ == "__main__":
    analyzer = Analyzer(FP_BATS, FP_INFRA, FP_LINKS)
    analyzer.build_graph()
    s = analyzer.summary()
    print("=== RÉSUMÉ TOPOLOGIQUE ===")
    print(f"Nodes: {s.n_nodes}  Edges: {s.n_edges}  Connected: {s.is_connected}")
    print(f"Components: {s.n_components} (top sizes: {s.component_sizes})")
    print(f"Density: {s.density:.6f}   AvgDegree: {s.avg_degree:.3f}")
    print(f"Diameter (LCC): {s.diameter_lcc}   ASPL (LCC): {s.aspl_lcc}")
    print(f"Bridges: {s.n_bridges}   Articulation points: {s.n_articulation_points}")
    print(f"Total length: {s.total_length:.2f} m")
    print(f"Total COST to_replace: {s.total_cost_to_replace:,.2f} €")
    print(f"Total DURATION to_replace: {s.total_duration_h:,.2f} h")
    analyzer.export_reports()
    print(f"\nRapports dans: {OUT.resolve()}")


In [9]:
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Set
import pandas as pd
import numpy as np
import networkx as nx
import json, os

# ---------------------------
# PARAMÈTRES RAPIDES (à adapter)
# ---------------------------
PATH_XLSX = "reseau_en_arbre.xlsx" 
SHEET_NAME = 0                                
OUT_DIR = "outputs_raccordement"    

# Pondérations / coût
ALPHA = 1.0           
COUT_UNITAIRE = 1.0    
DELTA_REMPLACEMENT = 0.5  
BUDGET = None          

os.makedirs(OUT_DIR, exist_ok=True)
OUT_ORDRE = os.path.join(OUT_DIR, "ordre_de_raccordement.csv")
OUT_TRAVAUX = os.path.join(OUT_DIR, "travaux_par_etape.csv")
OUT_KPIS = os.path.join(OUT_DIR, "kpis.json")
OUT_XLSX = os.path.join(OUT_DIR, "planification_raccordement.xlsx")

--
def std_colnames(df: pd.DataFrame) -> pd.DataFrame:
    return df.rename(columns={c: str(c).strip().lower().replace(" ", "_") for c in df.columns})

def normalize_etat(val) -> str:
    if pd.isna(val): return "intacte"
    s = str(val).strip().lower().replace("à", "a")
    if "remplac" in s or "a_remplacer" in s: return "a_remplacer"
    if "intact" in s or "ok" in s or "neuf" in s: return "intacte"
    return s

# ---------------------------
# Lecture du fichier
# ---------------------------
def read_input(path, sheet_name=0) -> pd.DataFrame:
    try:
        df = pd.read_excel(path, sheet_name=sheet_name)
    except Exception:
        # fallback CSV si jamais
        df = pd.read_csv(path)
    return std_colnames(df)

edges_raw = read_input(PATH_XLSX, SHEET_NAME)


has_edge_list = {"source", "target"}.issubset(edges_raw.columns)
has_bipartite = {"id_batiment", "infra_id"}.issubset(edges_raw.columns)

if not (has_edge_list or has_bipartite):
    raise ValueError(
        f"Colonnes détectées: {list(edges_raw.columns)}\n"
        "Le fichier doit suivre l'un des schémas :\n"
        "- Edge list: source, target [, longueur, cout, etat, nb_maisons]\n"
        "- Biparti: id_batiment, infra_id [, infra_type, longueur, nb_maisons, etat]"
    )


@dataclass
class InfrastructureEL:
    eid: int
    u: str
    v: str
    longueur: float
    cout: float
    etat: str
    nb_maisons: int
    type_intervention: str
    repaired: bool = False

    def difficulte(self, alpha=ALPHA, delta_remplacement=DELTA_REMPLACEMENT) -> float:
        cout_eff = self.cout * (1 + (delta_remplacement if self.etat == "a_remplacer" else 0.0))
        denom = max(1, self.nb_maisons)
        mix = alpha * cout_eff + (1 - alpha) * (self.longueur / denom)
        return 0.0 if self.repaired else float(mix)

@dataclass
class BatimentEL:
    bid: str
    infras: List[int] = field(default_factory=list)

class ReseauEL:
    def __init__(self):
        self.G = nx.Graph()
        self.infras: Dict[int, InfrastructureEL] = {}
        self.batiments: Dict[str, BatimentEL] = {}
        self.repaired_edges: Set[int] = set()

    def add_infra(self, infra: InfrastructureEL):
        self.infras[infra.eid] = infra
        self.G.add_edge(infra.u, infra.v, eid=infra.eid)
        for node in (infra.u, infra.v):
            if node not in self.batiments:
                self.batiments[node] = BatimentEL(node, [])
            self.batiments[node].infras.append(infra.eid)

    def set_repaired(self, eid: int):
        self.infras[eid].repaired = True
        self.repaired_edges.add(eid)

    def edge_weight(self, eid: int) -> float:
        return self.infras[eid].difficulte()

    def path_incremental_cost(self, path_nodes: List[str]) -> Tuple[float, List[int]]:
        total, used_eids = 0.0, []
        for u, v in zip(path_nodes[:-1], path_nodes[1:]):
            eid = self.G[u][v]["eid"]
            w = self.edge_weight(eid)
            if w > 0:
                total += w
                used_eids.append(eid)
        return total, used_eids

    def multi_source_shortest_path(self, sources: Set[str]) -> Dict[str, Tuple[float, List[str]]]:
        H = nx.Graph()
        for u, v, d in self.G.edges(data=True):
            H.add_edge(u, v, weight=self.edge_weight(d["eid"]))
        SUP = "__SUP__"
        for s in sources:
            H.add_edge(SUP, s, weight=0.0)
        dist, paths = nx.single_source_dijkstra(H, SUP, weight="weight")
        dist.pop(SUP, None); paths.pop(SUP, None)
        out = {}
        for node, p in paths.items():
            if p and p[0] == SUP: p = p[1:]
            cost, used_eids = self.path_incremental_cost(p)
            out[node] = (cost, p)
        return out

def run_pipeline_edge_list(edges: pd.DataFrame):
    # Colonnes optionnelles
    for new_name, candidates, default in [
        ("longueur", ["longueur","length","len_m","distance"], np.nan),
        ("cout", ["cout","cost","prix","price"], np.nan),
        ("etat", ["etat","state","condition"], "intacte"),
        ("nb_maisons", ["nb_maisons","maisons","prises","nb_prises"], np.nan),
        ("type_intervention", ["type_intervention","intervention"], np.nan),
    ]:
        if new_name not in edges.columns:
            for c in candidates:
                if c in edges.columns:
                    edges[new_name] = edges[c]; break
            if new_name not in edges.columns:
                edges[new_name] = default

    edges["etat"] = edges["etat"].map(normalize_etat)
    edges["nb_maisons"] = edges["nb_maisons"].fillna(1).astype(int)
    if edges["cout"].isna().all() and edges["longueur"].isna().all():
        edges["longueur"] = 1.0
    edges["cout"] = edges["cout"].fillna(0.0)
    mask = edges["cout"] == 0.0
    edges.loc[mask, "cout"] = edges.loc[mask, "longueur"].fillna(0.0) * COUT_UNITAIRE
    edges["longueur"] = edges["longueur"].fillna(0.0)
    edges["type_intervention"] = edges["type_intervention"].fillna(
        edges["etat"].map(lambda s: "remplacement" if s == "a_remplacer" else "pose/reutilisation")
    )

    reseau = ReseauEL()
    for i, row in edges.reset_index(drop=True).iterrows():
        inf = InfrastructureEL(
            eid=int(i),
            u=str(row["source"]), v=str(row["target"]),
            longueur=float(row["longueur"] or 0.0),
            cout=float(row["cout"] or 0.0),
            etat=normalize_etat(row["etat"]),
            nb_maisons=int(row["nb_maisons"] or 1),
            type_intervention=str(row["type_intervention"])
        )
        reseau.add_infra(inf)

    # Phase 0 — amorçage par l’arête minimale si aucune source fournie
    sources: Set[str] = set()
    if not sources:
        best_eid = min(reseau.infras, key=lambda e: reseau.infras[e].difficulte())
        best = reseau.infras[best_eid]
        reseau.set_repaired(best_eid)
        sources.update([best.u, best.v])

    # Boucle greedy
    ordre_rows, travaux_rows = [], []
    cout_cumule, longueur_reparee, etape = 0.0, 0.0, 0
    raccordes: Set[str] = set(sources)
    pop_par_bat = {b: 1 for b in reseau.batiments}
    prises_cumulees = sum(pop_par_bat.get(n, 0) for n in raccordes if n in pop_par_bat)
    total_population = sum(pop_par_bat.values())

    # Log phase 0
    for b in sorted(raccordes):
        ordre_rows.append({"rang": etape, "batiment": b, "cout_incremental": 0.0,
                           "cout_cumule": cout_cumule, "prises_cumulees": prises_cumulees,
                           "aretes_reparees": []})

    while True:
        candidats = set(reseau.batiments.keys()) - raccordes
        if not candidats: break
        dist_info = reseau.multi_source_shortest_path(raccordes)
        dist_info = {n:v for n,v in dist_info.items() if n in candidats}
        if not dist_info: break

        def key_fn(item):
            n,(c,p) = item
            return (c, len(p), str(n))
        n_best, (cost_best, path_best) = min(dist_info.items(), key=key_fn)
        if BUDGET is not None and (cout_cumule + cost_best) > BUDGET: break

        etape += 1
        repaired_eids = []
        for u, v in zip(path_best[:-1], path_best[1:]):
            eid = reseau.G[u][v]["eid"]
            if not reseau.infras[eid].repaired:
                reseau.set_repaired(eid)
                repaired_eids.append(eid)
                longueur_reparee += reseau.infras[eid].longueur

        cout_cumule += cost_best
        raccordes.add(n_best)
        prises_cumulees += pop_par_bat.get(n_best, 1)
        ordre_rows.append({"rang": etape, "batiment": n_best, "cout_incremental": round(cost_best,3),
                           "cout_cumule": round(cout_cumule,3), "prises_cumulees": int(prises_cumulees),
                           "aretes_reparees": repaired_eids})
        for eid in repaired_eids:
            inf = reseau.infras[eid]
            travaux_rows.append({"etape": etape, "eid": eid, "u": inf.u, "v": inf.v,
                                 "longueur": inf.longueur, "cout": inf.cout,
                                 "etat_initial": inf.etat, "type_intervention": inf.type_intervention})

    df_ordre = pd.DataFrame(ordre_rows)
    df_travaux = pd.DataFrame(travaux_rows)
    kpis = {
        "cout_total": float(round(cout_cumule, 3)),
        "prises_total": int(total_population),
        "prises_reconnectees": int(prises_cumulees),
        "taux_reconnexion": float(round((prises_cumulees/total_population*100),2)) if total_population else None,
        "longueur_reparee": float(round(longueur_reparee,2)),
        "nb_etapes": int(etape)
    }
    return df_ordre, df_travaux, kpis, edges


def run_pipeline_bipartite(edges: pd.DataFrame):
    # colonnes minimales garanties
    if "nb_maisons" not in edges.columns: edges["nb_maisons"] = 1
    if "longueur" not in edges.columns: edges["longueur"] = 1.0
    if "infra_type" not in edges.columns: edges["infra_type"] = "ligne"
    if "etat" not in edges.columns: edges["etat"] = "intacte"
    edges["etat"] = edges["etat"].map(normalize_etat)

    G = nx.Graph()
    # noeuds B:<id> pour bâtiments, I:<id> pour infrastructures
    for _, r in edges.iterrows():
        b = f"B:{r['id_batiment']}"; i = f"I:{r['infra_id']}"
        if b not in G: G.add_node(b, kind="batiment", nb_maisons=int(r["nb_maisons"]))
        if i not in G: G.add_node(i, kind="infra", infra_type=str(r["infra_type"]), etat=str(r["etat"]))
        # arête B-I (poids = coût proxy)
        G.add_edge(b, i,
                   longueur=float(r["longueur"]),
                   cout=float(r["longueur"]) * COUT_UNITAIRE,
                   repaired=False)

    def edge_difficulte(u, v) -> float:
        data = G[u][v]
        cout = data.get("cout", data.get("longueur", 1.0)*COUT_UNITAIRE)
        # état depuis le nœud infra
        etat = G.nodes[u].get("etat") if str(u).startswith("I:") else G.nodes[v].get("etat")
        if data.get("repaired", False): return 0.0
        return cout * (1 + (DELTA_REMPLACEMENT if normalize_etat(etat) == "a_remplacer" else 0.0))

    # Amorçage : arête (B-I) la moins coûteuse
    min_edge = min(G.edges(), key=lambda e: edge_difficulte(e[0], e[1]))
    u0, v0 = min_edge
    G[u0][v0]["repaired"] = True
    sources: Set[str] = {u0, v0}

    def dijkstra_from_sources(G, sources: Set[str]):
        H = nx.Graph()
        for u, v in G.edges():
            H.add_edge(u, v, weight=edge_difficulte(u, v))
        SUP = "__SUP__"
        for s in sources: H.add_edge(SUP, s, weight=0.0)
        dist, paths = nx.single_source_dijkstra(H, SUP, weight="weight")
        dist.pop(SUP, None); paths.pop(SUP, None)
        for n, p in list(paths.items()):
            if p and p[0] == SUP: paths[n] = p[1:]
        return dist, paths

    def path_incremental_cost(G, path: List[str]) -> Tuple[float, List[Tuple[str,str]]]:
        total, repaired_edges = 0.0, []
        for u, v in zip(path[:-1], path[1:]):
            if not G[u][v].get("repaired", False):
                total += edge_difficulte(u, v)
                repaired_edges.append((u, v))
        return total, repaired_edges

    batiments = [n for n, d in G.nodes(data=True) if d.get("kind") == "batiment"]
    pop_par_bat = {b: G.nodes[b].get("nb_maisons",1) for b in batiments}
    raccordes: Set[str] = set(sources)
    etape, cout_cumule, longueur_reparee = 0, 0.0, 0.0

    prises_cumulees = sum(pop_par_bat.get(n,0) for n in raccordes if n in pop_par_bat)
    total_population = int(sum(pop_par_bat.values()))

    ordre_rows, travaux_rows = [], []
    # phase 0
    for n in sorted(raccordes):
        ordre_rows.append({"rang": etape, "batiment": n if n.startswith("B:") else None,
                           "cout_incremental": 0.0, "cout_cumule": cout_cumule,
                           "prises_cumulees": int(prises_cumulees), "aretes_reparees": []})

    # boucle
    while True:
        candidats = set(batiments) - raccordes
        if not candidats: break
        dist, paths = dijkstra_from_sources(G, raccordes)
        cand_info = {b:(dist.get(b,np.inf), paths.get(b,[])) for b in candidats}
        cand_info = {b:t for b,t in cand_info.items() if np.isfinite(t[0])}
        if not cand_info: break

        def key_fn(item):
            b,(c,p) = item
            return (c, len(p), str(b))
        b_best,(c_best,p_best) = min(cand_info.items(), key=key_fn)
        if BUDGET is not None and (cout_cumule + c_best) > BUDGET: break

        etape += 1
        repaired_edges = []
        for u,v in zip(p_best[:-1], p_best[1:]):
            if not G[u][v].get("repaired", False):
                G[u][v]["repaired"] = True
                repaired_edges.append((u,v))
                longueur_reparee += float(G[u][v].get("longueur",0.0))

        cout_cumule += c_best
        raccordes.add(b_best)
        prises_cumulees += int(pop_par_bat.get(b_best,0))

        ordre_rows.append({"rang": etape, "batiment": b_best, "cout_incremental": float(round(c_best,3)),
                           "cout_cumule": float(round(cout_cumule,3)), "prises_cumulees": int(prises_cumulees),
                           "aretes_reparees": repaired_edges})
        for (u,v) in repaired_edges:
            infra_node = u if str(u).startswith("I:") else (v if str(v).startswith("I:") else None)
            etat_initial = G.nodes[infra_node].get("etat","intacte") if infra_node else "intacte"
            type_interv = "remplacement" if normalize_etat(etat_initial)=="a_remplacer" else "pose/reutilisation"
            travaux_rows.append({"etape": etape, "u": u, "v": v,
                                 "longueur": float(G[u][v].get("longueur",0.0)),
                                 "cout_proxy": float(G[u][v].get("cout",0.0)),
                                 "etat_initial": etat_initial, "type_intervention": type_interv})

    df_ordre = pd.DataFrame(ordre_rows)
    df_travaux = pd.DataFrame(travaux_rows)
    kpis = {
        "cout_total_proxy": float(round(cout_cumule,3)),
        "prises_total": int(total_population),
        "prises_reconnectees": int(prises_cumulees),
        "taux_reconnexion": float(round((prises_cumulees/total_population*100),2)) if total_population else None,
        "longueur_reparee_totale": float(round(longueur_reparee,2)),
        "nb_etapes": int(etape)
    }
    return df_ordre, df_travaux, kpis, edges

# ---------------------------
# Exécution selon schéma détecté
# ---------------------------
if has_edge_list:
    df_ordre, df_travaux, kpis, df_entree = run_pipeline_edge_list(edges_raw.copy())
else:
    df_ordre, df_travaux, kpis, df_entree = run_pipeline_bipartite(edges_raw.copy())

# ---------------------------
# Exports & aperçu
# ---------------------------
df_ordre.to_csv(OUT_ORDRE, index=False)
df_travaux.to_csv(OUT_TRAVAUX, index=False)
with open(OUT_KPIS, "w", encoding="utf-8") as f:
    json.dump(kpis, f, ensure_ascii=False, indent=2)

with pd.ExcelWriter(OUT_XLSX, engine="xlsxwriter") as xw:
    df_entree.to_excel(xw, index=False, sheet_name="entree")
    df_ordre.to_excel(xw, index=False, sheet_name="ordre_raccordement")
    df_travaux.to_excel(xw, index=False, sheet_name="travaux_par_etape")

print("KPIs:", kpis)
print("Exports :")
print(" -", OUT_ORDRE)
print(" -", OUT_TRAVAUX)
print(" -", OUT_KPIS)
print(" -", OUT_XLSX)

# (Option) afficher un aperçu dans le notebook
try:
    display(df_ordre.head(10))
    display(df_travaux.head(10))
except NameError:
    pass


✅ Pipeline terminé.
KPIs: {'cout_total_proxy': 2652.819, 'prises_total': 389, 'prises_reconnectees': 389, 'taux_reconnexion': 100.0, 'longueur_reparee_totale': 2652.82, 'nb_etapes': 380}
Exports :
 - outputs_raccordement\ordre_de_raccordement.csv
 - outputs_raccordement\travaux_par_etape.csv
 - outputs_raccordement\kpis.json
 - outputs_raccordement\planification_raccordement.xlsx


Unnamed: 0,rang,batiment,cout_incremental,cout_cumule,prises_cumulees,aretes_reparees
0,0,B:E000228,0.0,0.0,1,[]
1,0,,0.0,0.0,1,[]
2,1,B:E000229,5.609,5.609,2,"[(B:E000228, I:P008012), (I:P008012, B:E000229)]"
3,2,B:E000260,2.804,8.413,3,"[(I:P008012, B:E000260)]"
4,3,B:E000275,2.804,11.217,4,"[(I:P008012, B:E000275)]"
5,4,B:E000303,2.804,14.022,5,"[(I:P008012, B:E000303)]"
6,5,B:E000317,2.804,16.826,6,"[(I:P008012, B:E000317)]"
7,6,B:E000327,2.804,19.63,7,"[(I:P008012, B:E000327)]"
8,7,B:E000358,2.804,22.435,8,"[(I:P008012, B:E000358)]"
9,8,B:E000363,2.804,25.239,9,"[(I:P008012, B:E000363)]"


Unnamed: 0,etape,u,v,longueur,cout_proxy,etat_initial,type_intervention
0,1,B:E000228,I:P008012,2.804326,2.804326,intacte,pose/reutilisation
1,1,I:P008012,B:E000229,2.804326,2.804326,intacte,pose/reutilisation
2,2,I:P008012,B:E000260,2.804326,2.804326,intacte,pose/reutilisation
3,3,I:P008012,B:E000275,2.804326,2.804326,intacte,pose/reutilisation
4,4,I:P008012,B:E000303,2.804326,2.804326,intacte,pose/reutilisation
5,5,I:P008012,B:E000317,2.804326,2.804326,intacte,pose/reutilisation
6,6,I:P008012,B:E000327,2.804326,2.804326,intacte,pose/reutilisation
7,7,I:P008012,B:E000358,2.804326,2.804326,intacte,pose/reutilisation
8,8,I:P008012,B:E000363,2.804326,2.804326,intacte,pose/reutilisation
9,9,I:P008012,B:E000370,2.804326,2.804326,intacte,pose/reutilisation


In [8]:
!pip install xlsxwriter openpyxl


Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9
