In [2]:
import pandas as pd
import numpy as np

# ================================
# ⚙️ 1. Data loading
# ================================
df_bat = pd.read_csv("batiments_avec_infras.csv")
df_infra = pd.read_csv("donnees_infrastructures_complet.csv")

# Header clean-up: trim leading/trailing whitespace from every column name.
df_infra = df_infra.rename(columns=str.strip)
df_bat = df_bat.rename(columns=str.strip)

# Lower-case and trim the categorical text columns so that they match the
# lower-case lookup keys used below.
df_infra = df_infra.assign(
    type_infra=df_infra["type_infra"].str.lower().str.strip(),
    infra_type=df_infra["infra_type"].str.lower().str.strip(),
)
df_bat = df_bat.assign(
    type_batiment=df_bat["type_batiment"].str.lower().str.strip(),
)

# ================================
# 💰 2. Rates and durations per infrastructure type
# ================================
# Per-metre price and per-metre time, keyed by `type_infra`.
# NOTE(review): the time unit is presumably hours (column `duree_h_m`) — confirm.
prix_par_m = {"aerien": 500, "semi-aerien": 750, "fourreau": 900}
temps_par_m = {"aerien": 2, "semi-aerien": 4, "fourreau": 5}

# A `type_infra` value that is not a key of the tables above maps to NaN.
df_infra["prix_m"] = df_infra["type_infra"].map(prix_par_m)
df_infra["duree_h_m"] = df_infra["type_infra"].map(temps_par_m)
# Totals per row: length times the per-metre rate.
df_infra["prix"] = df_infra["longueur"].mul(df_infra["prix_m"])
df_infra["duree"] = df_infra["longueur"].mul(df_infra["duree_h_m"])

# Flag the infrastructures to repair: `infra_type` contains "remplacer"
# (missing values count as "not to repair").
df_infra["a_reparer"] = df_infra["infra_type"].str.contains("remplacer", na=False, case=False)

# ================================
# 🧠 3. Calcul difficulté initiale par bâtiment
# ================================
def calculer_difficulte_batiment(bat_id):
    """Summarise the pending replacement work for one building.

    Reads the module-level ``df_infra`` table and keeps the rows whose
    ``id_batiment`` equals ``bat_id`` and whose ``a_reparer`` flag is set.

    Returns a 5-tuple ``(difficulte, cout, duree, nb, liste_infras)``:

    * ``difficulte`` -- sum over the kept rows of
      ``longueur / (nb_maisons + 1e-6)``; ``nb_maisons`` falls back to 1
      when ``df_infra`` has no such column.
    * ``cout`` -- sum of the ``prix`` column.
    * ``duree`` -- sum of the ``duree`` column.
    * ``nb`` -- number of kept rows.
    * ``liste_infras`` -- the ``infra_id`` values of the kept rows.

    When no row qualifies, ``(0, 0, 0, 0, [])`` is returned.
    """
    selection = df_infra[df_infra["id_batiment"] == bat_id]
    selection = selection[selection["a_reparer"]]
    if selection.empty:
        return 0, 0, 0, 0, []

    # The small epsilon keeps the division defined when nb_maisons is 0.
    maisons = selection.get("nb_maisons", 1)
    ratio_par_ligne = selection["longueur"] / (maisons + 1e-6)

    return (
        ratio_par_ligne.sum(),
        selection["prix"].sum(),
        selection["duree"].sum(),
        len(selection),
        selection["infra_id"].tolist(),
    )

# Evaluate every id listed in df_bat; each record is
# [id_batiment, difficulte, cout, duree, nb, liste_infras].
bat_metrics = [
    [bat_id, *calculer_difficulte_batiment(bat_id)]
    for bat_id in df_bat["id_batiment"]
]

df_bat_metrics = pd.DataFrame(
    bat_metrics,
    columns=[
        "id_batiment",
        "difficulte",
        "cout_total",
        "duree_totale",
        "nb_infras_a_remplacer",
        "liste_infras_a_remplacer",
    ],
)
# Attach the building type and house count from df_bat (left join keeps
# every metrics row).
df_bat_metrics = df_bat_metrics.merge(
    df_bat[["id_batiment", "type_batiment", "nb_maisons"]],
    how="left",
    on="id_batiment",
)

# ================================
# 🩺 4. Prioritisation: hospital > school > housing
# ================================
# Lower rank = served first. `type_batiment` is lower-cased on load but its
# accents are left as-is, so both the accented and the unaccented spelling of
# each type are listed; with only one spelling per type, the other one would
# silently fall through to the default rank below.
priorite = {
    "hôpital": 1,
    "hopital": 1,
    "école": 2,
    "ecole": 2,
    "habitation": 3,
}
# Any other (or missing) building type gets the lowest priority, rank 4.
df_bat_metrics["priorite"] = df_bat_metrics["type_batiment"].map(priorite).fillna(4)

# ================================
# 🧩 5. Phase planning with a combined score that includes duration
# ================================
# Each pass of the loop below is one phase: the remaining building with the
# lowest combined score is selected, its not-yet-repaired infrastructures are
# marked as repaired, and the metrics of the remaining buildings are
# recomputed without the infrastructures repaired so far.
phases_stats = []
infras_reparees = set()  # infra_id values already scheduled in a phase

# Phase 0: buildings with nothing to replace
df_phase0 = df_bat_metrics[df_bat_metrics["nb_infras_a_remplacer"] == 0].copy()
df_phase0["phase"] = 0
phases_stats.append({
    "phase": 0,
    "nb_batiments": len(df_phase0),
    "nb_maisons": df_phase0["nb_maisons"].sum(),
    "nb_infras_reparees": 0,
    "cout_total": 0,
    "duree_totale": 0,
    "liste_infras_reparees": []
})

# Buildings that still need work
to_fix = df_bat_metrics[df_bat_metrics["nb_infras_a_remplacer"] > 0].copy()
phase_num = 1

# Weights of the combined score: priority, difficulty, cost, duration
alpha, beta, gamma, delta = 0.4, 0.3, 0.2, 0.1

while not to_fix.empty:
    # Scale each criterion by its current maximum; the epsilon keeps the
    # division defined when that maximum is 0.
    to_fix["difficulte_norm"] = to_fix["difficulte"] / (to_fix["difficulte"].max() + 1e-6)
    to_fix["cout_norm"] = to_fix["cout_total"] / (to_fix["cout_total"].max() + 1e-6)
    to_fix["priorite_norm"] = to_fix["priorite"] / (to_fix["priorite"].max() + 1e-6)
    to_fix["duree_norm"] = to_fix["duree_totale"] / (to_fix["duree_totale"].max() + 1e-6)

    # Combined score: smaller = handled earlier
    to_fix["score_combine"] = (alpha*to_fix["priorite_norm"]
                               + beta*to_fix["difficulte_norm"]
                               + gamma*to_fix["cout_norm"]
                               + delta*to_fix["duree_norm"])

    to_fix = to_fix.sort_values(by="score_combine", ascending=True)
    repaired_this_phase = []

    # Pick the building with the lowest score
    row = to_fix.iloc[0]
    bat_id = row["id_batiment"]
    infras_a_reparer_phase = [i for i in row["liste_infras_a_remplacer"] if i not in infras_reparees]

    # Nothing left to repair for this building: drop it without opening a phase.
    if not infras_a_reparer_phase:
        # .copy() so the column assignments of the next pass do not write
        # into a slice of the previous frame.
        to_fix = to_fix.iloc[1:].copy()
        continue

    # Record the infrastructures repaired in this phase
    infras_reparees.update(infras_a_reparer_phase)
    repaired_this_phase.append(row)

    # Recompute difficulty, cost and duration of every remaining building,
    # ignoring the infrastructures repaired so far.
    recalcul = []
    for _, r in to_fix.iterrows():
        nouvelles_infras = [i_id for i_id in r["liste_infras_a_remplacer"] if i_id not in infras_reparees]
        difficulte, cout, duree, nb, liste_infras = 0, 0, 0, 0, []
        if nouvelles_infras:
            lignes = df_infra[(df_infra["infra_id"].isin(nouvelles_infras)) & (df_infra["id_batiment"] == r["id_batiment"])]
            difficulte = (lignes["longueur"] / (lignes.get("nb_maisons",1) + 1e-6)).sum()
            cout = lignes["prix"].sum()
            duree = lignes["duree"].sum()
            nb = len(lignes)
            liste_infras = lignes["infra_id"].tolist()
        recalcul.append([r["id_batiment"], difficulte, cout, duree, nb, liste_infras])

    recalcul_df = pd.DataFrame(recalcul, columns=["id_batiment", "difficulte", "cout_total", "duree_totale", "nb_infras_a_remplacer", "liste_infras_a_remplacer"])
    to_fix = to_fix.drop(columns=["difficulte","cout_total","duree_totale","nb_infras_a_remplacer","liste_infras_a_remplacer"]).merge(recalcul_df, on="id_batiment")

    # Remove fully repaired buildings (the selected one included); .copy()
    # for the same reason as above.
    to_fix = to_fix[to_fix["nb_infras_a_remplacer"] > 0].copy()

    # Phase statistics. An infra_id can sit on several df_infra rows
    # (presumably one per building it serves, which is why the recalculation
    # above filters on both infra_id and id_batiment). Keep a single row per
    # infrastructure so that a shared one is not billed once per row.
    infras_phase = df_infra[df_infra["infra_id"].isin(infras_a_reparer_phase)].drop_duplicates(subset="infra_id")
    nb_batiments = len(repaired_this_phase)
    nb_maisons = sum(r["nb_maisons"] for r in repaired_this_phase)
    cout_total_phase = infras_phase["prix"].sum()
    duree_totale_phase = infras_phase["duree"].sum()

    phases_stats.append({
        "phase": phase_num,
        "nb_batiments": nb_batiments,
        "nb_maisons": nb_maisons,
        "nb_infras_reparees": len(infras_a_reparer_phase),
        "cout_total": cout_total_phase,
        "duree_totale": duree_totale_phase,
        "liste_infras_reparees": infras_a_reparer_phase
    })

    phase_num += 1

# ================================
# 💾 6. CSV export and display
# ================================
# One row per phase; the columns come from the keys of the per-phase dicts.
df_phases_stats = pd.DataFrame(data=phases_stats)
# Write the table without its positional index, then render it.
df_phases_stats.to_csv(path_or_buf="statistiques_par_phase.csv", index=False)
display(df_phases_stats)


Unnamed: 0,phase,nb_batiments,nb_maisons,nb_infras_reparees,cout_total,duree_totale,liste_infras_reparees
0,0,296,1316,0,0.000000,0.000000,[]
1,1,1,1,3,70041.212654,284.148343,"[P005500, P007447, P007990]"
2,2,1,6,2,10442.534704,41.770139,"[P005664, P007448]"
3,3,1,4,2,10580.306905,42.321228,"[P005781, P007539]"
4,4,1,2,2,16296.542002,86.566790,"[P001391, P007689]"
...,...,...,...,...,...,...,...
76,76,1,4,2,12833.936338,63.353950,"[P007607, P008026]"
77,77,1,1,2,11524.234592,46.096938,"[P007785, P008021]"
78,78,1,2,3,80434.965676,444.980978,"[P007562, P008015, P008023]"
79,79,1,2,2,22425.603635,124.586687,"[P007566, P008024]"
