#### Création du tableau CSV au niveau des prix (tablea décomposé de Fanny)

In [120]:
# Import des bibliothèques 

import openpyxl
import pandas as pd
import re
from openpyxl.utils.cell import column_index_from_string
import requests
import io

# Configuration des paramètres 

FILE_URL = 'https://raw.githubusercontent.com/mission-donnees-dett/analyse_preventifs_23_25/main/Tunnels_Cout%20pre%CC%81ventif_13.06.2025%20(5).xlsx'
TARGET_MARCHES = {"Bâtiment", "Propreté", "ContReg", "Eclairage", "AEV", "Automate", "PAU/TSE", "Onduleur", "Détection", "Ventilation", "Vidéo", "Pompage"}
TUNNEL_SHEETS = {'Boissy', 'Champigny', 'Guy Môquet', 'Moulin', 'Nogent',
    'Ambroise Paré', 'Belle-Rive', 'Chennevières', 'Fontenay', 'La Défense',
    'Nanterre Centre', 'Nanterre échangeur', 'Neuilly', 'Saint-Cloud', 'Sévines',
    'Bobigny', 'La Courneuve', 'Landy', 'Lumen-Norton', 'Taverny',
    'Antony', 'Fresnes', 'Bicêtre', 'Italie', 'Orly'}  # Liste des noms de feuilles tunnels
PREVENTIF_SHEET = 'Préventifs_tunnels'

# Téléchargement et chargement du classeur Excel depuis GitHub 

response = requests.get(FILE_URL)
if response.status_code != 200:
    raise FileNotFoundError(f"Failed to download file: {FILE_URL}")
wb = openpyxl.load_workbook(io.BytesIO(response.content), data_only=False)
ws_preventif = wb[PREVENTIF_SHEET]

# Résolution de valeur/formule de cellule (même avec réf. croisée) 

def resolve_cell_value(ws, cell):
    value = cell.value
    if isinstance(value, str) and value.startswith('='):
        match = re.match(r'=([A-Za-z0-9_]+)!([A-Z]+)(\d+)', value)
        if match:
            sheet_name, col, row = match.groups()
            try:
                target_ws = wb[sheet_name]
                target_cell = target_ws[f"{col}{row}"]
                return resolve_cell_value(target_ws, target_cell)
            except Exception as e:
                print(f"    ⚠️ Failed to resolve cross-sheet reference {value}: {e}")
                return None
        else:
            return None
    try:
        return float(value)
    except:
        return None

# Évaluation simple des formules (type A1+A2+A3)

def evaluate_simple_formula(ws, formula, row_num):
    if not isinstance(formula, str) or not formula.startswith('='):
        try:
            return float(formula)
        except:
            return None

    formula = formula.strip('=').replace(' ', '')
    parts = formula.split('+')
    total = 0
    for ref in parts:
        match = re.match(r'([A-Z]+)(\d+)?', ref)
        if not match:
            return None
        col_letter = match.group(1)
        row_number = int(match.group(2)) if match.group(2) else row_num
        try:
            col_index = column_index_from_string(col_letter)
            cell = ws.cell(row=row_number, column=col_index)
            val = resolve_cell_value(ws, cell)
            if val is None:
                return None
            total += float(val)
        except Exception:
            return None
    return total

# Analyse du type de formule et extraction des références

def parse_formula(formula):
    range_matches = re.findall(r'Préventifs_tunnels!G(\d+)(?::G(\d+))?', formula)
    refs = []
    for start_str, end_str in range_matches:
        start = int(start_str)
        if end_str:
            end = int(end_str)
            refs.extend(range(start, end + 1))
        else:
            refs.append(start)

    coeff = 0.5 if '/2' in formula else 1

    if '/2' in formula:
        form_typ = 'div_2'
    elif len(refs) == 1 and formula.strip('=') == f"Préventifs_tunnels!G{refs[0]}":
        form_typ = 'direct_reference'
    elif 'SUM' in formula.upper():
        form_typ = f"sum_{len(refs)}"
    elif '+' in formula:
        form_typ = f"sum_{len(refs)}" if len(refs) >= 2 else 'unknown'
    else:
        form_typ = 'unknown'

    return refs, form_typ, coeff

# Résolution du champ 'Nombre équipé' en cas de formule

def resolve_nb_eq_value(ws, cell, depth=0):
    MAX_DEPTH = 5
    if depth > MAX_DEPTH:
        return 0

    value = cell.value
    if isinstance(value, str) and value.startswith('='):
        matches_cross = re.findall(r"'([^']+)'!([A-Z]+)(\d+)", value)
        if matches_cross:
            total = 0
            for sheet_name, col, row in matches_cross:
                if sheet_name in wb.sheetnames:
                    try:
                        target_ws = wb[sheet_name]
                        target_cell = target_ws[f"{col}{row}"]
                        val = resolve_nb_eq_value(target_ws, target_cell, depth + 1)
                        total += val if val is not None else 0
                    except Exception:
                        continue
            return total

        matches_same = re.findall(r"([A-Z]+)(\d+)", value)
        if matches_same:
            total = 0
            for col, row in matches_same:
                try:
                    target_cell = ws[f"{col}{row}"]
                    val = resolve_nb_eq_value(ws, target_cell, depth + 1)
                    total += val if val is not None else 0
                except Exception:
                    continue
            return total

        try:
            expr = value.lstrip('=').replace(',', '.')
            if re.fullmatch(r'[\d\.\+\-\*\/\(\) ]+', expr):
                return float(eval(expr, {"__builtins__": None}, {}))
        except:
            return 0

    try:
        return float(value) if float(value) != 0 else 0
    except:
        return 0

# Résolution du champ 'Coût HT' même avec formule ou réf. croisée

def resolve_cout_ht(ws, cell, depth=0):
    MAX_DEPTH = 10
    if depth > MAX_DEPTH:
        return None

    val = cell.value
    if isinstance(val, str) and val.startswith('='):
        match_sheet = re.match(r"='([^']+)'!([A-Z]+)(\d+)", val)
        if match_sheet:
            sheet_name, col, row = match_sheet.groups()
            if sheet_name in wb.sheetnames:
                target_ws = wb[sheet_name]
                target_cell = target_ws[f"{col}{row}"]
                return resolve_cout_ht(target_ws, target_cell, depth + 1)
        match_same = re.match(r"=([A-Z]+)(\d+)", val)
        if match_same:
            col, row = match_same.groups()
            return resolve_cout_ht(ws, ws[f"{col}{row}"], depth + 1)

        cached_value = getattr(cell, 'cached_value', None)
        if cached_value is None:
            cached_value = getattr(cell, '_value', None)

        if cached_value is not None:
            try:
                return float(str(cached_value).replace(',', '.'))
            except:
                return 0
        return 0

    try:
        return float(val)
    except:
        return 0

# Traitement des lignes d'une feuille tunnel

def process_tunnel_sheet(ws_tunnel):
    print(f"\nProcessing tunnel sheet: {ws_tunnel.title}")

    header_row = ws_tunnel[2]
    headers = {}
    for idx, cell in enumerate(header_row):
        if cell.value:
            clean_header = str(cell.value).strip().replace('\n', ' ')
            headers[clean_header] = idx
    print(f"Headers found: {list(headers.keys())}")

    col_marche = headers.get('Marché')
    col_freq = headers.get('Fréquence totale')
    col_nbeq = headers.get('Nombre équipé')
    col_cht = headers.get('Coût HT')

    # Add column C and D for freq_annuelle and ajust_freq_suppl
    col_freq_annuelle = 2  # Column C (index 2 in zero-based)
    col_ajust_freq_suppl = 3  # Column D (index 3 in zero-based)

    if None in (col_marche, col_freq, col_nbeq, col_cht):
        print(f"⚠️ Missing one or more required columns in sheet {ws_tunnel.title}. Skipping.")
        return [], {}

    sheet_results = []
    skipped_reasons = {}

    for row in ws_tunnel.iter_rows(min_row=3):
        marche_raw = row[col_marche].value
        if marche_raw is None:
            continue
        marche = str(marche_raw).strip()
        if marche not in TARGET_MARCHES:
            continue

        # Use resolve_cell_value to handle external references in columns C and D
        freq_annuelle = resolve_cell_value(ws_tunnel, row[col_freq_annuelle])
        ajust_freq_suppl = resolve_cell_value(ws_tunnel, row[col_ajust_freq_suppl])

        raw_freq = row[col_freq].value
        freq = evaluate_simple_formula(ws_tunnel, raw_freq, row[0].row)
        if freq is None:
            skipped_reasons[row[0].row] = "Invalid frequency"
            continue

        nb_eq = resolve_nb_eq_value(ws_tunnel, row[col_nbeq])
        if nb_eq is None:
            skipped_reasons[row[0].row] = "Could not resolve nb_eq"
            continue

        cell_cht = row[col_cht]
        cell_value = cell_cht.value
        cell_type = cell_cht.data_type

        mar_diminutif = marche
        tunnel = ws_tunnel.title

        if cell_type != 'f':
            try:
                cout_ht_resolved = float(cell_value)
            except:
                cout_ht_resolved = 0
            total = freq * nb_eq * cout_ht_resolved
            sheet_results.append({
                'mar_diminutif': mar_diminutif,
                'tunnel': tunnel,
                'ref_prix': None,
                'form_typ': 'direct_value',
                'frq_totale': freq,
                'freq_annuelle': freq_annuelle,
                'ajust_freq_suppl': ajust_freq_suppl,
                'nb_equipe': nb_eq,
                'coeff': 1,
                'cout_ht': cout_ht_resolved,
                'cout_total': round(total, 2)
            })
            continue

        formula = cell_value
        if 'Préventifs_tunnels!G' not in formula:
            skipped_reasons[row[0].row] = "Formula does not reference Préventifs_tunnels!G"
            continue

        refs, form_typ, coeff = parse_formula(formula)

        for ref_row in refs:
            ref_prix = ws_preventif[f'D{ref_row}'].value
            cout_ht_cell = ws_preventif[f'G{ref_row}']
            cout_ht_resolved = resolve_cout_ht(ws_preventif, cout_ht_cell)
            if cout_ht_resolved is None:
                skipped_reasons[row[0].row] = f"Could not resolve cout_ht for G{ref_row}"
                continue

            total = freq * nb_eq * coeff * cout_ht_resolved
            sheet_results.append({
                'mar_diminutif': mar_diminutif,
                'tunnel': tunnel,
                'ref_prix': ref_prix,
                'form_typ': form_typ,
                'frq_totale': freq,
                'freq_annuelle': freq_annuelle,
                'ajust_freq_suppl': ajust_freq_suppl,
                'nb_equipe': nb_eq,
                'coeff': coeff,
                'cout_ht': cout_ht_resolved,
                'cout_total': round(total, 2)
            })

    return sheet_results, skipped_reasons

# Exécution sur toutes les feuilles tunnel

all_results = []
all_skipped = {}

for tunnel_name in TUNNEL_SHEETS:
    if tunnel_name not in wb.sheetnames:
        print(f"Sheet '{tunnel_name}' not found, skipping.")
        continue
    ws_tunnel = wb[tunnel_name]
    results, skipped = process_tunnel_sheet(ws_tunnel)
    all_results.extend(results)
    if skipped:
        all_skipped[tunnel_name] = skipped

# Création du DataFrame final

df = pd.DataFrame(all_results)
if not df.empty:

    df_final = df.drop(columns=['form_typ', 'coeff'], errors='ignore')


    freq_order = ['freq_annuelle', 'ajust_freq_suppl', 'frq_totale']
    freq_cols = [col for col in freq_order if col in df_final.columns]
    other_cols = [col for col in df_final.columns if col not in freq_cols]
    new_col_order = freq_cols + other_cols

    df_final = df_final[new_col_order]

    print(df_final.head())
else:
    print("No data to display")



Processing tunnel sheet: Saint-Cloud
Headers found: ['Marché', 'Opération', 'Fréquence annuelle', 'Ajustement fréquence supplémentaire', 'Fréquence totale', 'Nombre équipé', 'Coût HT', 'Coût total HT', 'Commentaire/Question']

Processing tunnel sheet: Boissy
Headers found: ['Marché', 'Opération', 'Fréquence annuelle', 'Ajustement fréquence supplémentaire', 'Fréquence totale', 'Nombre équipé', 'Coût HT', 'Coût total HT', 'Commentaire/Question']

Processing tunnel sheet: Taverny
Headers found: ['Marché', 'Opération', 'Fréquence annuelle', 'Ajustement fréquence supplémentaire', 'Fréquence totale', 'Nombre équipé', 'Coût HT', 'Coût total HT', 'Commentaire/Question']

Processing tunnel sheet: La Courneuve
Headers found: ['Marché', 'Opération', 'Fréquence annuelle', 'Ajustement fréquence supplémentaire', 'Fréquence totale', 'Nombre équipé', 'Coût HT', 'Coût total HT', 'Commentaire/Question']

Processing tunnel sheet: Neuilly
Headers found: ['Marché', 'Opération', 'Fréquence annuelle', 'Ajus

In [121]:
df.groupby('tunnel')['cout_total'].sum()

tunnel
Ambroise Paré          227419.93
Antony                 150926.14
Belle-Rive             284358.03
Bicêtre                219212.70
Bobigny                274904.34
Boissy                 157750.30
Champigny              254497.57
Chennevières           122960.62
Fontenay               156008.04
Fresnes                165767.49
Guy Môquet             178913.54
Italie                  78171.13
La Courneuve           101015.06
La Défense            1130110.88
Landy                  305631.10
Lumen-Norton           203176.21
Moulin                 180645.57
Nanterre Centre        308494.37
Nanterre échangeur     484508.68
Neuilly                124335.58
Nogent                 442966.85
Orly                   106075.22
Saint-Cloud            230813.48
Sévines                118078.62
Taverny                135365.62
Name: cout_total, dtype: float64

In [122]:
new_order = ['mar_diminutif', 'tunnel', 'ref_prix', 'freq_annuelle', 'ajust_freq_suppl', 'frq_totale', 'nb_equipe', 'cout_ht', 'cout_total']
df_final = df_final[new_order]
df_final

Unnamed: 0,mar_diminutif,tunnel,ref_prix,freq_annuelle,ajust_freq_suppl,frq_totale,nb_equipe,cout_ht,cout_total
0,Bâtiment,Saint-Cloud,VJO001,6.0,0.0,6.0,1.0,2029.00,12174.0
1,Bâtiment,Saint-Cloud,VJO002,6.0,0.0,6.0,1.0,1522.00,9132.0
2,Bâtiment,Saint-Cloud,VNO001,6.0,0.0,6.0,1.0,3264.00,19584.0
3,Bâtiment,Saint-Cloud,VNO002,6.0,0.0,6.0,1.0,2611.00,15666.0
4,Bâtiment,Saint-Cloud,MPO001,1.0,0.0,1.0,1.0,3519.00,3519.0
...,...,...,...,...,...,...,...,...,...
1641,Pompage,Sévines,INP600,1.0,0.0,1.0,0.0,858.61,0.0
1642,Pompage,Sévines,INP700,1.0,0.0,1.0,0.0,214.65,0.0
1643,Pompage,Sévines,INP800,1.0,0.0,1.0,0.0,214.65,0.0
1644,Pompage,Sévines,INP900,1.0,0.0,1.0,0.0,208.40,0.0


Maintenant, on veut rajouter les prdp_code (codes prix permanents) à ce table. 

In [123]:
csv_url = 'https://raw.githubusercontent.com/mission-donnees-dett/analyse_preventifs_23_25/main/prdp_prod_code_montants_designation_2324.csv'
df_csv = pd.read_csv(csv_url, delimiter=';')

mapping = dict(zip(df_csv['prod_code'], df_csv['prdp_code']))

df_final['prdp_code'] = df_final['ref_prix'].map(mapping)

ref_prix_index = df_final.columns.get_loc('ref_prix')
cols = df_final.columns.tolist()
cols.insert(ref_prix_index, cols.pop(cols.index('prdp_code')))
df_final = df_final[cols]

In [124]:
df_final

Unnamed: 0,mar_diminutif,tunnel,prdp_code,ref_prix,freq_annuelle,ajust_freq_suppl,frq_totale,nb_equipe,cout_ht,cout_total
0,Bâtiment,Saint-Cloud,BATCJN001,VJO001,6.0,0.0,6.0,1.0,2029.00,12174.0
1,Bâtiment,Saint-Cloud,BATCJN002,VJO002,6.0,0.0,6.0,1.0,1522.00,9132.0
2,Bâtiment,Saint-Cloud,BATCNO001,VNO001,6.0,0.0,6.0,1.0,3264.00,19584.0
3,Bâtiment,Saint-Cloud,BATCNO002,VNO002,6.0,0.0,6.0,1.0,2611.00,15666.0
4,Bâtiment,Saint-Cloud,BATMPO001,MPO001,1.0,0.0,1.0,1.0,3519.00,3519.0
...,...,...,...,...,...,...,...,...,...,...
1641,Pompage,Sévines,POMERD600,INP600,1.0,0.0,1.0,0.0,858.61,0.0
1642,Pompage,Sévines,POMERE700,INP700,1.0,0.0,1.0,0.0,214.65,0.0
1643,Pompage,Sévines,POMERF800,INP800,1.0,0.0,1.0,0.0,214.65,0.0
1644,Pompage,Sévines,POMERG900,INP900,1.0,0.0,1.0,0.0,208.40,0.0


---

Ici, on veut rajouter des catégories plus générales pour grouper/aggréger les prix 

In [125]:
x = pd.read_excel(FILE_URL, sheet_name='Préventifs_tunnels')
y = x.iloc[:, 2].unique()
y

array(['Préventif IS + niches', 'Contrôle des IS de jour',
       'Contrôle des IS + niches de nuit', "Lavage d'un tube",
       'Vérification des dispositifs de levage',
       'Vérification des installations électriques',
       'Nettoyage des plots de jalonnement pour un sens',
       "Nettoyage de l'éclairage pour un sens",
       'Remplacement systématiques des sources',
       "Mesure du niveau d'éclairement",
       'Essais fonctionnels pour un sens',
       'Maintenance préventive pour un sens',
       'Maintenance préventive prises/coffret pompier',
       'Préventif automates principaux et baies MESD (fiche n°3 du CCTP)',
       'Préventif armoires MESD (fiche n°4 du CCTP)',
       'Préventif automates métier (fiche n°5 du CCTP)',
       "Dépollution d'une baie",
       "Maintenance préventive de l'ensemble des PAU du tunnel",
       'Maintenance préventive d’un Poste d’Appel d’Urgence (PAU) analogique',
       'Maintenance préventive d’un Poste d’Appel d’Urgence (PAU) IP',
 

In [126]:
# Accès à la feuille Préventifs_tunnels
ws_preventif = wb['Préventifs_tunnels']

# Création d'un dictionnaire: ref_prix → prod_groupe
ref_to_groupe = {}
for row in ws_preventif.iter_rows(min_row=3):  # Ignorer l'en-tête
    ref_prix = row[3].value  # Colonne D
    prod_groupe = row[2].value  # Colonne C
    if ref_prix:
        ref_to_groupe[ref_prix] = prod_groupe

# Ajout de la colonne 'prod_groupe' à partir de 'ref_prix'
df_final['prod_groupe'] = df_final['ref_prix'].map(ref_to_groupe)


col_order = df_final.columns.tolist()
col_order.insert(3, col_order.pop(col_order.index('prod_groupe')))
df_final = df_final[col_order]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['prod_groupe'] = df_final['ref_prix'].map(ref_to_groupe)


In [127]:
df_final.head()

Unnamed: 0,mar_diminutif,tunnel,prdp_code,prod_groupe,ref_prix,freq_annuelle,ajust_freq_suppl,frq_totale,nb_equipe,cout_ht,cout_total
0,Bâtiment,Saint-Cloud,BATCJN001,Contrôle des IS de jour,VJO001,6.0,0.0,6.0,1.0,2029.0,12174.0
1,Bâtiment,Saint-Cloud,BATCJN002,Contrôle des IS de jour,VJO002,6.0,0.0,6.0,1.0,1522.0,9132.0
2,Bâtiment,Saint-Cloud,BATCNO001,Contrôle des IS + niches de nuit,VNO001,6.0,0.0,6.0,1.0,3264.0,19584.0
3,Bâtiment,Saint-Cloud,BATCNO002,Contrôle des IS + niches de nuit,VNO002,6.0,0.0,6.0,1.0,2611.0,15666.0
4,Bâtiment,Saint-Cloud,BATMPO001,Préventif IS + niches,MPO001,1.0,0.0,1.0,1.0,3519.0,3519.0


In [129]:
df_final.to_csv('coutTunnelsPreventifs_parPrix.csv')


---

In [19]:
import pandas as pd

# Example DataFrame (replace this with your actual DataFrame)
df = pd.read_csv('/Users/iustintdr/General-2/comparaison_mar/prdp_prod_code_montants_designation_2324.csv', sep=';')

# Create the average column
df['montant_ht_moy'] = (df['montant_ht_2023'] + df['montant_ht_2024']) / 2


In [20]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,mar_diminutif,prod_code,prdp_code,montant_ht_2023,montant_ht_2024,prod_designation,montant_ht_moy
0,0,0,Bat,B2P001,BATMAI001,48033.05,95676.42,Prestation de maintenance préventive d'une iss...,71854.735
1,1,1,Bat,B2P003,BATMAN003,39268.71,27336.12,Prestation de maintenance préventive d une ni...,33302.415
2,2,2,Bat,B2P002,BATPLV002,7200.0,9011.87,Plus-value à l entretien d une issue de seco...,8105.935
3,3,3,Bâtiment,MPE001,BATMPE001,0.0,0.0,EST - Inspection de maintenance préventive des...,0.0
4,4,4,Bâtiment,MPE002,BATMPE002,0.0,0.0,EST - Inspection de maintenance préventive des...,0.0


In [29]:
import pandas as pd

# Load the main data
df_quant = pd.read_csv('/Users/iustintdr/General-2/comparaison_mar/prodQuantAnLieuUniTyp2324Preventifs.csv')

# Load the prdp mapping data (semicolon separator!)
df_prdp = pd.read_csv('/Users/iustintdr/General-2/comparaison_mar/prdp_prod_code_montants_designation_2324.csv', sep=';')

# Filter only necessary columns from prdp data
df_prdp = df_prdp[['prod_code', 'prdp_code']]

# Filter rows for 2023 and 2024
df_filtered = df_quant[df_quant['com_bud_annee'].isin([2023, 2024])]

# Group by mar_diminutif, prod_code, lieu, and year
grouped = df_filtered.groupby(
    ['mar_diminutif', 'prod_code', 'lieu', 'com_bud_annee'],
    as_index=False
)['com_prod_montant_ht'].sum()

# Pivot to get 2023 and 2024 columns
pivoted = grouped.pivot_table(
    index=['mar_diminutif', 'prod_code', 'lieu'],
    columns='com_bud_annee',
    values='com_prod_montant_ht',
    fill_value=0
).reset_index()

# Rename columns
pivoted = pivoted.rename(columns={
    2023: 'montant_ht_2023',
    2024: 'montant_ht_2024'
})

# Calculate the average
pivoted['montant_moy_ht'] = pivoted[['montant_ht_2023', 'montant_ht_2024']].mean(axis=1)

pivoted = pivoted[~pivoted['lieu'].isin(['PCTT', 'Tous tunnels'])]

# Merge with prdp_code info
final_df = pd.merge(pivoted, df_prdp, on='prod_code', how='left')

# Optional: Reorder columns
final_df = final_df[[
    'mar_diminutif', 'lieu', 'prod_code', 'montant_ht_2023',
    'montant_ht_2024', 'montant_moy_ht', 'prdp_code'
]]

# Show or save the result
final_df

Unnamed: 0,mar_diminutif,lieu,prod_code,montant_ht_2023,montant_ht_2024,montant_moy_ht,prdp_code
0,AEV,Ambroise PARE,AEFB10,1407.40,1439.78,1423.59,AEVEFB010
1,AEV,Chennevières,AEFB11,647.84,662.74,655.29,AEVEFB011
2,AEV,Fontenay le Fleury,AEFB12,0.00,743.62,371.81,AEVEFB012
3,AEV,Saint Cloud,AEFB13,1464.44,1498.12,1481.28,AEVEFB013
4,AEV,Bellerive,AEFB14,1435.92,1468.94,1452.43,AEVEFB014
...,...,...,...,...,...,...,...
1764,Vidéo 2,Antony,VI MP 418,0.00,133.80,66.90,VIDPCI418
1765,Vidéo 2,Fresnes,VI MP 419,0.00,29.70,14.85,VIDPCI419
1766,Vidéo 2,Bicêtre,VI MP 420,0.00,193.30,96.65,VIDPCI420
1767,Vidéo 2,Orly,VI MP 421,0.00,29.70,14.85,VIDPCI421


In [32]:
final_df.iloc[:, 1].unique()

array(['Ambroise PARE', 'Chennevières', 'Fontenay le Fleury',
       'Saint Cloud', 'Bellerive', 'Bobigny-Lumen-Norton', 'La Courneuve',
       'Landy', 'Taverny', 'Nanterre / La Défense', 'Sévines', 'Neuilly',
       'Thiais', 'Champigny', 'Nogent', 'Boissy-Saint-Léger', 'Bicêtre',
       'Antony', 'Orly', 'Fresnes', 'Italie'], dtype=object)

In [39]:
tableau_decompose_25 = pd.read_csv('https://raw.githubusercontent.com/mission-donnees-dett/analyse_preventifs_23_25/main/coutTunnelsPreventifs_parPrix.csv')

In [40]:
tableau_decompose_25.iloc[:, 1].unique()

array(['Neuilly', 'Nanterre Centre', 'Moulin', 'Bicêtre', 'Guy Môquet',
       'Nanterre échangeur', 'Fontenay', 'Orly', 'Taverny',
       'Ambroise Paré', 'Boissy', 'Champigny', 'Nogent', 'Fresnes',
       'Italie', 'La Courneuve', 'Lumen-Norton', 'Sévines', 'Belle-Rive',
       'Antony', 'Bobigny', 'La Défense', 'Chennevières', 'Landy',
       'Saint-Cloud'], dtype=object)

In [44]:
tableau_decompose_25['prdp_code_prefix'] = tableau_decompose_25['prdp_code'].str[:6]

# Drop duplicates to keep only one row per 6-character prefix
unique_mapping = tableau_decompose_25.drop_duplicates(subset='prdp_code_prefix')[['prdp_code_prefix', 'prod_groupe']]

# Now `unique_mapping` holds each unique 6-char code with its associated prod_groupe
unique_mapping.to_csv('prdp_code_prod_groupe_table_correspondance.csv')


In [37]:
tableau_decompose_25

Unnamed: 0.1,Unnamed: 0,tunnel,mar_diminutif,prdp_code,prod_groupe,ref_prix,form_typ,frq_totale,nb_equipe,coeff,cout_ht,cout_total,prdp_code_prefix
0,0,Saint-Cloud,Bâtiment,BATCJN001,Contrôle des IS de jour,VJO001,sum_2,6.0,1.0,1.0,2029.00,12174.0,BATCJN
1,1,Saint-Cloud,Bâtiment,BATCJN002,Contrôle des IS de jour,VJO002,sum_2,6.0,1.0,1.0,1522.00,9132.0,BATCJN
2,2,Saint-Cloud,Bâtiment,BATCNO001,Contrôle des IS + niches de nuit,VNO001,sum_2,6.0,1.0,1.0,3264.00,19584.0,BATCNO
3,3,Saint-Cloud,Bâtiment,BATCNO002,Contrôle des IS + niches de nuit,VNO002,sum_2,6.0,1.0,1.0,2611.00,15666.0,BATCNO
4,4,Saint-Cloud,Bâtiment,BATMPO001,Préventif IS + niches,MPO001,sum_2,1.0,1.0,1.0,3519.00,3519.0,BATMPO
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,1641,Sévines,Pompage,POMERD600,Entretien autres systèmes du réseau incendie,INP600,direct_reference,1.0,0.0,1.0,858.61,0.0,POMERD
1642,1642,Sévines,Pompage,POMERE700,Entretien autres systèmes du réseau incendie,INP700,direct_reference,1.0,0.0,1.0,214.65,0.0,POMERE
1643,1643,Sévines,Pompage,POMERF800,Entretien autres systèmes du réseau incendie,INP800,direct_reference,1.0,0.0,1.0,214.65,0.0,POMERF
1644,1644,Sévines,Pompage,POMERG900,Entretien autres systèmes du réseau incendie,INP900,direct_reference,1.0,0.0,1.0,208.40,0.0,POMERG


---

#### Création du nouveau tableau Excel

In [138]:
import pandas as pd
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill, Alignment, Font, Border, Side
from openpyxl import load_workbook, Workbook

# Charger CSV
url = "https://raw.githubusercontent.com/mission-donnees-dett/analyse_preventifs_23_25/main/coutTunnelsPreventifs_parPrix.csv"
df = pd.read_csv(url)
df.columns = df.columns.str.strip()

# Groupes couleurs tunnels
green_tabs = {'Boissy', 'Champigny', 'Guy Môquet', 'Moulin', 'Nogent'}
yellow_tabs = {'Ambroise Paré', 'Belle-Rive', 'Chennevières', 'Fontenay', 'La Défense',
               'Nanterre Centre', 'Nanterre échangeur', 'Neuilly', 'Saint-Cloud', 'Sévines'}
grey_tabs = {'Bobigny', 'La Courneuve', 'Landy', 'Lumen-Norton', 'Taverny'}

# Couleurs onglets (hex sans #)
tab_colors = {
    'green': 'C6EFCE',
    'yellow': 'FFF9C4',
    'grey': 'E0E0E0',
    'blue': 'DCE6F1'
}

# Fond header (orange très clair)
header_fill = PatternFill(start_color="FFF3E0", end_color="FFF3E0", fill_type="solid")

# Bordures
thin = Side(border_style="thin", color="000000")
all_border = Border(left=thin, right=thin, top=thin, bottom=thin)
vertical_border = Border(left=thin, right=thin)

# Liste pour trier les feuilles
sheet_map = []

with pd.ExcelWriter('temp_output.xlsx', engine='openpyxl') as writer:
    for tunnel_value in df['tunnel'].dropna().unique():
        tunnel_df = df[df['tunnel'] == tunnel_value]
        if tunnel_df.empty:
            continue

        tunnel_df = tunnel_df.drop_duplicates(subset=['mar_diminutif', 'prod_groupe'])

        # Colonnes utiles
        sheet_df = tunnel_df[['mar_diminutif', 'prod_groupe', 'freq_annuelle',
                              'ajust_freq_suppl', 'frq_totale', 'nb_equipe', 'cout_total']]

        sheet_df.columns = ['Marché', 'Opération', 'Fréquence annuelle',
                            'Ajustement fréquence supplémentaire', 'Fréquence totale',
                            'Nombre équipé', 'Coût total HT']

        sheet_df = sheet_df.set_index('Marché')
        safe_sheet_name = str(tunnel_value)[:31].replace('/', '-').replace('\\', '-')

        # Choix groupe couleur
        if tunnel_value in green_tabs:
            group = 'green'
        elif tunnel_value in yellow_tabs:
            group = 'yellow'
        elif tunnel_value in grey_tabs:
            group = 'grey'
        else:
            group = 'blue'

        sheet_map.append((group, safe_sheet_name, sheet_df))
        sheet_df.to_excel(writer, sheet_name=safe_sheet_name)

# Recharger et reformater
wb = load_workbook('temp_output.xlsx')
new_wb = Workbook()
new_wb.remove(new_wb.active)

group_order = ['green', 'yellow', 'grey', 'blue']
sheet_map.sort(key=lambda x: group_order.index(x[0]))

for group, name, df_sheet in sheet_map:
    ws = new_wb.create_sheet(title=name)

    headers = list(df_sheet.reset_index().columns)
    values = df_sheet.reset_index().values


    for col_idx, col_name in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_idx, value=col_name)
        if col_name != 'Marché':
            cell.font = Font(bold=True)
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill
        cell.border = all_border


    for row_idx, row in enumerate(values, start=2):
        for col_idx, value in enumerate(row, start=1):
            cell = ws.cell(row=row_idx, column=col_idx, value=value)
            cell.alignment = Alignment(horizontal="left", vertical="center")
            cell.border = vertical_border


    max_row = ws.max_row
    max_col = ws.max_column
    for row in range(1, max_row + 1):
        for col in range(1, max_col + 1):
            cell = ws.cell(row=row, column=col)
            border = cell.border
            new_sides = {
                'top': thin if row == 1 else border.top,
                'bottom': thin if row == max_row else border.bottom,
                'left': thin if col == 1 else border.left,
                'right': thin if col == max_col else border.right
            }
            cell.border = Border(**new_sides)

    # Ajuster colonnes
    for col_idx in range(1, max_col + 1):
        max_len = max(
            len(str(ws.cell(row=row_idx, column=col_idx).value or ""))
            for row_idx in range(1, max_row + 1)
        )
        ws.column_dimensions[get_column_letter(col_idx)].width = max_len + 2

    # Couleur onglet
    ws.sheet_properties.tabColor = tab_colors[group]

    # 'Freeze' première ligne et colonne
    ws.freeze_panes = 'B2'


new_wb.save("Cout_tunnels_preventifs_25.xlsx")



In [206]:
import pandas as pd

# === Load Excel file (local) ===
excel_path = "/Users/iustintdr/General-2/extraction_formules_df/Tunnels_Cout préventif_13.06.2025 (5).xlsx"
df_excel = pd.read_excel(excel_path, sheet_name="Préventifs_tunnels", engine="openpyxl")
df_excel.columns = df_excel.columns.str.replace('\n', ' ').str.strip()

# === Build equipement mapping ===
df_temp = df_excel[['Référence prix', 'Équipements']].copy()
df_temp = df_temp.groupby('Référence prix')['Équipements'] \
                 .apply(lambda x: ', '.join(x.dropna().astype(str))) \
                 .reset_index()
df_temp.columns = ['ref_prix', 'equipements']

# === Load CSV from URL ===
csv_url = "https://raw.githubusercontent.com/mission-donnees-dett/analyse_preventifs_23_25/main/coutTunnelsPreventifs_parPrix.csv"
df_csv = pd.read_csv(csv_url)
df_csv.columns = df_csv.columns.str.strip()  # Clean up column names if needed

# === Merge on ref_prix ===
df_merged = df_csv.merge(df_temp, on='ref_prix', how='left')

# === Prepare formula parts ===
sheet_name = "Equipements par tunnel"  # Corrected internal sheet name

def build_nomb_equipe(row):
    equipement = row['equipements']
    tunnel = row['tunnel']
    nb_equipe = row.get('nb_equipe', '')

    if pd.isna(equipement) or str(equipement).strip() == '':
        # If no equipements, return static nb_equipe value (number or empty)
        return nb_equipe if pd.notna(nb_equipe) else ''
    else:
        equipement_str = str(equipement).strip().replace('"', '""')  # Escape double quotes
        tunnel_str = str(tunnel).strip().replace('"', '""')
        # Formula referencing internal sheet with commas
        return f'=SUMIFS(\'{sheet_name}\'!C:C, \'{sheet_name}\'!A:A, "{tunnel_str}", \'{sheet_name}\'!B:B, "{equipement_str}")'

# === Apply the function ===
df_merged['nomb_equipe'] = df_merged.apply(build_nomb_equipe, axis=1)


In [208]:
df_merged.to_csv('coutTunnelsPreventifs_parPrix_2.csv')

In [211]:
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import PatternFill, Alignment, Font, Border, Side
from openpyxl.utils import get_column_letter

# === Load your saved CSV with formulas ===
csv_path = "/Users/iustintdr/General-2/extraction_formules_df/coutTunnelsPreventifs_parPrix_2.csv"
df = pd.read_csv(csv_path)
df.columns = df.columns.str.strip()

# Define tunnel groups and colors (same as before)
green_tabs = {'Boissy', 'Champigny', 'Guy Môquet', 'Moulin', 'Nogent'}
yellow_tabs = {'Ambroise Paré', 'Belle-Rive', 'Chennevières', 'Fontenay', 'La Défense',
               'Nanterre Centre', 'Nanterre échangeur', 'Neuilly', 'Saint-Cloud', 'Sévines'}
grey_tabs = {'Bobigny', 'La Courneuve', 'Landy', 'Lumen-Norton', 'Taverny'}

tab_colors = {
    'green': 'C6EFCE',
    'yellow': 'FFF9C4',
    'grey': 'E0E0E0',
    'blue': 'DCE6F1'
}

header_fill = PatternFill(start_color="FFF3E0", end_color="FFF3E0", fill_type="solid")
thin = Side(border_style="thin", color="000000")
all_border = Border(left=thin, right=thin, top=thin, bottom=thin)
vertical_border = Border(left=thin, right=thin)

# Prepare new workbook
wb = Workbook()
wb.remove(wb.active)  # remove default sheet

# Get unique tunnels
unique_tunnels = df['tunnel'].dropna().unique()

for tunnel_value in unique_tunnels:
    tunnel_df = df[df['tunnel'] == tunnel_value]
    if tunnel_df.empty:
        continue
    
    # Drop duplicates if needed (keep as you had)
    tunnel_df = tunnel_df.drop_duplicates(subset=['mar_diminutif', 'prod_groupe'])

    # Columns to keep and rename
    sheet_df = tunnel_df[['mar_diminutif', 'prod_groupe', 'freq_annuelle',
                          'ajust_freq_suppl', 'frq_totale', 'nomb_equipe', 'cout_total']]

    sheet_df.columns = ['Marché', 'Opération', 'Fréquence annuelle',
                        'Ajustement fréquence supplémentaire', 'Fréquence totale',
                        'Nombre équipé', 'Coût total HT']

    sheet_df = sheet_df.set_index('Marché')

    # Safe sheet name (max 31 chars, no slashes)
    safe_sheet_name = str(tunnel_value)[:31].replace('/', '-').replace('\\', '-')

    # Choose tab color group
    if tunnel_value in green_tabs:
        group = 'green'
    elif tunnel_value in yellow_tabs:
        group = 'yellow'
    elif tunnel_value in grey_tabs:
        group = 'grey'
    else:
        group = 'blue'

    # Create sheet
    ws = wb.create_sheet(title=safe_sheet_name)

    headers = list(sheet_df.reset_index().columns)
    values = sheet_df.reset_index().values

    # Write header with style
    for col_idx, col_name in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_idx, value=col_name)
        if col_name != 'Marché':
            cell.font = Font(bold=True)
        cell.alignment = Alignment(horizontal="center", vertical="center")
        cell.fill = header_fill
        cell.border = all_border

    # Write values row by row
    for row_idx, row in enumerate(values, start=2):
        for col_idx, value in enumerate(row, start=1):
            cell = ws.cell(row=row_idx, column=col_idx)
            # For 'Nombre équipé' column, insert formula or value as string to be evaluated by Excel
            if headers[col_idx -1] == 'Nombre équipé' and isinstance(value, str) and value.startswith('='):
                cell.value = value  # formula, let Excel evaluate it
            else:
                cell.value = value
            cell.alignment = Alignment(horizontal="left", vertical="center")
            cell.border = vertical_border

    max_row = ws.max_row
    max_col = ws.max_column

    # Apply borders for all cells
    for row in range(1, max_row + 1):
        for col in range(1, max_col + 1):
            cell = ws.cell(row=row, column=col)
            border = cell.border
            new_sides = {
                'top': thin if row == 1 else border.top,
                'bottom': thin if row == max_row else border.bottom,
                'left': thin if col == 1 else border.left,
                'right': thin if col == max_col else border.right
            }
            cell.border = Border(**new_sides)

    # Adjust column widths
    for col_idx in range(1, max_col + 1):
        max_len = max(
            len(str(ws.cell(row=row_idx, column=col_idx).value or ""))
            for row_idx in range(1, max_row + 1)
        )
        ws.column_dimensions[get_column_letter(col_idx)].width = max_len + 2

    # Tab color
    ws.sheet_properties.tabColor = tab_colors[group]

    # Freeze first row and first column
    ws.freeze_panes = 'B2'

# Save new Excel file
output_excel_path = "/Users/iustintdr/General-2/extraction_formules_df/coutTunnelsPreventifs.xlsx"
wb.save(output_excel_path)
print(f"✅ Excel file saved at {output_excel_path}")


✅ Excel file saved at /Users/iustintdr/General-2/extraction_formules_df/coutTunnelsPreventifs.xlsx
