In [15]:

import pandas as pd
import numpy as np
import re
from datetime import datetime

class FertilizerStandardizer:
    def __init__(self):
        self.icv_fertilizer_list = ["Solution Liquide N 39"]
        self.default_mineral_density = 1.325
        self.organic_solid_density = 1000
        self.organic_liquid_density = 1.0
        self.type_mapping = {
            "Fertilisation et amendement organique": "engrais organiques",
            "Fertilisation et amendement mineral - foliaire inclus": "engrais mineraux",
            "Fertilisation organique": "engrais organiques",
            "Fertilisation minérale": "engrais mineraux"
        }

    def standardize_fertilizer_name(self, name):
        if pd.isna(name): return name
        if isinstance(name, (pd.Timestamp, datetime)):
            name = name.strftime('%d-%m-%Y')
        name = str(name).upper()
        name = re.sub(r'[^\w\s\-]', ' ', name)
        name = re.sub(r'\s+', ' ', name)
        return name.strip()

    def is_in_icv_list(self, fertilizer_name):
        std_name = self.standardize_fertilizer_name(fertilizer_name)
        std_icv_list = [self.standardize_fertilizer_name(name) for name in self.icv_fertilizer_list]
        return std_name in std_icv_list

    def standardize_type_engrais(self, intervention_type):
        if pd.isna(intervention_type):
            return "engrais mineraux"
        type_std = self.type_mapping.get(intervention_type)
        if type_std is None:
            print(f"Avertissement: Type d'intervention non reconnu: {intervention_type}")
            return "engrais mineraux"
        return type_std

    def convert_to_kg(self, row):
        try:
            if pd.isna(row.get('unite_intrant_intervention')) or pd.isna(row.get('quantite_totale')):
                return np.nan
            unite = str(row.get('unite_intrant_intervention', '')).upper()
            type_engrais = str(row.get('type_engrais', '')).lower()
            quantite = float(row.get('quantite_totale', 0))
            
            if unite == 'KG':
                return quantite
            elif unite == 'L':
                return quantite * (self.organic_liquid_density if 'organique' in type_engrais else self.default_mineral_density)
            elif unite == 'M3':
                if 'organique' in type_engrais:
                    return quantite * self.organic_solid_density
                return quantite * self.default_mineral_density * 1000
            else:
                print(f"Avertissement: Unité non reconnue: {unite} pour l'engrais {row.get('libelle', 'Unknown')}")
                return np.nan
        except Exception as e:
            print(f"Erreur lors de la conversion pour l'engrais {row.get('libelle', 'Unknown')}: {str(e)}")
            return np.nan

    def convert_dose_to_kg_ha(self, row):
        try:
            if pd.isna(row.get('unite')) or pd.isna(row.get('dose')):
                return np.nan
            
            unite = str(row['unite']).upper()
            if unite == 'KG/HA':
                return row['dose']
            elif unite == 'T/HA':
                return row['dose'] * 1000
            elif unite == 'L/HA':
                if row['type_engrais'] == 'engrais organiques':
                    return row['dose'] * self.organic_liquid_density
                return row['dose'] * self.default_mineral_density
            elif unite == 'M3/HA':
                if row['type_engrais'] == 'engrais organiques':
                    return row['dose'] * self.organic_solid_density
                return row['dose'] * self.default_mineral_density * 1000
            else:
                print(f"Avertissement: Unité non reconnue: {unite} pour l'engrais {row['libelle']}")
                return np.nan
        except Exception as e:
            print(f"Erreur lors de la conversion pour l'engrais {row['libelle']}: {str(e)}")
            return np.nan

    def calculate_nutrients(self, row):
        try:
            if not row['in_icv_list']:
                if row['type_engrais'] == 'engrais organiques':
                    volume_m3 = row['quantite_totale'] if row['unite_intrant_intervention'].upper() == 'M3' \
                        else row['quantite_kg'] / self.organic_solid_density
                    
                    row['N_kg_ha'] = row['fertilisant.composition.n_total'] * volume_m3 / row['surface_travaillee_ha']
                    row['P_kg_ha'] = row['fertilisant.composition.p'] * volume_m3 / row['surface_travaillee_ha']
                    row['K_kg_ha'] = row['fertilisant.composition.k'] * volume_m3 / row['surface_travaillee_ha']
                else:
                    row['N_kg_ha'] = row['dose_kg_ha'] * row['fertilisant.composition.n_total'] / 100
                    row['P_kg_ha'] = row['dose_kg_ha'] * row['fertilisant.composition.p'] / 100
                    row['K_kg_ha'] = row['dose_kg_ha'] * row['fertilisant.composition.k'] / 100
            return row
        except Exception as e:
            print(f"Erreur lors du calcul des nutriments pour l'engrais {row['libelle']}: {str(e)}")
            return row

    def standardize_mesparcelles(self, data):
        print("\nStandardisation des données MesParcelles...")
        print("DEBUG - Colonnes initiales MP:", data.columns.tolist())
        
        fertilisation_types = [
            "Fertilisation et amendement organique",
            "Fertilisation et amendement mineral - foliaire inclus"
        ]
        df = data[data['type_intervention.libelle'].isin(fertilisation_types)].copy()
        
        if 'culture.libelle' in df.columns:
            df = df.rename(columns={'culture.libelle': 'culture'})
            print("Colonne culture.libelle trouvée et renommée en 'culture'")
        else:
            print("ATTENTION: culture.libelle non trouvée!")
            print("Colonnes disponibles:", df.columns.tolist())
            return None
            
        df['type_engrais'] = df['type_intervention.libelle'].apply(self.standardize_type_engrais)
        df['in_icv_list'] = df['libelle'].apply(self.is_in_icv_list)
        df['quantite_kg'] = df.apply(self.convert_to_kg, axis=1)
        df['dose_kg_ha'] = df['quantite_kg'] / df['surface_travaillee_ha']
        df = df.apply(self.calculate_nutrients, axis=1)
        
        return self._aggregate_by_exploitation_and_culture(df)

    def standardize_smag(self, data, cultures_data):
        print("\nStandardisation des données SMAG...")
        print("DEBUG - Colonnes initiales SMAG:", data.columns.tolist())
        print("DEBUG - Colonnes cultures SMAG:", cultures_data.columns.tolist())
        
        fertilisation_types = ["Fertilisation organique", "Fertilisation minérale"]
        df = data[data["Type d'intervention"].isin(fertilisation_types)].copy()
        
        base_columns = {
            'Intrant': 'libelle',
            'Unité': 'unite',
            'Dose': 'dose',
            'Surface intervention sur parcelle': 'surface_travaillee_ha',
            'N': 'N_kg_ha',
            'P': 'P_kg_ha',
            'K': 'K_kg_ha',
            'SIRET': 'siret_exploitation',
            'Code edi parcelle': 'uuid_parcelle'
        }
        df = df.rename(columns=base_columns)
        
        print("\nDEBUG - Après renommage SMAG:")
        print("Colonnes dans df:", df.columns.tolist())
        
        print("\nDEBUG - Avant fusion:")
        print("Colonnes df:", df.columns.tolist())
        print("Colonnes cultures:", cultures_data.columns.tolist())
        
        df = pd.merge(
            df,
            cultures_data[['Code edi parcelle', 'Culture']],
            left_on='uuid_parcelle',
            right_on='Code edi parcelle',
            how='left'
        )
        
        print("\nDEBUG - Après fusion:")
        print("Colonnes df:", df.columns.tolist())
        
        df = df.rename(columns={'Culture': 'culture'})
        
        if 'Code edi parcelle_x' in df.columns:
            df = df.drop(['Code edi parcelle_x', 'Code edi parcelle_y'], axis=1, errors='ignore')
        else:
            df = df.drop('Code edi parcelle', axis=1, errors='ignore')
        
        df['type_engrais'] = df["Type d'intervention"].apply(self.standardize_type_engrais)
        df['in_icv_list'] = df['libelle'].apply(self.is_in_icv_list)
        df.loc[df['in_icv_list'], 'dose_kg_ha'] = df[df['in_icv_list']].apply(
            self.convert_dose_to_kg_ha, axis=1
        )
        
        return self._aggregate_by_exploitation_and_culture(df)

    def _aggregate_by_exploitation_and_culture(self, df):
        """Agrège les données par exploitation et par culture avec pondération par surface"""
        def weighted_average(group):
            result = pd.Series()
            
            # Conversion en string et liste des engrais
            group['libelle'] = group['libelle'].astype(str)
            result['engrais_utilises'] = ', '.join(sorted(group['libelle'].unique()))
            
            # Pour chaque engrais ICV, créer sa colonne de dose
            for icv_engrais in self.icv_fertilizer_list:
                icv_engrais_str = str(icv_engrais)
                mask = (group['in_icv_list']) & (group['libelle'].str.contains(icv_engrais_str, case=False, regex=False))
                if mask.any():
                    result[f"{icv_engrais_str}_dose"] = np.average(
                        group.loc[mask, 'dose_kg_ha'],
                        weights=group.loc[mask, 'surface_travaillee_ha']
                    )
                else:
                    result[f"{icv_engrais_str}_dose"] = np.nan
            
            # Pour les nutriments non-ICV
            non_icv_mask = ~group['in_icv_list']
            if non_icv_mask.any():
                for nutrient in ['N_kg_ha', 'P_kg_ha', 'K_kg_ha']:
                    if nutrient in group.columns:
                        values = group.loc[non_icv_mask, nutrient]
                        weights_subset = group.loc[non_icv_mask, 'surface_travaillee_ha']
                        if not values.empty and not weights_subset.empty:
                            result[f"non_icv_{nutrient}"] = np.average(
                                values,
                                weights=weights_subset
                            )
                        else:
                            result[f"non_icv_{nutrient}"] = np.nan
            else:
                for nutrient in ['N_kg_ha', 'P_kg_ha', 'K_kg_ha']:
                    result[f"non_icv_{nutrient}"] = np.nan
                    
            return result
        
        # Convertir la colonne libelle en string avant le groupby
        df['libelle'] = df['libelle'].astype(str)
        
        return df.groupby(['siret_exploitation', 'culture'], as_index=False).apply(weighted_average)
    def combine_sources(self, mesparcelles_data, smag_data, smag_cultures_data):
        print("\nTraitement des données MesParcelles...")
        mp_std = self.standardize_mesparcelles(mesparcelles_data)
        
        print("\nTraitement des données SMAG...")
        smag_std = self.standardize_smag(smag_data, smag_cultures_data)
        
        if mp_std is None or smag_std is None:
            print("ERREUR: problème dans la standardisation des données")
            return None
        
        # Définir l'ordre des colonnes attendues
        expected_columns = ['siret_exploitation', 'culture', 'engrais_utilises']
        # Ajouter les colonnes pour chaque engrais ICV
        for engrais in self.icv_fertilizer_list:
            expected_columns.append(f"{engrais}_dose")
        # Ajouter les colonnes NPK
        expected_columns.extend(['non_icv_N_kg_ha', 'non_icv_P_kg_ha', 'non_icv_K_kg_ha'])
        
        # S'assurer que toutes les colonnes existent
        for df in [mp_std, smag_std]:
            for col in expected_columns:
                if col not in df.columns:
                    df[col] = np.nan
        
        mp_std['source'] = 'mesparcelles'
        smag_std['source'] = 'smag'
        
        # Combiner et réorganiser les colonnes
        combined = pd.concat([mp_std, smag_std], ignore_index=True)
        expected_columns.append('source')  # Ajouter la colonne source à la fin
        combined = combined[expected_columns]
        
        return combined

# Code d'utilisation
if __name__ == "__main__":
    print("\nImport des données...")
    
    mp_path = r"C:\Users\MonirNajem\OneDrive - FOOD PILOT\Desktop\monir\MESPARCELLES\mesparcelles_data.xlsx"
    smag_path = r"C:\Users\MonirNajem\OneDrive - FOOD PILOT\Desktop\monir\MESPARCELLES\SMAG.xlsx"
    smag_cultures_path = r"C:\Users\MonirNajem\OneDrive - FOOD PILOT\Desktop\monir\MESPARCELLES\SMAG_cultures.xlsx"
    
    mp_data = pd.read_excel(mp_path, sheet_name='Intervention_Details')
    smag_data = pd.read_excel(smag_path)
    smag_cultures = pd.read_excel(smag_cultures_path)
    
    print("\nColonnes dans les fichiers source:")
    print("MP:", mp_data.columns.tolist())
    print("SMAG:", smag_data.columns.tolist())
    print("SMAG Cultures:", smag_cultures.columns.tolist())
    
    standardizer = FertilizerStandardizer()
    combined_data = standardizer.combine_sources(mp_data, smag_data, smag_cultures)
    
    if combined_data is not None:
        output_path = 'resultats_standardisation_par_exploitation_culture.xlsx'
        combined_data.to_excel(output_path, index=False)
        print(f"\nRésultats exportés dans '{output_path}'")



Import des données...

Colonnes dans les fichiers source:
MP: ['uuid_intervention', 'numero_lot', 'siret_exploitation', 'uuid_parcelle', 'link_parcelle', 'surface_travaillee_ha', 'date_debut', 'date_fin', 'materiels', 'prestataires', 'culture.id_culture', 'culture.libelle', 'type_intervention.id_type_intervention', 'type_intervention.libelle', 'id_intrant', 'libelle', 'quantite_totale', 'unite_intrant_intervention', 'phyto', 'type_intrant.id_type_intrant', 'type_intrant.libelle', 'type_intrant.categorie', 'fertilisant.composition.n_total', 'fertilisant.composition.p', 'fertilisant.composition.k', 'fertilisant.composition.cao', 'fertilisant.composition.mgo', 'fertilisant.composition.s', 'fertilisant.composition.unite', 'fertilisant.condition_epandage', 'fertilisant.condition_epandage.id_condition_epandage', 'fertilisant.condition_epandage.libelle', 'fertilisant', 'phyto.code_amm', 'phyto.cible']
SMAG: ['Code producteur', 'Exploitation', 'SIRET', 'Numéro îlot', 'Nom de la parcelle', 'Co

  return df.groupby(['siret_exploitation', 'culture'], as_index=False).apply(weighted_average)



DEBUG - Après fusion:
Colonnes df: ['Code producteur', 'Exploitation', 'siret_exploitation', 'Numéro îlot', 'Nom de la parcelle', 'uuid_parcelle', 'Date intervention', 'Date fin intervention', 'surface_travaillee_ha', "Type d'intervention", "Etat de l'intervention", 'Nature intrant', 'libelle', 'Code GNIS', 'dose', 'unite', 'Stade inter', 'Cibles', 'N_kg_ha', 'P_kg_ha', 'K_kg_ha', 'CaO', 'MgO', 'SO3', 'B', 'Zn', 'Mn', 'Mo', 'Cu', 'Fe', 'Quantité récoltée', 'Date de création', 'Créateur', 'Obs inter', 'Code edi parcelle', 'Culture']


  return df.groupby(['siret_exploitation', 'culture'], as_index=False).apply(weighted_average)



Résultats exportés dans 'resultats_standardisation_par_exploitation_culture.xlsx'
