# Importation des bibliothèques nécessaires

In [1]:
import pandas as pd
from pandas import ExcelWriter
import xlsxwriter
from pandas import ExcelFile
from datetime import datetime
import datetime
from datetime import timedelta
import numpy as np
import os
import statistics
from dateutil.parser import parse
import math
import calendar
from sklearn.cluster import DBSCAN
import joblib

In [2]:
# Charger le modèle à partir du fichier picklé
loaded_model = joblib.load("random_forest.pkl")

# Création de deux fonctions utilisables dans l'importation des données

In [3]:

def toZero(x):
    try:
        converted_value = int(x)
    except ValueError:
        converted_value = 0
    return converted_value

def findDay(date):
    day, month, year = (int(i) for i in date.split('/'))
    dayNumber = calendar.weekday(year, month, day)
    return dayNumber

def arrondir_multiple_de_5(nombre):
    multiple_de_5 = round(nombre / 5) * 5
    return multiple_de_5

def process_data(df_Fusion, seuil):
    groupe = 0
    df_Fusion['Groupe'] = 0

    for index, row in df_Fusion.iterrows():
        if index == 0:
            df_Fusion.at[index, 'Groupe'] = groupe
        else:
            diff = abs(row['Talon surconso identifie'] - df_Fusion.at[index - 1, 'Talon surconso identifie'])
            if diff <= seuil:
                df_Fusion.at[index, 'Groupe'] = groupe
            else:
                groupe += 1
                df_Fusion.at[index, 'Groupe'] = groupe

    df_simplified = df_Fusion.groupby(['Groupe', 'Début surconsommation', 'Fin surconsommation']).agg({
        'Code': 'first',  # Include the 'Code' column from the original dataframe
        'Energie': 'first',  # Include the 'Energie' column from the original dataframe
        'heure ouverture': 'first',  # Include the 'heure ouverture' column from the original dataframe
        'heure fermetur': 'first',  # Include the 'heure fermetur' column from the original dataframe
        'TalonRef': 'first',  # Include the 'TalonRef' column from the original dataframe
        'Début surconsommation': 'first',
        'Fin surconsommation': 'first',
        'Talon surconso identifie': 'mean',
        'impact': 'first',  # Include the 'impact' column from the original dataframe
        'NbrHeures': 'sum',
        'NbrNuits': 'first',  # Include the 'NbrNuits' column from the original dataframe
        'Impact conso (kWh)': 'first',  # Include the 'Impact conso (kWh)' column from the original dataframe
        '% Surconso': 'first',  # Include the '% Surconso' column from the original dataframe
        "Période d'alerte": 'first',  # Include the "Période d'alerte" column from the original dataframe
        'Pourcentage de précision': 'first'  # Include the 'Pourcentage de précision' column from the original dataframe
    }).reset_index(drop=True)

    return df_simplified

# Création de l'algorithme de machine learning personnalisé que nous allons utiliser dans la fusion des heures de surconsommation.

# Cet algorithme est un algorithme de clustering non supervisé.

# Il prend en compte la successivité des heures et une marge de surconsommation.

In [4]:
import numpy as np
from sklearn.base import BaseEstimator, ClusterMixin

class TimeMarginClustering(BaseEstimator, ClusterMixin):
    def __init__(self, time_margin=1, value_margin=20):
        self.time_margin = time_margin
        self.value_margin = value_margin
        self.labels_ = None

    def fit(self, X):
        # Sort the data based on the time column (column 0)
        sorted_indices = np.argsort(X[:, 0])
        sorted_X = X[sorted_indices]

        # Initialize variables for clustering
        cluster_labels = np.zeros(len(X), dtype=int)
        current_cluster = 0
        prev_time = sorted_X[0, 0]
        prev_value = sorted_X[0, 1]

        # Iterate over the sorted data and assign cluster labels
        for i in range(len(sorted_X)):
            time = sorted_X[i, 0]
            value = sorted_X[i, 1]

            # Check if the time difference exceeds the time margin
            time_diff = time - prev_time
            if time_diff > self.time_margin:
                current_cluster += 1

            # Check if the value difference exceeds the value margin
            value_diff = abs(value - prev_value)
            if time_diff <= self.time_margin and value_diff > self.value_margin:
                current_cluster += 1

            # Assign the cluster label
            cluster_labels[i] = current_cluster

            # Update previous time and value
            prev_time = time
            prev_value = value

        # Assign the cluster labels to the algorithm's attribute
        self.labels_ = cluster_labels
        return self

    def fit_predict(self, X):
        self.fit(X)
        return self.labels_

    def predict(self, X):
        return self.fit_predict(X)

# Importation des données.

# Prétraitement des données (nettoyage des données)

# La détection de la surconsommation pour les nuits.

# L'appel et l'application de l'algorithme de fusion des heures pour les nuits.

In [5]:
writer = pd.ExcelWriter('suivi_9_sites.xlsx', engine='xlsxwriter')

Code_list=[]
TalonRef_list=[]
Date1_list=[]
Date2_list=[]
heurev_list=[]
heuref_list=[]
NbrHeure_list=[]
defrence_list=[]
surconso_identifie=[]
Energie=[]
Marge_list=[]



# Charger les données du fichier CSV
df = pd.read_csv('dimanche.csv', sep=',', parse_dates=True, dtype={"Date":"str","Time":"str","salesforceName":"str","siteName":"str","Real Consumption (kWh)":"int64" , "TalonRef":"int64" , "Heure Ouverture":"int64" , "Heure Fermeture":"int64" , "Ouverture dim":"str" , "Margin":"int64"},converters={'Real Consumption (kWh)':toZero},encoding="utf-8-sig")
# Filtrer les données pour l'entreprise spécifiée
#print(df.columns)
for code in df["salesforceName"].unique():
    df2 = df[df["salesforceName"] == code]

    # Sélectionner les colonnes pertinentes (Date, Time, Real Consumption (kWh) , talont , ouverture , fermeture)
    df3 = df2[['Date', 'Time', 'Real Consumption (kWh)',"TalonRef" , "Heure Ouverture" , "Heure Fermeture" , "Margin"]]

    # Convertir la colonne 'Date' en type datetime avec le format approprié
    df3['Date'] = pd.to_datetime(df3['Date'], format='%Y-%m-%d')

    # Convertir la colonne 'Time' en type timedelta
    # Convertir la colonne 'Time' en type timedelta
    df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')

    # Combiner les colonnes 'Date' et 'Time' en une seule colonne de type datetime
    df3['DateTime'] = df3['Date'] + pd.to_timedelta(df3['Time'].dt.strftime('%H:%M:%S'))

    # Définition des heures d'ouverture et de fermeture de l'entreprise
    heure_ouverture = df3["Heure Ouverture"].iloc[0]
    heure_fermeture = df3["Heure Fermeture"].iloc[0]+1

    # Définition du talon de consommation
    talon_consommation = df3["TalonRef"].iloc[0]
    marge=df3["Margin"].iloc[0]

    # Filtrer les données pour les heures en dehors des heures d'ouverture de l'entreprise
    if heure_fermeture == 0:
        donnees_filtrees = df3[df3['DateTime'].dt.hour < heure_ouverture]
    else:
        donnees_filtrees = df3[(df3['DateTime'].dt.hour >= heure_fermeture) | (df3['DateTime'].dt.hour < heure_ouverture)]


    # Calculer la différence entre la consommation réelle et le talon de consommation
    donnees_filtrees['Difference'] = donnees_filtrees['Real Consumption (kWh)'] - talon_consommation

    # Filtrer les heures de surconsommation (différence > talon_consommation * 0.08)
    heures_surconsommation = donnees_filtrees[donnees_filtrees['Difference'] > talon_consommation * 0.08]

    print("\n")
    print("\n")
    print("l'analyse de site : {}", code)
    print("\n")
    print("\n")

    # Liste des dates uniques dans votre DataFrame
    dates_uniques = heures_surconsommation['Date'].unique()

    # Parcourir chaque date et effectuer le clustering
    for i in range(len(dates_uniques) - 1):
        date_actuelle = dates_uniques[i]
        date_suivante = dates_uniques[i + 1]

        # Filtrer les données pour la date actuelle et la date suivante
        heures_nuit = heures_surconsommation[
            (heures_surconsommation['Date'] == date_actuelle) &
            (heures_surconsommation['DateTime'].dt.hour >= heure_fermeture)
        ]

        heures_nuit_suivante = heures_surconsommation[
            (heures_surconsommation['Date'] == date_suivante) &
            (heures_surconsommation['DateTime'].dt.hour < heure_ouverture)
        ]


        # Combiner les données de la nuit actuelle et de la nuit suivante
        if heure_fermeture == 0:
            heures_date = heures_nuit_suivante
        else:
            heures_date = pd.concat([heures_nuit, heures_nuit_suivante])

        #print(heures_date.head(10))

        # Convertir les colonnes de surconsommation et d'impact en tableaux Numpy
        surconsommation = heures_date['DateTime'].values.reshape(-1, 1)
        # Extraire les indices et les valeurs de la colonne "Difference" des données heures_date
        indices = heures_date.index
        differences = heures_date['Difference'].values

        # Créer le tableau NumPy à partir des indices et des valeurs
        impact = np.column_stack((indices, differences))

        print(impact)

        # Créer l'objet DBSCAN
        #if code == 'MO0157':
        clustering_algo = TimeMarginClustering(time_margin=2, value_margin=marge)
        #elif code == 'MO2316':
            #clustering_algo = TimeMarginClustering(time_margin=2, value_margin=5)
        #elif code == 'MO0459':
            #clustering_algo = TimeMarginClustering(time_margin=2, value_margin=20)


        labels = clustering_algo.fit_predict(impact)

        # Ajouter les labels de clustering comme une nouvelle colonne
        heures_date['Cluster'] = labels

        # Calculer la valeur moyenne de l'impact pour chaque cluster
        clusters_moyenne = heures_date.groupby('Cluster')['Difference'].mean().reset_index()

        # Compter le nombre d'heures regroupées dans chaque cluster
        clusters_compte = heures_date.groupby('Cluster')['DateTime'].count().reset_index()
        clusters_compte = clusters_compte.rename(columns={'DateTime': 'Nombre d\'heures regroupées'})

        # Fusionner les informations de valeur moyenne et de compte dans un seul DataFrame
        clusters_info = clusters_moyenne.merge(clusters_compte, on='Cluster')

        #saver dans un excel
        for index, row in clusters_info.iterrows():
            heuref_list.append(heure_fermeture - 1)
            Code_list.append(code)
            Energie.append('Electricité')
            TalonRef_list.append(talon_consommation)
            date1 = dates_uniques[i].to_pydatetime().date()
            #date1 = datetime.datetime.strptime(date1, '%Y-%m-%d').date()
            Date1_list.append(date1)
            date2 = dates_uniques[i + 1].to_pydatetime().date()
            #date2 = datetime.datetime.strptime(date2, '%Y-%m-%d').date()
            Date2_list.append(date2)
            heurev_list.append(heure_ouverture)
            NbrHeure_list.append(row['Nombre d\'heures regroupées'])
            defrence_list.append(int(row['Difference']))
            surconso_identifie.append(talon_consommation+int(row['Difference']))
            Marge_list.append(marge)

        # Afficher les informations des clusters pour la date spécifique
        print("\n")
        print(f"Date: {dates_uniques[i]}")
        print("\n")
        print(clusters_info)
        print("\n")

df_NuitOut = pd.DataFrame({'Code':Code_list, 'Energie': Energie,'heure ouverture':heurev_list,'heure fermetur':heuref_list, 'TalonRef':TalonRef_list, 'Début surconsommation':Date1_list, 'Fin surconsommation':Date2_list, 'Talon surconso identifie':surconso_identifie, 'impact':defrence_list,'NbrHeures':NbrHeure_list, 'Marge':Marge_list})
#df_NuitOut.to_excel(writer, sheet_name='Nuit', index=False)

  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + pd.to_timedelta(df3[





l'analyse de site : {} MO0157




[[23 64]
 [24 62]
 [25 62]
 [26 55]
 [27 55]
 [28 60]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   59.666667                           6


[[47 66]
 [48 59]
 [49 60]
 [50 51]
 [51 55]
 [52 56]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   57.833333                           6


[[71 68]
 [72 64]
 [73 61]
 [74 59]
 [75 56]
 [76 56]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   60.666667                           6


[[ 95  66]
 [ 96  57]
 [ 97  59]
 [ 98  57]
 [ 99  54]
 [100  56]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   58.166667                           6


[[119  61]
 [120  53]
 [121  53]
 [122  45]
 [123  52]
 [124  52]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroup

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#re

[[167  63]
 [168  57]
 [169  55]
 [170  50]
 [171  51]
 [172  51]]


Date: 2023-06-24T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        54.5                           6






l'analyse de site : {} MO0193




[[214  68]
 [215  69]
 [216  56]
 [217  69]
 [218  70]
 [219  66]
 [220  81]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   68.428571                           7


[[238  77]
 [239  72]
 [240  61]
 [241  70]
 [242  69]
 [243  68]
 [244  79]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   70.857143                           7


[[262 105]
 [263  92]
 [264  78]
 [265  91]
 [266  92]
 [267  88]
 [268 101]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   92.428571                           7


[[286  96]
 [287  96]
 [288  81]
 [289  94]
 [290  97]
 [291  87]
 [292 102]]


Date:

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value inste




[[310  75]
 [311  71]
 [312  56]
 [313  67]
 [314  67]
 [315  65]
 [316  78]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   68.428571                           7


[[334  78]
 [335  74]
 [336  58]
 [337  76]
 [338  76]
 [339  68]
 [340  82]]


Date: 2023-06-23T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   73.142857                           7


[[358  95]
 [359  78]
 [360  66]
 [361  80]
 [362  76]
 [363  72]
 [364  86]]


Date: 2023-06-24T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        79.0                           7






l'analyse de site : {} MO2814




[[406  38]
 [407  41]
 [408  37]
 [409  39]
 [410  37]
 [411  34]
 [412  33]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        37.0                           7




  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A valu

[[430  43]
 [431  41]
 [432  39]
 [433  40]
 [434  40]
 [435  37]
 [436  35]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   39.285714                           7


[[454  38]
 [455  41]
 [456  37]
 [457  39]
 [458  38]
 [459  35]
 [460  35]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   37.571429                           7


[[478  43]
 [479  43]
 [480  40]
 [481  42]
 [482  40]
 [483  39]
 [484  35]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   40.285714                           7


[[502  34]
 [503  35]
 [504  36]
 [505  37]
 [506  36]
 [507  33]
 [508  33]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   34.857143                           7


[[526  36]
 [527  37]
 [528  36]
 [529  37]
 [530  35]
 [531  32]
 [532  31]]


Date: 2023-06-23T00:00:00.000000

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFra





l'analyse de site : {} CS855




[[600  24]
 [601  25]
 [602  29]
 [603  23]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0       25.25                           4


[[624  22]
 [625  22]
 [626  30]
 [627  23]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0       24.25                           4


[[648  25]
 [649  24]
 [650  30]
 [651  25]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        26.0                           4


[[672  30]
 [673  34]
 [674  35]
 [675  53]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        38.0                           4


[[696  14]
 [698  16]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        15.0                           2






l'analyse de site : {} CS528




[[7

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A valu





l'analyse de site : {} CS625




[[983  40]
 [984  45]
 [985  43]
 [986  51]
 [987  43]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        44.4                           5


[[1007   46]
 [1008   33]
 [1009   33]
 [1010   42]
 [1011   48]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        40.4                           5


[[1031   45]
 [1032   37]
 [1033   36]
 [1034   44]
 [1035   49]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        42.2                           5


[[1055   47]
 [1056   41]
 [1057   37]
 [1058   47]
 [1059   51]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        44.6                           5


[[1079   43]
 [1080   41]
 [1081   35]
 [1082   39]
 [1083   45]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nom

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)


[[1127   54]
 [1128   46]
 [1129   40]
 [1130   45]
 [1131   46]]


Date: 2023-06-24T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        46.2                           5






l'analyse de site : {} CG820




[[1174   64]
 [1175   61]
 [1176   55]
 [1177   70]
 [1178   96]
 [1179   80]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        71.0                           6




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + pd.to_timedelta(df3['Time'].dt.strftime('%H:%M:%S')

[[1198   84]
 [1199   73]
 [1200   66]
 [1201   78]
 [1202  108]
 [1203   84]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   82.166667                           6


[[1222   75]
 [1223   66]
 [1224   56]
 [1225   81]
 [1226  100]
 [1227   98]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   79.333333                           6


[[1246   80]
 [1247   59]
 [1248   55]
 [1249   75]
 [1250  114]
 [1251   84]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0       67.25                           4
1        1       99.00                           2


[[1270   87]
 [1271   62]
 [1272   63]
 [1273   78]
 [1274  104]
 [1275  105]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   83.166667                           6


[[1294   87]
 [1295   73]
 [1296   68]
 [1297   87]
 [129

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')


[[1318   87]
 [1319   71]
 [1320   61]
 [1321   82]
 [1322  101]
 [1323   86]]


Date: 2023-06-24T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   81.333333                           6






l'analyse de site : {} CG350




[[1366   66]
 [1367   43]
 [1368   32]
 [1369   33]
 [1370   30]
 [1371   79]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   47.166667                           6


[[1390   75]
 [1391   53]
 [1392   49]
 [1393   53]
 [1394   51]
 [1395   94]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + pd.to_timedelta(df3['Time'].dt.strftime('%H:%M:%S'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  donnees_filtrees['Difference'] = donnees_filtrees['Real Con




Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        62.5                           6


[[1414   82]
 [1415   57]
 [1416   49]
 [1417   50]
 [1418   41]
 [1419   87]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        61.0                           6


[[1438   97]
 [1439   73]
 [1440   64]
 [1441   58]
 [1442   61]
 [1443  100]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        75.5                           6


[[1462   92]
 [1463   72]
 [1464   58]
 [1465   55]
 [1466   52]
 [1467   89]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   69.666667                           6


[[1486   95]
 [1487   75]
 [1488   60]
 [1489   60]
 [1490   51]
 [1491   94]]


Date: 2023-06-23T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0      

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%Y/%m/%d')





Date: 2023-06-24T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   70.833333                           6






l'analyse de site : {} CG338




[[1558  113]
 [1559  104]
 [1560  100]
 [1561   94]
 [1562  109]
 [1563  140]]


Date: 2023-06-18T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0       104.0                           5
1        1       140.0                           1


[[1582  123]
 [1583  112]
 [1584   98]
 [1585   99]
 [1586   98]
 [1587  131]]


Date: 2023-06-19T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0       106.0                           5
1        1       131.0                           1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_datetime(df3['Time'], format='%H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + pd.to_timedelta(df3['Time'].dt.strftime('%H:%M:%S'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  donnees_filtrees['Difference'] = donnees_filtrees['Real Con




[[1606   97]
 [1607   83]
 [1608   75]
 [1609   68]
 [1610   75]
 [1611  112]]


Date: 2023-06-20T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        79.6                           5
1        1       112.0                           1


[[1630  100]
 [1631   79]
 [1632   76]
 [1633   70]
 [1634   76]
 [1635  103]]


Date: 2023-06-21T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        84.0                           6


[[1654   66]
 [1655   64]
 [1656   62]
 [1657   60]
 [1658   64]
 [1659   90]]


Date: 2023-06-22T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0   67.666667                           6


[[1678   59]
 [1679   55]
 [1680   48]
 [1681   52]
 [1682   53]
 [1683  101]]


Date: 2023-06-23T00:00:00.000000000


   Cluster  Difference  Nombre d'heures regroupées
0        0        53.4                           5
1        1       101.0                           1


[[1

  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)
  heures_date = heures_nuit.append(heures_nuit_suivante)


# La détection de la surconsommation pour les dimanches.

# L'appel et l'application de l'algorithme de fusion des heures pour les dimanches.

# L'organisation des sorties dans une feuille d'un fichier Excel.

In [6]:
dCode_list=[]
dTalonRef_list=[]
dDate1_list=[]
dDate2_list=[]
dheurev_list=[]
dheuref_list=[]
dNbrHeure_list=[]
ddefrence_list=[]
dsurconso_identifie=[]
dEnergie=[]
dNbrNuit=[]
Impact_conso = []
perSurconso = []
Periode = []


# Load the CSV data
#df = pd.read_csv('dimanche.csv', sep=',')

# Convert 'Date' column to datetime type if needed
df['Date'] = pd.to_datetime(df['Date'])

# Filter the data to keep only Sundays
sundays_data = df[df['Date'].dt.weekday == 6]

# Print the filtered data
#print(sundays_data.head(30))
df = sundays_data
for code in df["salesforceName"].unique():
    df2 = df[df["salesforceName"] == code]

    # Select relevant columns: Date, Time, Real Consumption (kWh), TalonRef, Heure Ouverture, Heure Fermeture, Ouverture dim
    df3 = df2[['Date', 'Time', 'Real Consumption (kWh)', 'TalonRef', 'Heure Ouverture', 'Heure Fermeture', 'Ouverture dim' , 'Margin']]

    # Convert the 'Date' column to datetime type with the appropriate format
    df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')

    # Convert the 'Time' column to timedelta type
    df3['Time'] = pd.to_timedelta(df3['Time'])

    # Combine the 'Date' and 'Time' columns into a single datetime column
    df3['DateTime'] = df3['Date'] + df3['Time']

    # Define the opening and closing hours of the company
    heure_ouverture = df3["Heure Ouverture"].iloc[0]
    heure_fermeture = df3["Heure Fermeture"].iloc[0] + 1
    ouverture_dim = df3["Ouverture dim"].iloc[0]

    # Define the consumption threshold
    talon_consommation = df3["TalonRef"].iloc[0]
    marge=df3["Margin"].iloc[0]

    # Filter the data for hours outside the company's opening hours
    if ouverture_dim == "Journée":
        donnees_filtrees = pd.DataFrame()
    elif ouverture_dim == "Fermé":
        donnees_filtrees = df3[(df3['DateTime'].dt.hour >= heure_ouverture) & (df3['DateTime'].dt.hour < heure_fermeture)]
    elif ouverture_dim == "Matin":
        donnees_filtrees = df3[(df3['DateTime'].dt.hour >= 14) & (df3['DateTime'].dt.hour < heure_fermeture)]

    if len(donnees_filtrees) > 0:
        # Calculate the difference between the actual consumption and the consumption threshold
        donnees_filtrees['Difference'] = donnees_filtrees['Real Consumption (kWh)'] - talon_consommation

        # Filter the overconsumption hours (difference > talon_consommation * 0.08)
        heures_surconsommation = donnees_filtrees[donnees_filtrees['Difference'] > talon_consommation * 0.08]

        # Print the hours of overconsumption
        if len(heures_surconsommation) > 0:
            print(heures_surconsommation)

        print("\n")
        print("\n")
        print("l'analyse de site : {}", code)
        print("\n")
        print("\n")

        print(len(heures_surconsommation))

        # Liste des dates uniques dans votre DataFrame
        dates_uniques = heures_surconsommation['Date'].unique()
        
        # Parcourir chaque date et effectuer le clustering
        for i in range(len(dates_uniques)-1):
            date = dates_uniques[i+1]

            heures_date = heures_surconsommation[(heures_surconsommation['Date'] == date)]
            #print(heures_date.head(10))

            # Convertir les colonnes de surconsommation et d'impact en tableaux Numpy
            surconsommation = heures_date['DateTime'].values.reshape(-1, 1)
            # Extraire les indices et les valeurs de la colonne "Difference" des données heures_date
            indices = heures_date.index
            differences = heures_date['Difference'].values

            # Créer le tableau NumPy à partir des indices et des valeurs
            impact = np.column_stack((indices, differences))

            #print(impact)
            # Créer l'objet DBSCAN
            #if code == 'MO0157':
            clustering_algo = TimeMarginClustering(time_margin=2, value_margin=marge)
            #elif code == 'MO2316':
                #clustering_algo = TimeMarginClustering(time_margin=2, value_margin=5)
            #elif code == 'MO0459':
                #clustering_algo = TimeMarginClustering(time_margin=2, value_margin=20)


            labels = clustering_algo.fit_predict(impact)

            # Ajouter les labels de clustering comme une nouvelle colonne
            heures_date['Cluster'] = labels

            # Calculer la valeur moyenne de l'impact pour chaque cluster
            clusters_moyenne = heures_date.groupby('Cluster')['Difference'].mean().reset_index()

            # Compter le nombre d'heures regroupées dans chaque cluster
            clusters_compte = heures_date.groupby('Cluster')['DateTime'].count().reset_index()
            clusters_compte = clusters_compte.rename(columns={'DateTime': 'Nombre d\'heures regroupées'})

            # Fusionner les informations de valeur moyenne et de compte dans un seul DataFrame
            clusters_info = clusters_moyenne.merge(clusters_compte, on='Cluster')

            #print(clusters_info)

            #saver dans un excel
            for index, row in clusters_info.iterrows():
                dheuref_list.append(heure_fermeture - 1)
                dCode_list.append(code)
                dEnergie.append('Electricité')
                dTalonRef_list.append(talon_consommation)
                date1 = dates_uniques[i+1].to_pydatetime().date()
                dDate1_list.append(date1)
               # date_string = np.datetime_as_string(dates_uniques[i+1], unit='D')
                #date2 = datetime.datetime.strptime(date_string, '%Y-%m-%d').date()
                dDate2_list.append(date1)
                dheurev_list.append(heure_ouverture)
                dNbrHeure_list.append(row['Nombre d\'heures regroupées'])
                ddefrence_list.append(int(row['Difference']))
                dsurconso_identifie.append(arrondir_multiple_de_5(talon_consommation+int(row['Difference'])))
                surconso = row['Nombre d\'heures regroupées'] * int(row['Difference'])
                dNbrNuit.append(1)
                Impact_conso.append(surconso)

                persurconso = (int(row['Difference']) /  talon_consommation)*100
                persurconso = int(persurconso)
                persurconso = str(persurconso)
                persurconso = persurconso + '%'
                perSurconso.append(persurconso)
                Periode.append('Dim')

df_dimOut = pd.DataFrame({'Code':dCode_list, 'Energie': dEnergie,'heure ouverture':dheurev_list,'heure fermetur':dheuref_list, 'TalonRef':dTalonRef_list, 'Début surconsommation':dDate1_list, 'Fin surconsommation':dDate2_list, 'Talon surconso identifie':dsurconso_identifie, 'impact':ddefrence_list,'NbrHeures':dNbrHeure_list, 'NbrNuits':dNbrNuit, 'Impact conso (kWh)':Impact_conso, '% Surconso':perSurconso, "Période d'alerte":Periode})
grouped_df = df_dimOut.groupby("Code").size().reset_index(name="Nombre de lignes")
grouped_df["Pourcentage de précision"] = 100 - (grouped_df["Nombre de lignes"] - 1) * 10
df = df_dimOut.merge(grouped_df[["Code", "Pourcentage de précision"]], on="Code", how="left")
df['Pourcentage de précision'] = df['Pourcentage de précision'].astype(str) + '%'
df = process_data(df , 5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_timedelta(df3['Time'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + df3['Time']
A value is trying to be set on a copy of a slice from a D

          Date            Time  Real Consumption (kWh)  TalonRef  \
206 2023-06-18 0 days 14:00:00                     219       125   
207 2023-06-18 0 days 15:00:00                     209       125   
208 2023-06-18 0 days 16:00:00                     218       125   
209 2023-06-18 0 days 17:00:00                     209       125   
210 2023-06-18 0 days 18:00:00                     204       125   
211 2023-06-18 0 days 19:00:00                     195       125   
212 2023-06-18 0 days 20:00:00                     198       125   
213 2023-06-18 0 days 21:00:00                     198       125   
374 2023-06-25 0 days 14:00:00                     313       125   
375 2023-06-25 0 days 15:00:00                     312       125   
376 2023-06-25 0 days 16:00:00                     320       125   
377 2023-06-25 0 days 17:00:00                     313       125   
378 2023-06-25 0 days 18:00:00                     313       125   
379 2023-06-25 0 days 19:00:00                  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  heures_date['Cluster'] = labels
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_timedelta(df3['Time'])
A value is trying to be set on a copy of a slice from a DataFrame.
Tr

          Date            Time  Real Consumption (kWh)  TalonRef  \
389 2023-06-18 0 days 05:00:00                      81        45   
390 2023-06-18 0 days 06:00:00                      80        45   
391 2023-06-18 0 days 07:00:00                      81        45   
392 2023-06-18 0 days 08:00:00                      82        45   
393 2023-06-18 0 days 09:00:00                      82        45   
394 2023-06-18 0 days 10:00:00                      85        45   
395 2023-06-18 0 days 11:00:00                      91        45   
396 2023-06-18 0 days 12:00:00                      89        45   
397 2023-06-18 0 days 13:00:00                      91        45   
398 2023-06-18 0 days 14:00:00                      89        45   
399 2023-06-18 0 days 15:00:00                      88        45   
400 2023-06-18 0 days 16:00:00                      85        45   
401 2023-06-18 0 days 17:00:00                      90        45   
402 2023-06-18 0 days 18:00:00                  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_timedelta(df3['Time'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + df3['Time']
A value is trying to be set on a copy of a slice from a D

           Date            Time  Real Consumption (kWh)  TalonRef  \
974  2023-06-18 0 days 14:00:00                     210        80   
975  2023-06-18 0 days 15:00:00                     207        80   
976  2023-06-18 0 days 16:00:00                     199        80   
977  2023-06-18 0 days 17:00:00                     198        80   
978  2023-06-18 0 days 18:00:00                     198        80   
979  2023-06-18 0 days 19:00:00                     188        80   
980  2023-06-18 0 days 20:00:00                     191        80   
981  2023-06-18 0 days 21:00:00                     200        80   
982  2023-06-18 0 days 22:00:00                     138        80   
1142 2023-06-25 0 days 14:00:00                     216        80   
1143 2023-06-25 0 days 15:00:00                     177        80   
1144 2023-06-25 0 days 16:00:00                     173        80   
1145 2023-06-25 0 days 17:00:00                     174        80   
1146 2023-06-25 0 days 18:00:00   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + df3['Time']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_timedelta(df3['Time'])
A value is trying to be set on a copy of a slice from a D

           Date            Time  Real Consumption (kWh)  TalonRef  \
1358 2023-06-18 0 days 14:00:00                     266       210   
1359 2023-06-18 0 days 15:00:00                     257       210   
1360 2023-06-18 0 days 16:00:00                     265       210   
1361 2023-06-18 0 days 17:00:00                     262       210   
1362 2023-06-18 0 days 18:00:00                     293       210   
1363 2023-06-18 0 days 19:00:00                     294       210   
1364 2023-06-18 0 days 20:00:00                     304       210   
1365 2023-06-18 0 days 21:00:00                     281       210   
1526 2023-06-25 0 days 14:00:00                     352       210   
1527 2023-06-25 0 days 15:00:00                     348       210   
1528 2023-06-25 0 days 16:00:00                     364       210   
1529 2023-06-25 0 days 17:00:00                     352       210   
1530 2023-06-25 0 days 18:00:00                     353       210   
1531 2023-06-25 0 days 19:00:00   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Date'] = pd.to_datetime(df3['Date'], format='%d/%m/%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['Time'] = pd.to_timedelta(df3['Time'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['DateTime'] = df3['Date'] + df3['Time']


In [7]:
data = df[["NbrHeures", "Impact conso (kWh)", "% Surconso", "Période d'alerte"]]
data.head()

Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,8.0,648.0,64%,Dim
1,8.0,1504.0,150%,Dim
2,6.0,216.0,80%,Dim
3,9.0,396.0,97%,Dim
4,2.0,64.0,71%,Dim


In [8]:
print(data.dtypes)

NbrHeures             float64
Impact conso (kWh)    float64
% Surconso             object
Période d'alerte       object
dtype: object


In [9]:
# Transformation de la colonne '% Surconso' en float
data['% Surconso'] = data['% Surconso'].apply(lambda x: float(x.strip('%')) / 100)

# Affichage du dataframe après la transformation
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['% Surconso'] = data['% Surconso'].apply(lambda x: float(x.strip('%')) / 100)


Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,8.0,648.0,0.64,Dim
1,8.0,1504.0,1.5,Dim
2,6.0,216.0,0.8,Dim
3,9.0,396.0,0.97,Dim
4,2.0,64.0,0.71,Dim


In [10]:
# Créer un dictionnaire de mapping
mapping = {"Nuit": 0, "Dim": 1, "Jour": 2, "Jour/nuit": 3, "Jour/Nuit": 3}

# Remplacer les valeurs de la colonne 'Catégorie' par les entiers correspondants
data["Période d'alerte"] = data["Période d'alerte"].replace(mapping)

# Afficher les 20 premières lignes du DataFrame
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["Période d'alerte"] = data["Période d'alerte"].replace(mapping)


Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,8.0,648.0,0.64,1
1,8.0,1504.0,1.5,1
2,6.0,216.0,0.8,1
3,9.0,396.0,0.97,1
4,2.0,64.0,0.71,1


In [11]:
# Faire des prédictions pour chaque ligne de la dataframe
predictions = loaded_model.predict(data)
df["Catégorie"] = predictions
# Créer un dictionnaire de mapping
mapping = {0: "Investigation en cours", 1: "Changement de comportement", 2: "Eclairage", 3: "Froid", 4: "CVC", 5:"Multi-usage"}

# Remplacer les valeurs de la colonne 'Catégorie' par les entiers correspondants
df['Catégorie'] = df['Catégorie'].replace(mapping)
df.head(15)

Feature names unseen at fit time:
- NbrHeures
Feature names seen at fit time, yet now missing:
- Nb d'heures



Unnamed: 0,Code,Energie,heure ouverture,heure fermetur,TalonRef,Début surconsommation,Fin surconsommation,Talon surconso identifié,impact,NbrHeures,NbrNuits,Impact conso (kWh),% Surconso,Période d'alerte,Pourcentage de précision,Catégorie
0,MO0193,Electricité,5,21,125,2023-06-18,2023-06-18,205.0,81,8.0,1,648.0,64%,Dim,90%,Changement de comportement
1,MO0193,Electricité,5,21,125,2023-06-25,2023-06-25,315.0,188,8.0,1,1504.0,150%,Dim,90%,Changement de comportement
2,MO2814,Electricité,5,21,45,2023-06-18,2023-06-18,80.0,36,6.0,1,216.0,80%,Dim,60%,Changement de comportement
3,MO2814,Electricité,5,21,45,2023-06-18,2023-06-18,90.0,44,9.0,1,396.0,97%,Dim,60%,Changement de comportement
4,MO2814,Electricité,5,21,45,2023-06-18,2023-06-18,75.0,32,2.0,1,64.0,71%,Dim,60%,Changement de comportement
5,MO2814,Electricité,5,21,45,2023-06-25,2023-06-25,90.0,45,15.0,1,675.0,100%,Dim,60%,Changement de comportement
6,MO2814,Electricité,5,21,45,2023-06-25,2023-06-25,80.0,33,2.0,1,66.0,73%,Dim,60%,Changement de comportement
7,CS855,Electricité,4,23,170,2023-06-18,2023-06-18,310.0,140,6.0,1,840.0,82%,Dim,80%,Changement de comportement
8,CS855,Electricité,4,23,170,2023-06-18,2023-06-18,245.0,73,1.0,1,73.0,42%,Dim,80%,Changement de comportement
9,CS855,Electricité,4,23,170,2023-06-18,2023-06-18,202.5,33,11.0,1,99.0,19%,Dim,80%,CVC


In [12]:
df.to_excel(writer, sheet_name='dim', index=False)
#print('---------------------------Sauvegarde---------------------------')
#writer.save()
#writer.close()
#print('---------------------------Fin analyse détection auto---------------------------')

# Création de l'algorithme de machine learning personnalisé que nous allons utiliser dans la fusion des nuits de surconsommation.

# Il prend en compte la successivité des dates, une marge de surconsommation et une marge des heures.

In [13]:
def custom_clustering(X, date_margin, consumption_margin, hours_margin, cond):
    clusters = []
    remaining_points = X.copy()

    while len(remaining_points) > 0:
        current_point = remaining_points[0]
        cluster = [current_point]
        remaining_points = np.delete(remaining_points, 0, axis=0)
        i = 0

        # Sauvegarde de la valeur originale de date_margin
        original_date_margin = date_margin

        while i < len(remaining_points):
            point = remaining_points[i]


            if (abs(point[0] - current_point[0]) <= date_margin and
                abs(point[1] - current_point[1]) <= consumption_margin and
                abs(point[2] - current_point[2]) <= hours_margin):

                current_point = point
                cluster.append(point)
                remaining_points = np.delete(remaining_points, i, axis=0)
            else:
                i += 1

            # Vérification de la condition et mise à jour de date_margin si nécessaire
            if point[2] >= cond:
                date_margin = 1

        # Rétablissement de la valeur originale de date_margin
        date_margin = original_date_margin

        clusters.append(cluster)

    return clusters
def create_cluster_dataframe(clusters):
    cluster_data=[]
    for cluster in clusters:
        consomption_mean = np.mean([point[1] for point in cluster])
        hours_mean = np.mean([point[2] for point in cluster])
        count = len(cluster)
        cluster_data.append([consomption_mean ,hours_mean , count])
    df = pd.DataFrame(cluster_data, columns=['Mean Consomption' , 'Mean Hours', 'Count'])
    return df

# L'appel et l'application de l'algorithme de fusion des nuits sur les données output de fusion des heures pour les nuits.

In [14]:
#writer = pd.ExcelWriter('fusion.xlsx', engine='xlsxwriter')

Code_list=[]
TalonRef_list=[]
Date1_list=[]
Date2_list=[]
heurev_list=[]
heuref_list=[]
NbrHeure_list=[]
defrence_list=[]
surconso_identifie=[]
Energie=[]
NbrNuit_list=[]
NImpact_conso = []
NperSurconso = []
NPeriode = []



for code in df_NuitOut["Code"].unique():
    df_Nuit = df_NuitOut[df_NuitOut["Code"] == code]
    # Votre code précédent ici...

    # Créer un tableau Numpy à partir des colonnes spécifiées
    X = np.column_stack((df_Nuit.index, df_Nuit['Talon surconso identifie'], df_Nuit['NbrHeures']))

    print(X)
    cond = (23 - df_Nuit['heure fermetur'].iloc[0])+df_Nuit['heure ouverture'].iloc[0]
    marge=df_Nuit['Marge'].iloc[0]
    #if code == 'MO0157':
    clusters = custom_clustering(X , date_margin=2 , consumption_margin=marge , hours_margin=2 , cond=cond)
    #elif code == 'MO2316':
        #clusters = custom_clustering(X , date_margin=2 , consumption_margin=5 , hours_margin=1 , cond=cond)
    #elif code == 'MO0459':
        #clusters = custom_clustering(X , date_margin=2 , consumption_margin=15 , hours_margin=1 , cond=cond)
    df = create_cluster_dataframe(clusters)
    print(df.head(5))

    for (index, row), cluster in zip(df.iterrows(), clusters):
        Energie.append('Electricité')
        heuref_list.append(df_Nuit['heure fermetur'].iloc[0])
        Code_list.append(code)
        TalonRef_list.append(df_Nuit['TalonRef'].iloc[0])
        first_index = int(cluster[0][0])
        Date1_list.append(df_NuitOut['Début surconsommation'].iloc[first_index])
        last_index = int(cluster[-1][0])
        Date2_list.append(df_NuitOut['Fin surconsommation'].iloc[last_index])
        heurev_list.append(df_Nuit['heure ouverture'].iloc[0])
        surconso_identifie.append(arrondir_multiple_de_5(int(row['Mean Consomption'])))
        defrence_list.append(int(row['Mean Consomption']) - df_Nuit['TalonRef'].iloc[0])
        NbrHeure_list.append(round(row['Mean Hours']))
        NbrNuit_list.append(row['Count'])
        surconso = round(row['Mean Hours']) * (int(row['Mean Consomption']) - df_Nuit['TalonRef'].iloc[0]) * row['Count']
        NImpact_conso.append(surconso)

        persurconso = ((int(row['Mean Consomption']) - df_Nuit['TalonRef'].iloc[0]) /  df_Nuit['TalonRef'].iloc[0])*100
        persurconso = int(persurconso)
        persurconso = str(persurconso)
        persurconso = persurconso + '%'
        NperSurconso.append(persurconso)
        NPeriode.append('Nuit')

df_Fusion = pd.DataFrame({'Code':Code_list, 'Energie': Energie,'heure ouverture':heurev_list,'heure fermetur':heuref_list, 'TalonRef':TalonRef_list, 'Début surconsommation':Date1_list, 'Fin surconsommation':Date2_list, 'Talon surconso identifie':surconso_identifie, 'impact':defrence_list,'NbrHeures':NbrHeure_list, 'NbrNuits':NbrNuit_list, 'Impact conso (kWh)':NImpact_conso, '% Surconso':NperSurconso, "Période d'alerte":NPeriode})
grouped_df = df_Fusion.groupby("Code").size().reset_index(name="Nombre de lignes")
grouped_df["Pourcentage de précision"] = 100 - (grouped_df["Nombre de lignes"] - 1) * 10
df = df_Fusion.merge(grouped_df[["Code", "Pourcentage de précision"]], on="Code", how="left")
df['Pourcentage de précision'] = df['Pourcentage de précision'].astype(str) + '%'
df = process_data(df , 5)

[[  0. 129.   6.]
 [  1. 127.   6.]
 [  2. 130.   6.]
 [  3. 128.   6.]
 [  4. 122.   6.]
 [  5. 128.   6.]
 [  6. 124.   6.]]
   Mean Consomption  Mean Hours  Count
0        126.857143         6.0      7
[[  7. 193.   7.]
 [  8. 195.   7.]
 [  9. 217.   7.]
 [ 10. 218.   7.]
 [ 11. 193.   7.]
 [ 12. 198.   7.]
 [ 13. 204.   7.]]
   Mean Consomption  Mean Hours  Count
0        202.571429         7.0      7
[[14. 82.  7.]
 [15. 84.  7.]
 [16. 82.  7.]
 [17. 85.  7.]
 [18. 79.  7.]
 [19. 79.  7.]
 [20. 84.  7.]]
   Mean Consomption  Mean Hours  Count
0         83.250000         7.0      4
1         80.666667         7.0      3
[[ 21. 195.   4.]
 [ 22. 194.   4.]
 [ 23. 196.   4.]
 [ 24. 208.   4.]
 [ 25. 185.   2.]]
   Mean Consomption  Mean Hours  Count
0             195.6         3.6      5
[[26. 64.  5.]
 [27. 63.  5.]
 [28. 69.  5.]
 [29. 70.  5.]
 [30. 69.  5.]
 [31. 68.  5.]
 [32. 68.  5.]]
   Mean Consomption  Mean Hours  Count
0         67.285714         5.0      7
[[ 33. 124.   

In [15]:
#import pandas as pd

#df_ref = pd.read_excel('fichier2.xlsx', sheet_name='Conso P60', header=0, skiprows=0)
#df_ref.to_csv('dimanche.csv', index=False, encoding='utf-8-sig')

In [16]:
# data = df_output[["Code", "TalonRef", "Début surconsommation", "Fin surconsommation", "Talon surconso identifié", "impact", "NbrHeures", "NbrNuits", "Impact conso (kWh)", "% Surconso", "Période d'alerte"]]
# data.head()

In [17]:
# from sklearn.preprocessing import LabelEncoder
# # Encodage des variables catégorielles
# label_encoder = LabelEncoder()
# categorical_columns = ["Code", "Début surconsommation", "Fin surconsommation", "% Surconso", "Période d'alerte"]
# for column in categorical_columns:
#     data[column] = label_encoder.fit_transform(data[column])
# data.head()

In [18]:
# # Faire des prédictions pour chaque ligne de la dataframe
# predictions = loaded_model.predict(data)
# df_output["Catégorie"] = predictions
# df_output.head()

In [19]:
# # Créer un dictionnaire de mapping
# mapping = {0: "Investigation en cours", 1: "Changement de comportement", 2: "Eclairage", 3: "Froid", 4: "CVC", 5:"Multi-usage"}

# # Remplacer les valeurs de la colonne 'Catégorie' par les entiers correspondants
# df_output['Catégorie'] = df_output['Catégorie'].replace(mapping)
# df_output.head(15)

In [20]:
data = df[["NbrHeures", "Impact conso (kWh)", "% Surconso", "Période d'alerte"]]
data.head()

Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,6,2352.0,80%,Nuit
1,7,3773.0,61%,Nuit
2,7,1064.0,84%,Nuit
3,7,735.0,77%,Nuit
4,4,500.0,14%,Nuit


In [21]:
print(data.dtypes)

NbrHeures               int64
Impact conso (kWh)    float64
% Surconso             object
Période d'alerte       object
dtype: object


In [22]:
# Transformation de la colonne '% Surconso' en float
data['% Surconso'] = data['% Surconso'].apply(lambda x: float(x.strip('%')) / 100)

# Affichage du dataframe après la transformation
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['% Surconso'] = data['% Surconso'].apply(lambda x: float(x.strip('%')) / 100)


Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,6,2352.0,0.8,Nuit
1,7,3773.0,0.61,Nuit
2,7,1064.0,0.84,Nuit
3,7,735.0,0.77,Nuit
4,4,500.0,0.14,Nuit


In [23]:
# Créer un dictionnaire de mapping
mapping = {"Nuit": 0, "Dim": 1, "Jour": 2, "Jour/nuit": 3, "Jour/Nuit": 3}

# Remplacer les valeurs de la colonne 'Catégorie' par les entiers correspondants
data["Période d'alerte"] = data["Période d'alerte"].replace(mapping)

# Afficher les 20 premières lignes du DataFrame
data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["Période d'alerte"] = data["Période d'alerte"].replace(mapping)


Unnamed: 0,NbrHeures,Impact conso (kWh),% Surconso,Période d'alerte
0,6,2352.0,0.8,0
1,7,3773.0,0.61,0
2,7,1064.0,0.84,0
3,7,735.0,0.77,0
4,4,500.0,0.14,0


In [24]:
# Faire des prédictions pour chaque ligne de la dataframe
predictions = loaded_model.predict(data)
df["Catégorie"] = predictions
# Créer un dictionnaire de mapping
mapping = {0: "Investigation en cours", 1: "Changement de comportement", 2: "Eclairage", 3: "Froid", 4: "CVC", 5:"Multi-usage"}

# Remplacer les valeurs de la colonne 'Catégorie' par les entiers correspondants
df['Catégorie'] = df['Catégorie'].replace(mapping)
df.head(15)

Feature names unseen at fit time:
- NbrHeures
Feature names seen at fit time, yet now missing:
- Nb d'heures



Unnamed: 0,Code,Energie,heure ouverture,heure fermetur,TalonRef,Début surconsommation,Fin surconsommation,Talon surconso identifié,impact,NbrHeures,NbrNuits,Impact conso (kWh),% Surconso,Période d'alerte,Pourcentage de précision,Catégorie
0,MO0157,Electricité,5,22,70,2023-06-18,2023-06-25,125.0,56,6,7.0,2352.0,80%,Nuit,100%,CVC
1,MO0193,Electricité,5,21,125,2023-06-18,2023-06-25,200.0,77,7,7.0,3773.0,61%,Nuit,100%,CVC
2,MO2814,Electricité,5,21,45,2023-06-18,2023-06-22,85.0,38,7,4.0,1064.0,84%,Nuit,90%,CVC
3,MO2814,Electricité,5,21,45,2023-06-22,2023-06-25,80.0,35,7,3.0,735.0,77%,Nuit,90%,Eclairage
4,CS855,Electricité,4,23,170,2023-06-18,2023-06-23,195.0,25,4,5.0,500.0,14%,Nuit,100%,CVC
5,CS528,Electricité,4,22,45,2023-06-18,2023-06-25,65.0,22,5,7.0,770.0,48%,Nuit,100%,CVC
6,CS625,Electricité,4,22,80,2023-06-18,2023-06-25,125.0,43,5,7.0,1505.0,53%,Nuit,100%,CVC
7,CG820,Electricité,4,21,205,2023-06-18,2023-06-22,280.0,74,6,4.0,1776.0,36%,Nuit,80%,Investigation en cours
8,CG820,Electricité,4,21,205,2023-06-21,2023-06-22,305.0,99,2,1.0,198.0,48%,Nuit,80%,Eclairage
9,CG820,Electricité,4,21,205,2023-06-22,2023-06-25,290.0,83,6,3.0,1494.0,40%,Nuit,80%,CVC


In [25]:
df.to_excel(writer, sheet_name='Nuit', index=False)
print('---------------------------Sauvegarde---------------------------')
#writer.save()
writer.close()
print('---------------------------Fin analyse détection auto---------------------------')

  warn("Calling close() on already closed file.")


---------------------------Sauvegarde---------------------------
---------------------------Fin analyse détection auto---------------------------
