In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from pyod.utils.utility import precision_n_scores
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import column_or_1d
from sklearn.utils.multiclass import type_of_target

In [2]:
def modify_obs_seq(df, perc_rows_2_modif, perc_items_2_modif):
    # Randomly select x% of rows
    num_rows_to_modify = int(len(df) * perc_rows_2_modif / 100)
    rows_to_modify = np.random.choice(df.index, num_rows_to_modify, replace=False)

    # Function to modify 'Obs_seq' for selected rows
    def modify_sequence(seq):
        # Calculate the number of items to modify in the sequence
        num_items_to_modify = max(1, int(len(seq) * perc_items_2_modif / 100))
        # Randomly select indices to modify in the sequence
        indices_to_modify = np.random.choice(len(seq), num_items_to_modify, replace=False)

        # Create random pairs of indices and swap their values
        for i in range(0, len(indices_to_modify) - 1, 2):
            seq[indices_to_modify[i]], seq[indices_to_modify[i + 1]] = (
                seq[indices_to_modify[i + 1]], seq[indices_to_modify[i]]
            )

        return seq

    # Modify 'Obs_seq' column for selected rows and create 'is_ano' column
    df['is_ano'] = 0  # Initialize 'is_ano' column with 0

    # Apply modification function to 'Obs_seq' for selected rows
    df.loc[rows_to_modify, 'Obs_seq'] = df.loc[rows_to_modify, 'Obs_seq'].apply(modify_sequence)
    df.loc[rows_to_modify, 'is_ano'] = 1  # Label modified rows with 1 in 'is_ano' column

    return df

def format_results(cv_results, mean_score_col, std_score):
    results=pd.DataFrame.from_dict(clf.cv_results_)[[parameter_tested_col,mean_score_col,std_score]]
    results['combined_scores'] = results.apply(lambda row: f"{round(row[mean_score_col],3)} \u00B1 ({round(row[std_score],3)})", axis=1)
    df_result = results[[parameter_tested_col, 'combined_scores']].T
    df_result.columns = df_result.iloc[0]
    df_result.columns.name = None
    df_result = df_result[1:]
    df_result.at[df_result.index[0], 'Dataset'] = file_name
    df_result=df_result.set_index('Dataset')
    return df_result
# df2 = modify_obs_seq(data_seq,10,50)
# display(df2)

In [3]:
import pandas as pd
import numpy as np
from seq2patterns import Seq2patterns
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import os
os.environ["OMP_NUM_THREADS"] = "4"

from sklearn.base import BaseEstimator, ClassifierMixin
class MyAnomalyDetectionObjectFromPM(BaseEstimator, ClassifierMixin): 
    def __init__(self,nb_of_frequent_patterns, min_len_of_frequent_pattern, n_clust, seq_ano_perc, item_ano_perc, ano_method, algo_clustering, aggreg_method):
        
        # for pattern mining
        self.nb_of_frequent_patterns = nb_of_frequent_patterns
        self.min_len_of_frequent_pattern = min_len_of_frequent_pattern
        self.kmeans_is_closed = None
        self.n_clust = n_clust
        self.algo_clustering=algo_clustering
        self.aggreg_method=aggreg_method
        
        # for anomaly detection
        self.seq_ano_perc = seq_ano_perc
        self.item_ano_perc= item_ano_perc
        self.ano_method = ano_method
        
        # model training object
        self.seq2patterns_instance = None
        self.max_len_seq=None
        

    def get_params(self, deep=True): 
        return {
        "nb_of_frequent_patterns":self.nb_of_frequent_patterns,
        "min_len_of_frequent_pattern":self.min_len_of_frequent_pattern,
        # "kmeans_is_closed":self.kmeans_is_closed,
        "algo_clustering":self.algo_clustering,
        "aggreg_method":self.aggreg_method,
        "n_clust":self.n_clust,
        "seq_ano_perc":self.seq_ano_perc,
        "item_ano_perc":self.item_ano_perc,
        "ano_method":self.ano_method
        }

    def set_params(self, **parameters): 
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self 
        
       
    def fit(self, X,y): 
        self.classes_ = np.unique(y, return_inverse=False)
        
        self.max_len_seq = max(X['Obs_seq'].apply(len))
        
        X = X[["Entite", "Obs_seq","Intervals_seq"]] 
        # y=None
        
        
        
        if self.ano_method in ['WCFPOF','WCFPOF_clust']:
            self.kmeans_is_closed==True
        else:
            self.kmeans_is_closed==False
        
        if self.ano_method in ['FPOF','WCFPOF','LFPOF']:
            self.n_clust=1
            
        self.seq2patterns_instance = Seq2patterns(nb_of_frequent_patterns = self.nb_of_frequent_patterns, 
                                                  min_len_of_frequent_pattern = self.min_len_of_frequent_pattern, 
                                                  kmeans_is_closed = self.kmeans_is_closed, 
                                                  n_clust = self.n_clust,
                                                  algo_clustering = self.algo_clustering,
                                                  aggreg_method=self.aggreg_method)
        self.seq2patterns_instance.fit(X)
        return 'fitted'

    def decision_function(self, X): 

        patterns_X = self.seq2patterns_instance.transform(X)
        # pour la jointure après on utilise comme clé le numéro de row, càd l'index.
        X.reset_index(inplace=True)  # Resetting the index
        X['Entite'] = X.index  # Creating 'Entite' column using reset index values


        
        # add the empty pattern to all cases in the patterns table (used to return all cases, not only those with frequent patterns)
        case_distinct_values_list = X['Entite'].unique().tolist()

        # Create an empty list to store dictionaries for new rows
        new_rows = []
        
        # Loop through each distinct value and create a dictionary for each value
        for case in case_distinct_values_list:
            new_row = {'Combination': 'Null_Patn', 'seqIndex': case, 'Cluster': 0, 'freq': 1}
            new_rows.append(new_row)  # Append the dictionary to the list

        # Concatenate the list of dictionaries with the existing DataFrame using pd.concat()
        if new_rows:
            patterns_X = pd.concat([patterns_X, pd.DataFrame(new_rows)], ignore_index=True)
                

            
        X["len_seq"]=X['Obs_seq'].apply(len) # add len of sequence column
        patterns_X = patterns_X.merge(X[['len_seq']], left_on='seqIndex', right_index=True, how='left') # add it to results for further calculations
        
        patterns_X['len_pattern'] = patterns_X['Combination'].apply(lambda x: len(str(x).split('[')[1].split(']')[0].split(',')) if '[' in str(x) and ']' in str(x) else 0)
        
        var_sum_freq = patterns_X[['Combination', 'freq']].drop_duplicates()['freq'].sum()
        max_len_seq2 = max(self.max_len_seq,max(X['Obs_seq'].apply(len)))
        
        
        try:
            
            if self.ano_method == 'FPOF': # sum freq/nb total patterns
                return patterns_X.groupby('seqIndex').apply(lambda x: 1-(x['freq'].sum()/((self.nb_of_frequent_patterns)+1))).tolist() # +1 car on a le pattern vide
            
            elif self.ano_method == 'FPOF_clust': # sum freq/nb total patterns
                return patterns_X.groupby('seqIndex').apply(lambda x: 1-(x['freq'].sum()/((self.nb_of_frequent_patterns)+1))).tolist() # +1 car on a le pattern vide
            
            elif self.ano_method == 'WCFPOF': # sum freq/nb total patterns(avec patterns fermés)
                # return patterns_X.groupby('seqIndex').apply(lambda x: 1-(x['freq'].sum()/((self.nb_of_frequent_patterns)+1))).tolist() # +1 car on a le pattern vide
                return patterns_X.groupby('seqIndex').apply(lambda x: 1 - ((x['freq'] * (x['len_pattern'] / x['len_seq'])).sum() / (self.nb_of_frequent_patterns + 1))).tolist()
            
            elif self.ano_method == 'WCFPOF_clust': # sum freq/nb total patterns(avec patterns fermés)
                # return patterns_X.groupby('seqIndex').apply(lambda x: 1-(x['freq'].sum()/((self.nb_of_frequent_patterns)+1))).tolist() # +1 car on a le pattern vide
                return patterns_X.groupby('seqIndex').apply(lambda x: 1 - ((x['freq'] * (x['len_pattern'] / x['len_seq'])).sum() / (self.nb_of_frequent_patterns + 1))).tolist()
       
        except ValueError:
            print("ERREUR sur les méthodes de calcul de score d'anomalie")
            
            
        
    def predict_proba(self, X): 
        
        return self.decision_function(X)
    
    def predict(self, X):
        def create_top_x_percent_list(input_list, x):
            sorted_list = sorted(input_list, reverse=True)
            threshold_index = int(len(sorted_list) * (x / 100))
            threshold_value = sorted_list[threshold_index]

            new_list = [1 if val >= threshold_value else 0 for val in input_list]
            return new_list
        
        return create_top_x_percent_list(self.decision_function(X), self.seq_ano_perc)

In [4]:
## %%time
from datetime import datetime



list_dataset = [

    "Helpdesk",
    "BPI_Challenge_2012_A",
    "BPI_Challenge_2012_O",
    "BPI_Challenge_2013_closed_problems",
    "bpi_challenge_2013_incidents"
    ]

final_results_precision=None;final_results_recall=None;final_results_f1=None # Pour les résultats finaux

for file_name in list_dataset:
   
    start_time = datetime.now()
    print(start_time)
    
    print("-------------------------------------------------"+file_name+"-------------------------------------------------")
    data = pd.read_pickle(r'data/%s.pkl'%(file_name))
    
    # display(data)
    
    # we may delete the sequences that are too short or too long
    # data = data[data['Obs_seq'].apply(len) >= 1]
    # data = data[data['Obs_seq'].apply(len) <= 500]
    
    
    # we may troncate right the sequences
    # data['Obs_seq'] = data['Obs_seq'].apply(lambda x: x[-20:])
    # data['Intervals_seq'] = data['Intervals_seq'].apply(lambda x: x[-20:])
    # data['Intervals_seq'] = [[round(val, 2) for val in inner_list] for inner_list in data['Intervals_seq']]
    # print("longeur dataset:"+str(len(data)))
    
    # fin ajout
    data['Entite'] = data.index
    # display(data)
    
    # paramètres par défaut
    perc_rows_2_modif=10
    perc_items_2_modif=50


    scoring = {
               "precision":"precision",
               "f1": "f1",
               "recall": "recall"
              }

    # parameters={'ano_method':['FPOF','WCFPOF','FPOF_clust','WCFPOF_clust']}
    # parameters={'aggreg_method':['min','max','mean']}
    # parameters={'n_clust':[1,2,3,4,5,6,7,8,9,10]}
    # parameters={'min_len_of_frequent_pattern':[2,3,4,5,6]}
    # parameters={'nb_of_frequent_patterns':[0.5,0.6,0.7,0.8,0.9,0.9999]}
    parameters={'nb_of_frequent_patterns':[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]}
    

    parameter_tested_col='param_'+list(parameters.keys())[0] # to print at the end
    
    # création instance classe
    MyModel = MyAnomalyDetectionObjectFromPM(nb_of_frequent_patterns=10,
                                        min_len_of_frequent_pattern=3,
                                        n_clust=4,
                                        seq_ano_perc=perc_rows_2_modif, 
                                        item_ano_perc=perc_items_2_modif, 
                                        ano_method='WCFPOF_clust',
                                        algo_clustering="kmeans",
                                        aggreg_method="min")
    # création instance gridsearch
    clf = GridSearchCV(MyModel,
                       cv=StratifiedKFold(shuffle=True,
                                           n_splits=5,
                                           random_state=1),
                       scoring=scoring,
                       error_score="raise",
                       n_jobs=6,
                       refit=False,
                       verbose=0,
                       param_grid=parameters)
    
    # to prepare data
    data_prepared = modify_obs_seq(data,perc_rows_2_modif,perc_items_2_modif)

    clf.fit(data_prepared.loc[:, data_prepared.columns != 'is_ano'], data_prepared['is_ano'])



    df_result_precision=format_results(clf.cv_results_, "mean_test_precision", "std_test_precision")
    df_result_recall=format_results(clf.cv_results_, "mean_test_recall", "std_test_recall")
    df_result_f1=format_results(clf.cv_results_, "mean_test_f1", "std_test_f1")
    
    if final_results_precision is not None:
        final_results_precision = pd.concat([final_results_precision,df_result_precision])
        final_results_recall = pd.concat([final_results_recall,df_result_recall])
        final_results_f1 = pd.concat([final_results_f1,df_result_f1])
    else:
        final_results_precision=df_result_precision
        final_results_recall=df_result_recall
        final_results_f1=df_result_f1

    display(final_results_precision.style.set_caption('Precision_score'))
    display(final_results_recall.style.set_caption('Recall_score'))
    display(final_results_f1.style.set_caption('F1_score'))
    
    end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

2024-02-20 07:15:24.642217
-------------------------------------------------Helpdesk-------------------------------------------------
longeur dataset:4579


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.45 ± (0.068),0.339 ± (0.035),0.525 ± (0.037),0.488 ± (0.043),0.485 ± (0.051),0.492 ± (0.056),0.466 ± (0.041),0.465 ± (0.031),0.468 ± (0.04),0.472 ± (0.043),0.465 ± (0.031),0.449 ± (0.034),0.451 ± (0.035),0.471 ± (0.037),0.469 ± (0.036)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.519 ± (0.047),0.536 ± (0.045),0.549 ± (0.033),0.545 ± (0.047),0.554 ± (0.034),0.556 ± (0.034),0.551 ± (0.02),0.551 ± (0.028),0.567 ± (0.031),0.545 ± (0.022),0.538 ± (0.024),0.547 ± (0.018),0.547 ± (0.018),0.54 ± (0.019),0.536 ± (0.02)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.48 ± (0.051),0.415 ± (0.039),0.537 ± (0.035),0.513 ± (0.035),0.515 ± (0.035),0.52 ± (0.04),0.504 ± (0.023),0.503 ± (0.014),0.511 ± (0.026),0.504 ± (0.027),0.498 ± (0.018),0.493 ± (0.023),0.494 ± (0.025),0.502 ± (0.021),0.499 ± (0.021)


Duration: 0:25:51.069872
2024-02-20 07:41:15.712089
-------------------------------------------------BPI_Challenge_2012_A-------------------------------------------------
longeur dataset:13087


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.45 ± (0.068),0.339 ± (0.035),0.525 ± (0.037),0.488 ± (0.043),0.485 ± (0.051),0.492 ± (0.056),0.466 ± (0.041),0.465 ± (0.031),0.468 ± (0.04),0.472 ± (0.043),0.465 ± (0.031),0.449 ± (0.034),0.451 ± (0.035),0.471 ± (0.037),0.469 ± (0.036)
BPI_Challenge_2012_A,0.119 ± (0.002),0.366 ± (0.135),0.421 ± (0.061),0.437 ± (0.062),0.414 ± (0.083),0.342 ± (0.021),0.362 ± (0.042),0.35 ± (0.025),0.343 ± (0.044),0.33 ± (0.023),0.312 ± (0.014),0.331 ± (0.021),0.306 ± (0.009),0.341 ± (0.028),0.305 ± (0.009)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.519 ± (0.047),0.536 ± (0.045),0.549 ± (0.033),0.545 ± (0.047),0.554 ± (0.034),0.556 ± (0.034),0.551 ± (0.02),0.551 ± (0.028),0.567 ± (0.031),0.545 ± (0.022),0.538 ± (0.024),0.547 ± (0.018),0.547 ± (0.018),0.54 ± (0.019),0.536 ± (0.02)
BPI_Challenge_2012_A,0.506 ± (0.013),0.491 ± (0.017),0.491 ± (0.036),0.488 ± (0.038),0.517 ± (0.045),0.541 ± (0.009),0.511 ± (0.038),0.496 ± (0.032),0.472 ± (0.032),0.461 ± (0.023),0.466 ± (0.01),0.45 ± (0.027),0.465 ± (0.011),0.441 ± (0.036),0.463 ± (0.01)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.48 ± (0.051),0.415 ± (0.039),0.537 ± (0.035),0.513 ± (0.035),0.515 ± (0.035),0.52 ± (0.04),0.504 ± (0.023),0.503 ± (0.014),0.511 ± (0.026),0.504 ± (0.027),0.498 ± (0.018),0.493 ± (0.023),0.494 ± (0.025),0.502 ± (0.021),0.499 ± (0.021)
BPI_Challenge_2012_A,0.193 ± (0.002),0.406 ± (0.1),0.449 ± (0.032),0.456 ± (0.03),0.451 ± (0.042),0.419 ± (0.018),0.421 ± (0.019),0.409 ± (0.011),0.395 ± (0.025),0.383 ± (0.009),0.373 ± (0.013),0.38 ± (0.011),0.369 ± (0.009),0.382 ± (0.009),0.368 ± (0.008)


Duration: 0:51:04.674633
2024-02-20 08:32:20.386722
-------------------------------------------------BPI_Challenge_2012_O-------------------------------------------------
longeur dataset:5015


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.45 ± (0.068),0.339 ± (0.035),0.525 ± (0.037),0.488 ± (0.043),0.485 ± (0.051),0.492 ± (0.056),0.466 ± (0.041),0.465 ± (0.031),0.468 ± (0.04),0.472 ± (0.043),0.465 ± (0.031),0.449 ± (0.034),0.451 ± (0.035),0.471 ± (0.037),0.469 ± (0.036)
BPI_Challenge_2012_A,0.119 ± (0.002),0.366 ± (0.135),0.421 ± (0.061),0.437 ± (0.062),0.414 ± (0.083),0.342 ± (0.021),0.362 ± (0.042),0.35 ± (0.025),0.343 ± (0.044),0.33 ± (0.023),0.312 ± (0.014),0.331 ± (0.021),0.306 ± (0.009),0.341 ± (0.028),0.305 ± (0.009)
BPI_Challenge_2012_O,0.735 ± (0.045),0.713 ± (0.018),0.467 ± (0.029),0.461 ± (0.022),0.463 ± (0.022),0.588 ± (0.03),0.574 ± (0.035),0.565 ± (0.035),0.599 ± (0.036),0.599 ± (0.039),0.598 ± (0.037),0.594 ± (0.037),0.59 ± (0.037),0.587 ± (0.04),0.586 ± (0.04)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.519 ± (0.047),0.536 ± (0.045),0.549 ± (0.033),0.545 ± (0.047),0.554 ± (0.034),0.556 ± (0.034),0.551 ± (0.02),0.551 ± (0.028),0.567 ± (0.031),0.545 ± (0.022),0.538 ± (0.024),0.547 ± (0.018),0.547 ± (0.018),0.54 ± (0.019),0.536 ± (0.02)
BPI_Challenge_2012_A,0.506 ± (0.013),0.491 ± (0.017),0.491 ± (0.036),0.488 ± (0.038),0.517 ± (0.045),0.541 ± (0.009),0.511 ± (0.038),0.496 ± (0.032),0.472 ± (0.032),0.461 ± (0.023),0.466 ± (0.01),0.45 ± (0.027),0.465 ± (0.011),0.441 ± (0.036),0.463 ± (0.01)
BPI_Challenge_2012_O,0.78 ± (0.027),0.721 ± (0.016),0.673 ± (0.036),0.603 ± (0.033),0.603 ± (0.033),0.737 ± (0.032),0.695 ± (0.041),0.655 ± (0.038),0.733 ± (0.029),0.725 ± (0.03),0.721 ± (0.033),0.709 ± (0.036),0.699 ± (0.033),0.689 ± (0.036),0.679 ± (0.041)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.48 ± (0.051),0.415 ± (0.039),0.537 ± (0.035),0.513 ± (0.035),0.515 ± (0.035),0.52 ± (0.04),0.504 ± (0.023),0.503 ± (0.014),0.511 ± (0.026),0.504 ± (0.027),0.498 ± (0.018),0.493 ± (0.023),0.494 ± (0.025),0.502 ± (0.021),0.499 ± (0.021)
BPI_Challenge_2012_A,0.193 ± (0.002),0.406 ± (0.1),0.449 ± (0.032),0.456 ± (0.03),0.451 ± (0.042),0.419 ± (0.018),0.421 ± (0.019),0.409 ± (0.011),0.395 ± (0.025),0.383 ± (0.009),0.373 ± (0.013),0.38 ± (0.011),0.369 ± (0.009),0.382 ± (0.009),0.368 ± (0.008)
BPI_Challenge_2012_O,0.757 ± (0.035),0.717 ± (0.017),0.551 ± (0.032),0.522 ± (0.024),0.523 ± (0.024),0.654 ± (0.03),0.628 ± (0.037),0.606 ± (0.034),0.659 ± (0.033),0.656 ± (0.034),0.653 ± (0.034),0.646 ± (0.034),0.64 ± (0.033),0.633 ± (0.037),0.629 ± (0.039)


Duration: 0:25:25.484446
2024-02-20 08:57:45.871168
-------------------------------------------------BPI_Challenge_2013_closed_problems-------------------------------------------------
longeur dataset:993


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.45 ± (0.068),0.339 ± (0.035),0.525 ± (0.037),0.488 ± (0.043),0.485 ± (0.051),0.492 ± (0.056),0.466 ± (0.041),0.465 ± (0.031),0.468 ± (0.04),0.472 ± (0.043),0.465 ± (0.031),0.449 ± (0.034),0.451 ± (0.035),0.471 ± (0.037),0.469 ± (0.036)
BPI_Challenge_2012_A,0.119 ± (0.002),0.366 ± (0.135),0.421 ± (0.061),0.437 ± (0.062),0.414 ± (0.083),0.342 ± (0.021),0.362 ± (0.042),0.35 ± (0.025),0.343 ± (0.044),0.33 ± (0.023),0.312 ± (0.014),0.331 ± (0.021),0.306 ± (0.009),0.341 ± (0.028),0.305 ± (0.009)
BPI_Challenge_2012_O,0.735 ± (0.045),0.713 ± (0.018),0.467 ± (0.029),0.461 ± (0.022),0.463 ± (0.022),0.588 ± (0.03),0.574 ± (0.035),0.565 ± (0.035),0.599 ± (0.036),0.599 ± (0.039),0.598 ± (0.037),0.594 ± (0.037),0.59 ± (0.037),0.587 ± (0.04),0.586 ± (0.04)
BPI_Challenge_2013_closed_problems,0.161 ± (0.029),0.165 ± (0.036),0.143 ± (0.026),0.138 ± (0.026),0.138 ± (0.026),0.126 ± (0.038),0.126 ± (0.038),0.139 ± (0.036),0.226 ± (0.075),0.27 ± (0.084),0.232 ± (0.045),0.251 ± (0.049),0.257 ± (0.053),0.206 ± (0.057),0.227 ± (0.035)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.519 ± (0.047),0.536 ± (0.045),0.549 ± (0.033),0.545 ± (0.047),0.554 ± (0.034),0.556 ± (0.034),0.551 ± (0.02),0.551 ± (0.028),0.567 ± (0.031),0.545 ± (0.022),0.538 ± (0.024),0.547 ± (0.018),0.547 ± (0.018),0.54 ± (0.019),0.536 ± (0.02)
BPI_Challenge_2012_A,0.506 ± (0.013),0.491 ± (0.017),0.491 ± (0.036),0.488 ± (0.038),0.517 ± (0.045),0.541 ± (0.009),0.511 ± (0.038),0.496 ± (0.032),0.472 ± (0.032),0.461 ± (0.023),0.466 ± (0.01),0.45 ± (0.027),0.465 ± (0.011),0.441 ± (0.036),0.463 ± (0.01)
BPI_Challenge_2012_O,0.78 ± (0.027),0.721 ± (0.016),0.673 ± (0.036),0.603 ± (0.033),0.603 ± (0.033),0.737 ± (0.032),0.695 ± (0.041),0.655 ± (0.038),0.733 ± (0.029),0.725 ± (0.03),0.721 ± (0.033),0.709 ± (0.036),0.699 ± (0.033),0.689 ± (0.036),0.679 ± (0.041)
BPI_Challenge_2013_closed_problems,0.355 ± (0.093),0.315 ± (0.094),0.264 ± (0.064),0.254 ± (0.069),0.254 ± (0.069),0.234 ± (0.092),0.234 ± (0.092),0.214 ± (0.087),0.273 ± (0.05),0.293 ± (0.072),0.263 ± (0.058),0.263 ± (0.037),0.273 ± (0.05),0.223 ± (0.042),0.243 ± (0.022)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.48 ± (0.051),0.415 ± (0.039),0.537 ± (0.035),0.513 ± (0.035),0.515 ± (0.035),0.52 ± (0.04),0.504 ± (0.023),0.503 ± (0.014),0.511 ± (0.026),0.504 ± (0.027),0.498 ± (0.018),0.493 ± (0.023),0.494 ± (0.025),0.502 ± (0.021),0.499 ± (0.021)
BPI_Challenge_2012_A,0.193 ± (0.002),0.406 ± (0.1),0.449 ± (0.032),0.456 ± (0.03),0.451 ± (0.042),0.419 ± (0.018),0.421 ± (0.019),0.409 ± (0.011),0.395 ± (0.025),0.383 ± (0.009),0.373 ± (0.013),0.38 ± (0.011),0.369 ± (0.009),0.382 ± (0.009),0.368 ± (0.008)
BPI_Challenge_2012_O,0.757 ± (0.035),0.717 ± (0.017),0.551 ± (0.032),0.522 ± (0.024),0.523 ± (0.024),0.654 ± (0.03),0.628 ± (0.037),0.606 ± (0.034),0.659 ± (0.033),0.656 ± (0.034),0.653 ± (0.034),0.646 ± (0.034),0.64 ± (0.033),0.633 ± (0.037),0.629 ± (0.039)
BPI_Challenge_2013_closed_problems,0.221 ± (0.042),0.215 ± (0.05),0.185 ± (0.035),0.177 ± (0.036),0.177 ± (0.036),0.162 ± (0.053),0.162 ± (0.053),0.166 ± (0.05),0.245 ± (0.064),0.281 ± (0.079),0.245 ± (0.047),0.256 ± (0.043),0.264 ± (0.051),0.213 ± (0.051),0.234 ± (0.029)


Duration: 0:04:22.680053
2024-02-20 09:02:08.551221
-------------------------------------------------bpi_challenge_2013_incidents-------------------------------------------------
longeur dataset:7543


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.45 ± (0.068),0.339 ± (0.035),0.525 ± (0.037),0.488 ± (0.043),0.485 ± (0.051),0.492 ± (0.056),0.466 ± (0.041),0.465 ± (0.031),0.468 ± (0.04),0.472 ± (0.043),0.465 ± (0.031),0.449 ± (0.034),0.451 ± (0.035),0.471 ± (0.037),0.469 ± (0.036)
BPI_Challenge_2012_A,0.119 ± (0.002),0.366 ± (0.135),0.421 ± (0.061),0.437 ± (0.062),0.414 ± (0.083),0.342 ± (0.021),0.362 ± (0.042),0.35 ± (0.025),0.343 ± (0.044),0.33 ± (0.023),0.312 ± (0.014),0.331 ± (0.021),0.306 ± (0.009),0.341 ± (0.028),0.305 ± (0.009)
BPI_Challenge_2012_O,0.735 ± (0.045),0.713 ± (0.018),0.467 ± (0.029),0.461 ± (0.022),0.463 ± (0.022),0.588 ± (0.03),0.574 ± (0.035),0.565 ± (0.035),0.599 ± (0.036),0.599 ± (0.039),0.598 ± (0.037),0.594 ± (0.037),0.59 ± (0.037),0.587 ± (0.04),0.586 ± (0.04)
BPI_Challenge_2013_closed_problems,0.161 ± (0.029),0.165 ± (0.036),0.143 ± (0.026),0.138 ± (0.026),0.138 ± (0.026),0.126 ± (0.038),0.126 ± (0.038),0.139 ± (0.036),0.226 ± (0.075),0.27 ± (0.084),0.232 ± (0.045),0.251 ± (0.049),0.257 ± (0.053),0.206 ± (0.057),0.227 ± (0.035)
bpi_challenge_2013_incidents,0.16 ± (0.024),0.181 ± (0.023),0.199 ± (0.026),0.214 ± (0.032),0.199 ± (0.021),0.226 ± (0.024),0.24 ± (0.019),0.27 ± (0.034),0.277 ± (0.031),0.188 ± (0.07),0.149 ± (0.016),0.15 ± (0.017),0.152 ± (0.016),0.15 ± (0.018),0.147 ± (0.017)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.519 ± (0.047),0.536 ± (0.045),0.549 ± (0.033),0.545 ± (0.047),0.554 ± (0.034),0.556 ± (0.034),0.551 ± (0.02),0.551 ± (0.028),0.567 ± (0.031),0.545 ± (0.022),0.538 ± (0.024),0.547 ± (0.018),0.547 ± (0.018),0.54 ± (0.019),0.536 ± (0.02)
BPI_Challenge_2012_A,0.506 ± (0.013),0.491 ± (0.017),0.491 ± (0.036),0.488 ± (0.038),0.517 ± (0.045),0.541 ± (0.009),0.511 ± (0.038),0.496 ± (0.032),0.472 ± (0.032),0.461 ± (0.023),0.466 ± (0.01),0.45 ± (0.027),0.465 ± (0.011),0.441 ± (0.036),0.463 ± (0.01)
BPI_Challenge_2012_O,0.78 ± (0.027),0.721 ± (0.016),0.673 ± (0.036),0.603 ± (0.033),0.603 ± (0.033),0.737 ± (0.032),0.695 ± (0.041),0.655 ± (0.038),0.733 ± (0.029),0.725 ± (0.03),0.721 ± (0.033),0.709 ± (0.036),0.699 ± (0.033),0.689 ± (0.036),0.679 ± (0.041)
BPI_Challenge_2013_closed_problems,0.355 ± (0.093),0.315 ± (0.094),0.264 ± (0.064),0.254 ± (0.069),0.254 ± (0.069),0.234 ± (0.092),0.234 ± (0.092),0.214 ± (0.087),0.273 ± (0.05),0.293 ± (0.072),0.263 ± (0.058),0.263 ± (0.037),0.273 ± (0.05),0.223 ± (0.042),0.243 ± (0.022)
bpi_challenge_2013_incidents,0.164 ± (0.026),0.187 ± (0.025),0.21 ± (0.03),0.219 ± (0.033),0.215 ± (0.019),0.229 ± (0.027),0.245 ± (0.023),0.271 ± (0.034),0.281 ± (0.033),0.44 ± (0.067),0.446 ± (0.048),0.44 ± (0.057),0.418 ± (0.037),0.399 ± (0.05),0.386 ± (0.046)


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Helpdesk,0.48 ± (0.051),0.415 ± (0.039),0.537 ± (0.035),0.513 ± (0.035),0.515 ± (0.035),0.52 ± (0.04),0.504 ± (0.023),0.503 ± (0.014),0.511 ± (0.026),0.504 ± (0.027),0.498 ± (0.018),0.493 ± (0.023),0.494 ± (0.025),0.502 ± (0.021),0.499 ± (0.021)
BPI_Challenge_2012_A,0.193 ± (0.002),0.406 ± (0.1),0.449 ± (0.032),0.456 ± (0.03),0.451 ± (0.042),0.419 ± (0.018),0.421 ± (0.019),0.409 ± (0.011),0.395 ± (0.025),0.383 ± (0.009),0.373 ± (0.013),0.38 ± (0.011),0.369 ± (0.009),0.382 ± (0.009),0.368 ± (0.008)
BPI_Challenge_2012_O,0.757 ± (0.035),0.717 ± (0.017),0.551 ± (0.032),0.522 ± (0.024),0.523 ± (0.024),0.654 ± (0.03),0.628 ± (0.037),0.606 ± (0.034),0.659 ± (0.033),0.656 ± (0.034),0.653 ± (0.034),0.646 ± (0.034),0.64 ± (0.033),0.633 ± (0.037),0.629 ± (0.039)
BPI_Challenge_2013_closed_problems,0.221 ± (0.042),0.215 ± (0.05),0.185 ± (0.035),0.177 ± (0.036),0.177 ± (0.036),0.162 ± (0.053),0.162 ± (0.053),0.166 ± (0.05),0.245 ± (0.064),0.281 ± (0.079),0.245 ± (0.047),0.256 ± (0.043),0.264 ± (0.051),0.213 ± (0.051),0.234 ± (0.029)
bpi_challenge_2013_incidents,0.162 ± (0.025),0.184 ± (0.024),0.204 ± (0.028),0.216 ± (0.032),0.206 ± (0.017),0.228 ± (0.026),0.243 ± (0.021),0.27 ± (0.034),0.279 ± (0.032),0.25 ± (0.042),0.223 ± (0.024),0.223 ± (0.026),0.223 ± (0.023),0.218 ± (0.027),0.213 ± (0.025)


Duration: 0:56:24.429516
