In [1]:
# to transform XES to PKL
import os
import glob
import pandas as pd
# from dateutil.parser import parse
from tqdm import tqdm
import pm4py
from collections import Counter


# Directory that holds the XES event logs (replace with your own path).
folder_path = 'data'

# Collect every .xes file located directly inside that directory.
xes_pattern = os.path.join(folder_path, '*.xes')
xes_files = glob.glob(xes_pattern)

# Show the bare file name (directory part stripped) of each log that was found.
for xes_file in xes_files:
    file_name = os.path.split(xes_file)[1]
    print(file_name)

# Never truncate columns when a DataFrame is rendered.
# (Row truncation is deliberately left at the pandas default.)
pd.set_option('display.max_columns', None)

# Master switch for the intermediate previews shown further down.
do_display = True

def build_entity_sequence(df_input,entity_colname,obs_colname,time_colname,method_time,min_length=1):
    """Collapse an event table (one row per observation) into one row per entity.

    Each entity's rows are ordered by time and folded into two parallel lists:
    the observation values and a time-related value for every observation.

    Parameters
    ----------
    df_input : dataframe
        Event table; it is copied, never modified.
    entity_colname : str
        Column identifying the entity each row belongs to.
    obs_colname : str
        Column holding the observation recorded by each row.
    time_colname : str
        Column holding the observation time. It must be datetime-like because
        it is converted through the ``.dt`` accessor.
    method_time : str
        How the time sequence is filled:
        ``"timestamp"``    -> hours elapsed since the earliest row of the table,
        ``"interval"``     -> hours until the entity's next observation
                              (``0.0`` for its last observation),
        ``"no_timestamp"`` -> ``0`` for every observation.
    min_length : int, optional
        Entities with fewer observations than this are dropped. The default
        of 1 keeps every entity.

    Returns
    ----------
    df_seq : dataframe
        One row per entity with columns ``Entite``, ``Obs_seq`` and
        ``Intervals_seq``; sequence elements are native Python scalars.
        ``None`` is returned when ``method_time`` is not one of the three
        supported values.
    """
    # Reject an unknown mode before doing any work (callers receive None).
    if method_time not in ("timestamp", "interval", "no_timestamp"):
        return None

    df = df_input.copy()

    # Express time as hours elapsed since the earliest observation of the table.
    min_date = df[time_colname].min()
    df[time_colname] = (df[time_colname] - min_date).dt.total_seconds() / 3600

    # Materialise the groups so the progress bar knows its total.
    groups = list(df.groupby(entity_colname))

    list_seq = []
    columns = ['Entite', 'Obs_seq', 'Intervals_seq']

    for entite, df_entite in tqdm(groups):
        # Filter out sequences that are too short.
        if len(df_entite) < min_length:
            continue

        # A stable sort keeps the original row order of events that share the
        # same timestamp instead of leaving it to the sort implementation.
        df_entite = df_entite.sort_values(by=[time_colname], ignore_index=True, kind="mergesort")

        seq_entite = df_entite[obs_colname].tolist()
        times = df_entite[time_colname]

        if method_time == "timestamp":
            seq_entite_time = times.tolist()
        elif method_time == "interval":
            # diff() yields t[i] - t[i-1]; dropping its first (NaN) entry gives
            # the gap to the next event, and the last event gets 0.0.
            seq_entite_time = times.diff().iloc[1:].tolist() + [0.0]
        else:  # "no_timestamp"
            seq_entite_time = [0] * len(df_entite)

        list_seq.append([entite, seq_entite, seq_entite_time])

    # Build the DataFrame once, after every sequence has been collected.
    df_seq_entite = pd.DataFrame(list_seq, columns=columns)
    return df_seq_entite

def get_variable_frequency_stats(data,col_name):
    """Display a frequency table for the values of one column.

    The table lists, for every distinct value of ``data[col_name]`` (taken in
    sorted value order): its count, its share in percent, the running count
    and the running share in percent. Percentages are rounded to two decimals.
    The table is shown sorted by ascending count, so the cumulative columns
    follow the value order rather than the displayed order.

    Parameters
    ----------
    data : dataframe
        Table containing the column to summarise.
    col_name : str
        Name of the column whose values are counted.

    Returns
    ----------
    None
        The table is only rendered through ``display`` (the IPython/Jupyter
        built-in); nothing is returned.
    """
    counts = data[col_name].value_counts().sort_index()
    frequencies = counts.values
    total = frequencies.sum()
    running_total = frequencies.cumsum()

    table = pd.DataFrame({
      'state': counts.index,
      'Frequency': frequencies,
      'Percent': ((frequencies/total)*100).round(2),
      'Cumulative Frequency': running_total,
      'Cumulative Percent': ((running_total/total)*100).round(2)
    })
    display(table.sort_values(by=['Frequency']))
    return None



# Master switch for the intermediate previews (head of each table).
do_display=True

# Benchmark event logs available for conversion (file names without ".xes").
list_dataset = [

    "env_permit",
    "Helpdesk",
    "nasa",
    "SEPSIS",
    "BPI_Challenge_2012",
    "BPI_Challenge_2012_A",
    "BPI_Challenge_2012_Complete",
    "BPI_Challenge_2012_O",
    "BPI_Challenge_2012_W",
    "BPI_Challenge_2012_W_Complete",
    "BPI_Challenge_2013_closed_problems",
    "bpi_challenge_2013_incidents",
    "BPI Challenge 2017",
    "BPI_Challenge_2019",

    # Currently disabled logs:
    # "BPIC15_1",
    # "BPIC15_2",
    # "BPIC15_3",
    # "BPIC15_4",
    # "BPIC15_5",
    # "Hospital_log"
]
# NOTE(review): this override discards the list above, so only this single log
# is converted. Remove the line to process every benchmark log again.
list_dataset = ["DATA_MCF_1an_prest"]

for file_name in list_dataset:
    print("======================================")
    print("-------- Dataset: "+file_name)
    print("======================================")

    # Load the XES log and keep only the case id, timestamp and activity columns,
    # renamed to the short names used in the rest of the notebook.
    log = pm4py.read_xes(os.path.join(folder_path, '%s.xes'%(file_name)))
    data = pm4py.convert_to_dataframe(log)[["case:concept:name","time:timestamp","concept:name"]]
    data = data.rename(columns={
        'time:timestamp':'date',
        'case:concept:name':'entity',
        'concept:name':'type',
    })

    if do_display: display(data.head(10))

    # Drop events without a case id. The explicit copy makes the column
    # assignments below act on an independent frame rather than on a slice.
    data = data[data['entity'].notnull()].copy()
    data["date"] = pd.to_datetime(data["date"])

    # Replace case ids and activity names by integer codes (order of first appearance).
    data['entity'] = pd.factorize(data['entity'])[0]
    data['type'] = pd.factorize(data['type'])[0]

    if do_display: print('df_entity_seq_interval')
    # One row per case: activity codes plus the time gap to the next event.
    df_entity_seq_interval     = build_entity_sequence(data,'entity',"type","date",method_time="interval")
    if do_display: display(df_entity_seq_interval.head(10))
    df_entity_seq_interval.to_pickle(os.path.join(folder_path, "%s.pkl"%(file_name)))
    
 

  from .autonotebook import tqdm as notebook_tqdm


BPI Challenge 2017.xes
BPIC15_1.xes
BPIC15_2.xes
BPIC15_3.xes
BPIC15_4.xes
BPIC15_5.xes
BPI_Challenge_2012.xes
BPI_Challenge_2012_A.xes
BPI_Challenge_2012_Complete.xes
BPI_Challenge_2012_O.xes
BPI_Challenge_2012_W.xes
BPI_Challenge_2012_W_Complete.xes
BPI_Challenge_2013_closed_problems.xes
bpi_challenge_2013_incidents.xes
BPI_Challenge_2019.xes
env_permit.xes
Helpdesk.xes
Hospital_log.xes
nasa.xes
SEPSIS.xes
-------- Dataset: env_permit


parsing log, completed traces :: 100%|██████████| 1434/1434 [00:00<00:00, 5855.57it/s]


Unnamed: 0,entity,date,type
0,case-891,2010-10-02 07:20:39.266000+00:00,Confirmation of receipt
1,case-891,2010-10-02 07:21:26.588000+00:00,T02 Check confirmation of receipt
2,case-891,2010-10-02 07:31:12.836000+00:00,T03 Adjust confirmation of receipt
3,case-891,2010-10-02 07:31:40.160000+00:00,T02 Check confirmation of receipt
4,case-891,2010-10-02 07:32:01.401000+00:00,T03 Adjust confirmation of receipt
5,case-891,2010-11-09 13:13:52.563000+00:00,T02 Check confirmation of receipt
6,case-891,2010-11-09 13:14:10.016000+00:00,T04 Determine confirmation of receipt
7,case-891,2010-11-09 13:14:22.628000+00:00,T05 Print and send confirmation of receipt
8,case-891,2010-11-09 13:14:37.300000+00:00,T06 Determine necessity of stop advice
9,case-891,2010-11-09 13:17:36.097000+00:00,T07-1 Draft intern advice aspect 1


df_entity_seq_interval


100%|██████████| 1434/1434 [00:00<00:00, 5603.81it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 1, 2, 1, 3, 4, 5, 6, 5, 6, 5, 7, 8, ...","[0.013145, 0.1628466666666667, 0.0075899999999..."
1,1,"[0, 5, 1, 6, 5, 7, 3, 4]","[0.16701000000000477, 0.8109255555555563, 0.01..."
2,2,"[0, 5, 7, 1, 2, 1, 3, 4]","[0.09359833333333256, 0.013185277777779447, 1...."
3,3,"[0, 5, 1, 2, 1, 12, 5, 7, 2, 1, 2, 1, 3, 4]","[1.3474713888888914, 0.6659052777777674, 1.036..."
4,4,"[0, 1, 3, 4, 5, 7]","[0.014143055555564388, 0.01925055555562949, 0...."
5,5,"[0, 1, 3, 4, 5, 7]","[0.006307777777749379, 0.007517500000005839, 0..."
6,6,"[0, 5, 1, 3, 7, 4]","[0.11074361111116104, 0.004053055555573337, 0...."
7,7,"[0, 5, 12, 6, 13, 14, 15, 16, 17, 1, 3, 4, 5, 7]","[0.2603313888888579, 17.419098055555537, 0.010..."
8,8,"[0, 1, 3, 4, 5, 7]","[6.560104722222263, 43.887286111111166, 2.0038..."
9,9,"[0, 1, 3, 4, 5, 7]","[6.101457500000038, 43.8922766666667, 1.995038..."


-------- Dataset: Helpdesk


parsing log, completed traces :: 100%|██████████| 4580/4580 [00:00<00:00, 8277.85it/s]


Unnamed: 0,entity,date,type
0,Case3608,2010-01-13 06:40:25+00:00,Assign seriousness
1,Case3608,2010-01-29 06:52:27+00:00,Take in charge ticket
2,Case3608,2010-01-29 06:52:34+00:00,Resolve ticket
3,Case3608,2010-02-13 06:52:48+00:00,Closed
4,Case3608,2010-02-13 06:52:48+00:00,Closed
5,Case2748,2010-01-13 10:26:04+00:00,Assign seriousness
6,Case2748,2010-01-19 07:26:05+00:00,Take in charge ticket
7,Case2748,2010-01-19 07:28:50+00:00,Resolve ticket
8,Case2748,2010-02-13 10:00:28+00:00,Closed
9,Case4284,2010-01-13 10:30:37+00:00,Assign seriousness


df_entity_seq_interval


100%|██████████| 4580/4580 [00:00<00:00, 5731.19it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 3]","[384.2005555555556, 0.0019444444444047804, 360..."
1,1,"[0, 1, 2, 3]","[141.0002777777778, 0.04583333333332007, 602.5..."
2,2,"[0, 0, 1, 2, 1, 2, 3, 3]","[337.00944444444445, 0.0019444444444616238, 0...."
3,3,"[0, 1, 2, 3, 3]","[308.68805555555554, 0.0019444444444616238, 43..."
4,4,"[0, 0, 1, 2, 3]","[0.0036111111111107874, 18.28138888888889, 0.0..."
5,5,"[0, 0, 1, 2, 3]","[0.0038888888888877204, 144.07166666666666, 0...."
6,6,"[0, 1, 2, 3]","[629.1391666666666, 186.72833333333335, 0.0016..."
7,7,"[0, 0, 1, 2, 3]","[0.005277777777777715, 433.12611111111113, 263..."
8,8,"[0, 1, 2, 3]","[399.0886111111111, 0.23527777777781012, 360.0..."
9,9,"[0, 1, 2, 3]","[0.00611111111111029, 0.03249999999999886, 105..."


-------- Dataset: nasa


parsing log, completed traces :: 100%|██████████| 2566/2566 [00:04<00:00, 609.73it/s]


Unnamed: 0,entity,date,type
0,cev.TestCEV.test_1(),2017-02-13 14:50:51.610000+00:00,cev.CEV()
1,cev.TestCEV.test_1(),2017-02-13 14:50:51.613000+00:00,cev.ErrorLog()
2,cev.TestCEV.test_1(),2017-02-13 14:50:51.613000+00:00,cev.ErrorLog()
3,cev.TestCEV.test_1(),2017-02-13 14:50:51.617000+00:00,cev.Failures(cev.ErrorLog)
4,cev.TestCEV.test_1(),2017-02-13 14:50:51.621000+00:00,"cev.Failures$Type(java.lang.String,int)"
5,cev.TestCEV.test_1(),2017-02-13 14:50:51.622000+00:00,"cev.Failures$Type(java.lang.String,int)"
6,cev.TestCEV.test_1(),2017-02-13 14:50:51.622000+00:00,"cev.Failures$Type(java.lang.String,int)"
7,cev.TestCEV.test_1(),2017-02-13 14:50:51.622000+00:00,"cev.Failures$Type(java.lang.String,int)"
8,cev.TestCEV.test_1(),2017-02-13 14:50:51.622000+00:00,"cev.Failures$Type(java.lang.String,int)"
9,cev.TestCEV.test_1(),2017-02-13 14:50:51.622000+00:00,"cev.Failures$Type(java.lang.String,int)"


df_entity_seq_interval


100%|██████████| 2566/2566 [00:01<00:00, 1892.62it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 1, 2, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 2, ...","[8.333333333333333e-07, 0.0, 1.111111111111111..."
1,1,"[0, 1, 1, 2, 2, 14, 10, 13, 13, 12, 12, 11, 11...","[0.0, 0.0, 0.0, 0.0, 2.7777777777777813e-07, 0..."
2,2,"[0, 1, 1, 2, 2, 5, 17, 18, 18, 17, 10, 13, 13,...","[0.0, 0.0, 0.0, 0.0, 0.0, 2.7777777777777813e-..."
3,3,"[0, 1, 1, 2, 2, 11, 11, 10, 5, 5, 0, 12, 12, 1...","[0.0, 0.0, 0.0, 0.0, 2.7777777777777813e-07, 0..."
4,4,"[0, 1, 1, 2, 2, 5, 5, 0, 10, 11, 13, 12, 11, 1...","[0.0, 0.0, 8.33333333333331e-07, 2.77777777777..."
5,5,"[0, 1, 1, 2, 2, 12, 12, 11, 10, 10, 13, 13, 11...","[0.0, 0.0, 0.0, 0.0, 2.7777777777777813e-07, 0..."
6,6,"[0, 12, 11, 11, 0, 5, 10, 2, 2, 1, 1, 5, 12, 1...","[2.7777777777777135e-07, 0.0, 0.0, 0.0, 0.0, 0..."
7,7,"[0, 1, 1, 2, 2, 5, 5, 0, 10, 11, 11, 12, 12, 1...","[0.0, 0.0, 2.7777777777777813e-07, 0.0, 0.0, 0..."
8,8,"[0, 1, 1, 2, 2, 5, 5, 0, 10, 11, 10, 13, 13, 1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.777..."
9,9,"[0, 1, 1, 2, 2, 5, 5, 0, 14, 10, 13, 13, 12, 1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.77777777..."


-------- Dataset: SEPSIS


parsing log, completed traces :: 100%|██████████| 1050/1050 [00:00<00:00, 2453.00it/s]


Unnamed: 0,entity,date,type
0,A,2014-10-22 09:15:41+00:00,ER Registration
1,A,2014-10-22 09:27:00+00:00,Leucocytes
2,A,2014-10-22 09:27:00+00:00,CRP
3,A,2014-10-22 09:27:00+00:00,LacticAcid
4,A,2014-10-22 09:33:37+00:00,ER Triage
5,A,2014-10-22 09:34:00+00:00,ER Sepsis Triage
6,A,2014-10-22 12:03:47+00:00,IV Liquid
7,A,2014-10-22 12:03:47+00:00,IV Antibiotics
8,A,2014-10-22 12:13:19+00:00,Admission NC
9,A,2014-10-24 07:00:00+00:00,CRP


df_entity_seq_interval


100%|██████████| 1050/1050 [00:00<00:00, 2152.00it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 1, 2, 1, 2, ...","[0.18861111111073114, 0.0, 0.0, 0.110277777777..."
1,1,"[0, 4, 2, 3, 1, 5, 6, 7, 8, 2, 2, 9]","[0.21527777777737356, 0.31138888888926886, 0.0..."
2,2,"[0, 4, 5, 1, 2, 6, 7, 8, 8, 1, 2, 1, 2, 9]","[0.5983333333333576, 0.002777777777737356, 0.0..."
3,3,"[0, 4, 5, 2, 3, 1, 6, 7, 8, 1, 2, 9, 10]","[0.23999999999978172, 0.1750000000001819, 0.55..."
4,4,"[0, 4, 5, 6, 2, 1, 3, 7]","[0.018055555556202307, 0.020000000000436557, 0..."
5,5,"[0, 4, 5, 1, 2, 3, 7, 6, 8, 9]","[0.014444444444052351, 0.005277777778246673, 0..."
6,6,"[0, 4, 5, 6, 7, 3, 2, 1, 8, 1, 2, 9]","[0.19777777777744632, 0.014166666666824312, 0...."
7,7,"[0, 4, 5, 2, 3, 1, 11, 8, 2, 1, 2, 8, 12]","[0.017777777778064774, 0.005555555555474712, 0..."
8,8,"[0, 4, 5, 7, 1, 2, 3, 8, 1, 2, 9]","[0.22361111111111143, 0.0036111111111125638, 0..."
9,9,"[0, 4, 5, 2, 1, 3, 6, 7, 8, 2, 9]","[0.02583333333336668, 0.14250000000015461, 0.0..."


-------- Dataset: BPI_Challenge_2012


parsing log, completed traces :: 100%|██████████| 13087/13087 [00:05<00:00, 2210.51it/s]


Unnamed: 0,entity,date,type
0,173688,2011-09-30 22:38:44.546000+00:00,A_SUBMITTED
1,173688,2011-09-30 22:38:44.880000+00:00,A_PARTLYSUBMITTED
2,173688,2011-09-30 22:39:37.906000+00:00,A_PREACCEPTED
3,173688,2011-09-30 22:39:38.875000+00:00,W_Completeren aanvraag
4,173688,2011-10-01 09:36:46.437000+00:00,W_Completeren aanvraag
5,173688,2011-10-01 09:42:43.308000+00:00,A_ACCEPTED
6,173688,2011-10-01 09:45:09.243000+00:00,O_SELECTED
7,173688,2011-10-01 09:45:09.243000+00:00,A_FINALIZED
8,173688,2011-10-01 09:45:11.197000+00:00,O_CREATED
9,173688,2011-10-01 09:45:11.380000+00:00,O_SENT


df_entity_seq_interval


100%|██████████| 13087/13087 [00:05<00:00, 2222.80it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 3, 9, 9, 9, ...","[9.277777777777778e-05, 0.014729444444444444, ..."
1,1,"[0, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 3, 9, ...","[0.0010941666666672845, 0.015125833333333283, ..."
2,2,"[0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 6, 5, 7, ...","[8.44444444432213e-05, 0.011787500000000506, 0..."
3,3,"[0, 1, 18]","[4.694444444552914e-05, 0.010384722222221754, ..."
4,4,"[0, 1, 18]","[5.777777777726811e-05, 0.011370833333334218, ..."
5,5,"[0, 1, 2, 3, 3, 3, 3, 19, 3]","[4.861111110976424e-05, 0.014508333333333567, ..."
6,6,"[0, 1, 20, 20, 2, 3, 20, 3, 3, 3, 3, 3, 18, 3]","[2.472222222316134e-05, 0.01146111111110848, 0..."
7,7,"[0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 19]","[5.222222222300843e-05, 0.012182222222222094, ..."
8,8,"[0, 1, 20, 20, 2, 3, 20, 3, 3, 3, 3, 3, 19, 3]","[4.0277777777930623e-05, 0.0037133333333336793..."
9,9,"[0, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 3, 9, ...","[7.999999999874774e-05, 0.008865277777777791, ..."


-------- Dataset: BPI_Challenge_2012_A


parsing log, completed traces :: 100%|██████████| 13087/13087 [00:01<00:00, 7382.33it/s]


Unnamed: 0,entity,date,type
0,173688,2011-09-30 22:38:44.546000+00:00,A_SUBMITTED
1,173688,2011-09-30 22:38:44.880000+00:00,A_PARTLYSUBMITTED
2,173688,2011-09-30 22:39:37.906000+00:00,A_PREACCEPTED
3,173688,2011-10-01 09:42:43.308000+00:00,A_ACCEPTED
4,173688,2011-10-01 09:45:09.243000+00:00,A_FINALIZED
5,173688,2011-10-13 08:37:29.226000+00:00,A_REGISTERED
6,173688,2011-10-13 08:37:29.226000+00:00,A_APPROVED
7,173688,2011-10-13 08:37:29.226000+00:00,A_ACTIVATED
8,173691,2011-10-01 06:08:58.256000+00:00,A_SUBMITTED
9,173691,2011-10-01 06:09:02.195000+00:00,A_PARTLYSUBMITTED


df_entity_seq_interval


100%|██████████| 13087/13087 [00:02<00:00, 4698.26it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 4, 5, 6, 7]","[9.277777777777778e-05, 0.014729444444444444, ..."
1,1,"[0, 1, 2, 3, 4, 6, 5, 7]","[0.0010941666666672845, 0.015125833333333283, ..."
2,2,"[0, 1, 2, 3, 4, 6, 5, 7]","[8.44444444432213e-05, 0.011787500000000506, 5..."
3,3,"[0, 1, 8]","[4.694444444552914e-05, 0.010384722222221754, ..."
4,4,"[0, 1, 8]","[5.777777777726811e-05, 0.011370833333334218, ..."
5,5,"[0, 1, 2, 9]","[4.861111110976424e-05, 0.014508333333333567, ..."
6,6,"[0, 1, 2, 8]","[2.472222222316134e-05, 0.519886944444444, 4.7..."
7,7,"[0, 1, 2, 9]","[5.222222222300843e-05, 0.012182222222222094, ..."
8,8,"[0, 1, 2, 9]","[4.0277777777930623e-05, 0.1983497222222219, 5..."
9,9,"[0, 1, 2, 3, 4, 8]","[7.999999999874774e-05, 0.008865277777777791, ..."


-------- Dataset: BPI_Challenge_2012_Complete


parsing log, completed traces :: 100%|██████████| 13087/13087 [00:04<00:00, 2630.01it/s]


Unnamed: 0,entity,date,type
0,173688,2011-09-30 22:38:44.546000+00:00,A_SUBMITTED
1,173688,2011-09-30 22:38:44.880000+00:00,A_PARTLYSUBMITTED
2,173688,2011-09-30 22:39:37.906000+00:00,A_PREACCEPTED
3,173688,2011-10-01 09:42:43.308000+00:00,A_ACCEPTED
4,173688,2011-10-01 09:45:09.243000+00:00,O_SELECTED
5,173688,2011-10-01 09:45:09.243000+00:00,A_FINALIZED
6,173688,2011-10-01 09:45:11.197000+00:00,O_CREATED
7,173688,2011-10-01 09:45:11.380000+00:00,O_SENT
8,173688,2011-10-01 09:45:13.917000+00:00,W_Completeren aanvraag
9,173688,2011-10-01 10:17:08.924000+00:00,W_Nabellen offertes


df_entity_seq_interval


100%|██████████| 13087/13087 [00:04<00:00, 3173.34it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 9, 11, 1...","[9.277777777777778e-05, 0.014729444444444444, ..."
1,1,"[0, 1, 2, 8, 3, 5, 4, 6, 7, 8, 9, 16, 4, 6, 7,...","[0.0010941666666672845, 0.015125833333333283, ..."
2,2,"[0, 1, 2, 8, 8, 8, 3, 4, 5, 6, 7, 8, 4, 16, 6,...","[8.44444444432213e-05, 0.011787500000000506, 3..."
3,3,"[0, 1, 17]","[4.694444444552914e-05, 0.010384722222221754, ..."
4,4,"[0, 1, 17]","[5.777777777726811e-05, 0.011370833333334218, ..."
5,5,"[0, 1, 2, 8, 18, 8]","[4.861111110976424e-05, 0.014508333333333567, ..."
6,6,"[0, 1, 2, 19, 8, 8, 17, 8]","[2.472222222316134e-05, 0.519886944444444, 0.0..."
7,7,"[0, 1, 2, 8, 8, 8, 8, 18]","[5.222222222300843e-05, 0.012182222222222094, ..."
8,8,"[0, 1, 2, 19, 8, 8, 18, 8]","[4.0277777777930623e-05, 0.1983497222222219, 0..."
9,9,"[0, 1, 2, 8, 3, 4, 5, 6, 7, 8, 9, 10, 9, 17, 2...","[7.999999999874774e-05, 0.008865277777777791, ..."


-------- Dataset: BPI_Challenge_2012_O


parsing log, completed traces :: 100%|██████████| 5015/5015 [00:00<00:00, 8816.91it/s] 


Unnamed: 0,entity,date,type
0,173718,2011-10-01 08:44:40.725000+00:00,O_SELECTED
1,173718,2011-10-01 08:44:42.707000+00:00,O_CREATED
2,173718,2011-10-01 08:44:42.734000+00:00,O_SENT
3,173718,2011-10-01 08:46:52.915000+00:00,O_SELECTED
4,173718,2011-10-01 08:46:52.915000+00:00,O_CANCELLED
5,173718,2011-10-01 08:46:54.210000+00:00,O_CREATED
6,173718,2011-10-01 08:46:54.233000+00:00,O_SENT
7,173718,2011-10-10 08:55:04.984000+00:00,O_SENT_BACK
8,173718,2011-10-27 07:17:53.327000+00:00,O_ACCEPTED
9,173721,2011-10-01 09:20:47.605000+00:00,O_SELECTED


df_entity_seq_interval


100%|██████████| 5015/5015 [00:01<00:00, 4147.97it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 0, 3, 1, 2, 4, 5]","[0.0005505555555555555, 7.50000000000002e-06, ..."
1,1,"[0, 1, 2, 4, 6]","[0.0003947222222221436, 8.888888888947122e-06,..."
2,2,"[0, 1, 2, 4, 5]","[0.000542777777778003, 5.0833333333333286e-05,..."
3,3,"[0, 1, 2, 4, 6]","[0.00034000000000000696, 7.2222222222695365e-0..."
4,4,"[0, 1, 2, 0, 3, 1, 2, 4, 5]","[0.00040888888888934716, 2.63888888887287e-05,..."
5,5,"[0, 1, 2, 4, 5]","[0.00048111111111115434, 7.22222222293567e-06,..."
6,6,"[0, 1, 2, 4, 5]","[0.00032111111111099433, 8.333333333609971e-06..."
7,7,"[0, 1, 2, 3]","[0.0008283333333336529, 6.1111111104850124e-06..."
8,8,"[0, 1, 2, 4, 5]","[0.0002575000000009098, 7.499999999716067e-06,..."
9,9,"[0, 1, 2, 0, 3, 1, 2, 4, 0, 3, 1, 2, 4, 5]","[0.0003852777777773042, 1.111111111207208e-05,..."


-------- Dataset: BPI_Challenge_2012_W


parsing log, completed traces :: 100%|██████████| 9658/9658 [00:05<00:00, 1798.95it/s]


Unnamed: 0,entity,date,type
0,173688,2011-09-30 22:39:38.875000+00:00,W_Completeren aanvraag
1,173688,2011-10-01 09:36:46.437000+00:00,W_Completeren aanvraag
2,173688,2011-10-01 09:45:11.554000+00:00,W_Nabellen offertes
3,173688,2011-10-01 09:45:13.917000+00:00,W_Completeren aanvraag
4,173688,2011-10-01 10:15:41.290000+00:00,W_Nabellen offertes
5,173688,2011-10-01 10:17:08.924000+00:00,W_Nabellen offertes
6,173688,2011-10-08 14:26:57.720000+00:00,W_Nabellen offertes
7,173688,2011-10-08 14:32:00.886000+00:00,W_Nabellen offertes
8,173688,2011-10-10 09:32:22.495000+00:00,W_Nabellen offertes
9,173688,2011-10-10 09:33:04.560000+00:00,W_Valideren aanvraag


df_entity_seq_interval


100%|██████████| 9658/9658 [00:04<00:00, 2160.95it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 0, 1, 0, 1, 1, 1, 1, 1, 2, 1, 2, 2]","[10.952100555555555, 0.14031027777777716, 0.00..."
1,1,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, ...","[3.4591152777777765, 0.0946625000000001, 2.745..."
2,2,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, ...","[3.3366419444444437, 0.0762044444444463, 47.60..."
3,3,"[4, 4, 0, 4, 0, 0, 0, 0, 0, 0]","[0.4903499999999994, 0.018085555555556354, 0.0..."
4,4,"[0, 0, 0, 0, 0]","[1.8577983333333332, 0.043720277777778094, 1.0..."
5,5,"[0, 0, 0, 0, 0, 0, 0, 0]","[0.4707777777777782, 0.00712500000000027, 1.22..."
6,6,"[4, 4, 0, 4, 0, 0, 0, 0, 0, 0]","[0.1646261111111098, 0.030022222222223505, 0.0..."
7,7,"[0, 0, 0, 0, 1, 0, 1, 1, 1, 2, 1, 2, 2]","[1.7380852777777775, 0.03504305555555476, 0.54..."
8,8,"[0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.007514166666666711, 0.09843250000000126, 0...."
9,9,"[0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 5, ...","[0.014198055555556977, 0.1945952777777773, 0.0..."


-------- Dataset: BPI_Challenge_2012_W_Complete


parsing log, completed traces :: 100%|██████████| 9658/9658 [00:02<00:00, 3725.93it/s]


Unnamed: 0,entity,date,type
0,173712,2011-10-01 08:10:25.759000+00:00,W_Afhandelen leads
1,173712,2011-10-01 11:03:35.216000+00:00,W_Completeren aanvraag
2,173712,2011-10-03 09:17:29.105000+00:00,W_Completeren aanvraag
3,173712,2011-10-03 12:42:55.584000+00:00,W_Completeren aanvraag
4,173706,2011-10-01 08:16:49.843000+00:00,W_Afhandelen leads
5,173706,2011-10-01 09:50:14.483000+00:00,W_Completeren aanvraag
6,173706,2011-10-01 09:53:55.769000+00:00,W_Completeren aanvraag
7,173706,2011-10-01 13:00:02.537000+00:00,W_Completeren aanvraag
8,173709,2011-10-01 08:27:07.853000+00:00,W_Completeren aanvraag
9,173709,2011-10-01 09:43:02.623000+00:00,W_Completeren aanvraag


df_entity_seq_interval


100%|██████████| 9658/9658 [00:02<00:00, 4141.85it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 1, 1]","[2.885960277777778, 46.23163583333333, 3.42402..."
1,1,"[0, 1, 1, 1]","[1.5568444444444445, 0.06146833333333346, 3.10..."
2,2,"[1, 1, 1, 1]","[1.2652138888888889, 222.00296305555554, 520.5..."
3,3,"[1, 2, 2, 2, 2, 2, 2, 3, 4, 4, 4, 4, 4, 4, 4, ...","[0.004172222222222244, 0.03268249999999995, 0...."
4,4,"[1, 2, 2, 2, 2, 3, 4, 4, 4, 4]","[0.033923055555555415, 0.16719805555555567, 75..."
5,5,"[1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3]","[47.75632777777777, 2.1891377777777805, 0.1260..."
6,6,"[1, 1]","[1.3634488888888892, 0.0]"
7,7,"[1, 1, 2, 2, 2, 2, 3, 3, 3]","[2.8765591666666666, 0.010278611111111502, 50...."
8,8,"[1, 2, 2, 2, 3]","[0.5319463888888887, 172.24776722222222, 43.01..."
9,9,"[1, 1, 2, 2, 3]","[0.7382813888888893, 172.13144916666667, 44.26..."


-------- Dataset: BPI_Challenge_2013_closed_problems


parsing log, completed traces :: 100%|██████████| 1487/1487 [00:00<00:00, 6139.44it/s]


Unnamed: 0,entity,date,type
0,1-109135791,2006-01-11 14:49:42+00:00,Queued-Awaiting Assignment
1,1-109135791,2012-03-15 10:53:52+00:00,Accepted-In Progress
2,1-109135791,2012-03-15 10:56:17+00:00,Accepted-Assigned
3,1-109135791,2012-03-15 11:09:05+00:00,Accepted-In Progress
4,1-109135791,2012-03-15 11:11:33+00:00,Completed-Closed
5,1-147898401,2006-11-07 09:00:36+00:00,Accepted-In Progress
6,1-147898401,2006-11-07 12:05:44+00:00,Accepted-In Progress
7,1-147898401,2009-12-02 13:24:32+00:00,Accepted-Wait
8,1-147898401,2011-09-03 05:09:09+00:00,Accepted-In Progress
9,1-147898401,2012-01-20 09:23:24+00:00,Accepted-In Progress


df_entity_seq_interval


100%|██████████| 1487/1487 [00:00<00:00, 6301.83it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 1, 3]","[54116.069444444445, 0.040277777778101154, 0.2..."
1,1,"[1, 1, 4, 1, 1, 3]","[3.085555555555402, 26905.313333333335, 15351...."
2,2,"[1, 4, 1, 1, 3]","[23717.301666666666, 15351.772777777776, 3290...."
3,3,"[1, 4, 1, 1, 3]","[22487.044166666667, 15351.773888888885, 3290...."
4,4,"[1, 4, 0, 1, 2, 1, 3]","[20665.057500000003, 6379.808055555557, 9022.1..."
5,5,"[1, 1, 4, 1, 1, 1, 3]","[3645.762777777778, 10126.665833333333, 15321...."
6,6,"[1, 4, 0, 1, 2, 1, 3]","[13107.371111111112, 6379.703055555554, 8972.9..."
7,7,"[1, 1, 1, 0, 1, 0, 1, 3]","[28153.214444444442, 0.26138888888817746, 411...."
8,8,"[1, 4, 1, 1, 3]","[9219.391944444444, 17089.547777777778, 1554.3..."
9,9,"[1, 1, 3]","[24171.378888888892, 0.02555555555591127, 0.0]"


-------- Dataset: bpi_challenge_2013_incidents


parsing log, completed traces :: 100%|██████████| 7554/7554 [00:03<00:00, 2405.88it/s]


Unnamed: 0,entity,date,type
0,1-364285768,2010-03-31 14:59:42+00:00,Accepted
1,1-364285768,2010-03-31 15:00:56+00:00,Accepted
2,1-364285768,2010-03-31 15:45:48+00:00,Queued
3,1-364285768,2010-04-06 14:44:07+00:00,Accepted
4,1-364285768,2010-04-06 14:44:38+00:00,Queued
5,1-364285768,2010-04-06 14:44:47+00:00,Accepted
6,1-364285768,2010-04-06 14:44:51+00:00,Completed
7,1-364285768,2010-04-06 14:45:07+00:00,Queued
8,1-364285768,2010-04-08 10:52:23+00:00,Accepted
9,1-364285768,2010-04-08 10:53:35+00:00,Queued


df_entity_seq_interval


100%|██████████| 7554/7554 [00:02<00:00, 3760.31it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 0, 1, 0, 1, 0, 2, 1, 0, 1, 0, 0, 0, 0, 0, ...","[0.020555555555555556, 0.7477777777777778, 142..."
1,1,"[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.10611111111120408, 0.005833333333612245, 1...."
2,2,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, ...","[0.0016666666670062114, 0.028611111110876664, ..."
3,3,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, ...","[0.0019444444451437448, 0.05805555555525643, 2..."
4,4,"[0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, ...","[0.04305555555583851, 0.121111111110622, 0.014..."
5,5,"[0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, ...","[0.014166666667733807, 0.09888888888781366, 0...."
6,6,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, ...","[0.018333333333430346, 0.11694444444401597, 14..."
7,7,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 2]","[0.002777777777737356, 0.0711111111122591, 0.9..."
8,8,"[0, 0, 0, 0, 0, 0, 2, 2]","[0.012222222221680568, 0.2125000000014552, 3.3..."
9,9,"[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, ...","[0.00583333333270275, 0.16138888888963265, 92...."


-------- Dataset: BPI Challenge 2017


parsing log, completed traces :: 100%|██████████| 31509/31509 [00:40<00:00, 786.59it/s] 


Unnamed: 0,entity,date,type
0,Application_652823628,2016-01-01 09:51:15.304000+00:00,A_Create Application
1,Application_652823628,2016-01-01 09:51:15.352000+00:00,A_Submitted
2,Application_652823628,2016-01-01 09:51:15.774000+00:00,W_Handle leads
3,Application_652823628,2016-01-01 09:52:36.392000+00:00,W_Handle leads
4,Application_652823628,2016-01-01 09:52:36.403000+00:00,W_Complete application
5,Application_652823628,2016-01-01 09:52:36.413000+00:00,A_Concept
6,Application_652823628,2016-01-02 10:45:22.429000+00:00,W_Complete application
7,Application_652823628,2016-01-02 10:49:28.816000+00:00,W_Complete application
8,Application_652823628,2016-01-02 11:23:04.299000+00:00,A_Accepted
9,Application_652823628,2016-01-02 11:29:03.994000+00:00,O_Create Offer


df_entity_seq_interval


100%|██████████| 31509/31509 [00:23<00:00, 1347.87it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 2, 3, 4, 3, 3, 5, 6, 7, 8, 3, 9, 9, ...","[1.3333333333333333e-05, 0.0001172222222222222..."
1,1,"[0, 1, 2, 2, 3, 4, 3, 3, 3, 5, 6, 7, 8, 3, 9, ...","[1.3611111111089258e-05, 5.3055555555570066e-0..."
2,2,"[0, 1, 2, 2, 3, 4, 3, 5, 6, 7, 8, 3, 9, 9, 10,...","[1.6111111111216658e-05, 0.0001886111111111255..."
3,3,"[0, 1, 2, 2, 2, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, ...","[1.0833333333515327e-05, 0.0001027777777777849..."
4,4,"[0, 1, 2, 2, 3, 4, 3, 5, 6, 7, 8, 3, 9, 9, 10,...","[1.0555555555402663e-05, 9.41666666669505e-05,..."
5,5,"[0, 1, 2, 2, 3, 4, 3, 3, 3, 5, 6, 7, 8, 3, 9, ...","[9.722222221952848e-06, 5.1944444444451676e-05..."
6,6,"[0, 1, 2, 2, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 5, ...","[1.0555555555846752e-05, 5.0277777777996135e-0..."
7,7,"[0, 1, 2, 2, 3, 4, 3, 3, 3, 5, 6, 7, 8, 3, 9, ...","[1.1388888888852478e-05, 5.5833333333588087e-0..."
8,8,"[0, 1, 2, 2, 3, 4, 3, 5, 6, 7, 8, 3, 9, 9, 10,...","[1.4722222221763559e-05, 7.055555555535165e-05..."
9,9,"[0, 1, 2, 2, 3, 4, 3, 3, 5, 6, 7, 8, 3, 9, 9, ...","[1.3055555554863929e-05, 9.916666666676122e-05..."


-------- Dataset: BPI_Challenge_2019


parsing log, completed traces :: 100%|██████████| 251734/251734 [00:57<00:00, 4371.27it/s]


Unnamed: 0,entity,date,type
0,2000000000_00001,2018-01-02 12:53:00+00:00,SRM: Created
1,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: Complete
2,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: Awaiting Approval
3,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: Document Completed
4,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: In Transfer to Execution Syst.
5,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: Ordered
6,2000000000_00001,2018-01-02 13:53:00+00:00,SRM: Change was Transmitted
7,2000000000_00001,2018-01-02 13:53:00+00:00,Create Purchase Order Item
8,2000000000_00001,2018-01-02 22:59:00+00:00,Vendor creates invoice
9,2000000000_00001,2018-03-06 06:44:00+00:00,Record Goods Receipt


df_entity_seq_interval


100%|██████████| 251734/251734 [00:56<00:00, 4458.90it/s]


Unnamed: 0,Entite,Obs_seq,Intervals_seq
0,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.09999999..."
1,1,"[0, 1, 2, 3, 5, 4, 6, 7, 8, 9, 12, 10, 11, 4, 13]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 181.166666..."
2,2,"[0, 1, 2, 3, 5, 4, 6, 7, 12, 9, 8, 14, 9, 12, ...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 792.599999..."
3,3,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.6333333..."
4,4,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.6333333..."
5,5,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.6333333..."
6,6,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.6333333..."
7,7,"[0, 1, 2, 3, 4, 5, 7, 6, 8, 15, 9, 10, 11, 16,...","[1.0, 0.0, 0.0, 0.016666666604578495, 0.0, 0.0..."
8,8,"[0, 1, 2, 3, 4, 5, 7, 6]","[1.0, 0.0, 0.0, 0.016666666604578495, 0.0, 0.0..."
9,9,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 11, 4, 17]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 11.4333333..."
