# Pre-Processing for logs that are not in .xes.gz format

## ACCRE-log

The log has been split into test and train. Each contains only completed traces. Hence, some traces that start in the train data and finish in the test data have been removed. Because we want to avoid detecting drifts due to these removed traces, we only include the train dataset.

In [92]:
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.exporter.xes import exporter as xes_exporter


log_path = 'data/real/ACCRE/AcademicCredentials_train.csv.gz' 

df = pd.read_csv(log_path, compression='gzip')
df


Unnamed: 0,activity,case_id,resource,end_time,start_time
0,Traer informacion estudiante - banner,100101,7071,2016-03-12T22:59:55.000,2016-03-12T22:59:55.000
1,Radicar Solicitud Homologacion,100101,7071,2016-03-12T23:00:02.000,2016-03-12T23:00:02.000
2,Validar solicitud,100101,22475,2016-03-12T23:19:56.000,2016-03-12T23:19:49.000
3,Homologacion por grupo de cursos,100101,22478,2016-03-14T13:15:32.000,2016-03-14T13:07:40.000
4,Revisar curso,100101,22475,2016-03-14T13:07:40.000,2016-03-14T13:07:40.000
...,...,...,...,...,...
1940,Cancelar curso,94453,943,2016-03-17T13:49:44.000,2016-03-17T13:49:44.000
1941,Visto Bueno Cierre Proceso,99651,3445,2016-03-16T23:13:48.000,2016-03-16T23:13:48.000
1942,Traer informacion estudiante - banner,99751,19582,2016-03-11T20:49:15.000,2016-03-11T20:49:15.000
1943,Radicar Solicitud Homologacion,99751,19582,2016-03-11T20:49:20.000,2016-03-11T20:49:18.000


In [93]:
# === Convert to PM4Py event log ===
# PM4Py expects: case:concept:name, concept:name, time:timestamp

df['case:concept:name'] = df['case_id']
df['concept:name'] = df['activity']
df['time:timestamp'] = df['start_time']
df['org:resource'] = df['resource']

# Convert timestamp column
df = dataframe_utils.convert_timestamp_columns_in_df(df)

# sort by time
df = df.sort_values(by="time:timestamp").reset_index(drop=True)
df

Unnamed: 0,activity,case_id,resource,end_time,start_time,case:concept:name,concept:name,time:timestamp,org:resource
0,Traer informacion estudiante - banner,81051,8997,2016-02-01 11:23:52+00:00,2016-02-01 11:23:52+00:00,81051,Traer informacion estudiante - banner,2016-02-01 11:23:52+00:00,8997
1,Radicar Solicitud Homologacion,81051,8997,2016-02-01 11:24:02+00:00,2016-02-01 11:24:02+00:00,81051,Radicar Solicitud Homologacion,2016-02-01 11:24:02+00:00,8997
2,Validar solicitud,81051,387,2016-02-01 11:32:06+00:00,2016-02-01 11:27:47+00:00,81051,Validar solicitud,2016-02-01 11:27:47+00:00,387
3,Traer informacion estudiante - banner,81102,6426,2016-02-01 16:53:46+00:00,2016-02-01 16:53:46+00:00,81102,Traer informacion estudiante - banner,2016-02-01 16:53:46+00:00,6426
4,Radicar Solicitud Homologacion,81102,6426,2016-02-01 17:09:56+00:00,2016-02-01 16:53:50+00:00,81102,Radicar Solicitud Homologacion,2016-02-01 16:53:50+00:00,6426
...,...,...,...,...,...,...,...,...,...
1940,Validar solicitud / pre-homologacion,109502,22475,2016-04-14 18:44:23+00:00,2016-04-14 18:44:23+00:00,109502,Validar solicitud / pre-homologacion,2016-04-14 18:44:23+00:00,22475
1941,Validar solicitud / pre-homologacion,109454,22475,2016-04-14 18:45:57+00:00,2016-04-14 18:45:57+00:00,109454,Validar solicitud / pre-homologacion,2016-04-14 18:45:57+00:00,22475
1942,Validar solicitud / pre-homologacion,109503,22475,2016-04-14 18:46:42+00:00,2016-04-14 18:46:42+00:00,109503,Validar solicitud / pre-homologacion,2016-04-14 18:46:42+00:00,22475
1943,Visto Bueno Cierre Proceso,108054,3445,2016-04-14 19:15:29+00:00,2016-04-14 19:00:38+00:00,108054,Visto Bueno Cierre Proceso,2016-04-14 19:00:38+00:00,3445


In [94]:
# === Convert to PM4Py event log ===
event_log = log_converter.apply(df, variant=log_converter.Variants.TO_EVENT_LOG)

# === Export to XES.GZ ===
output_path = "data/real/ACCRE/ACCRE.xes.gz"
xes_exporter.apply(event_log, output_path, parameters={"gzip": True})

exporting log, completed traces :: 100%|██████████| 398/398 [00:00<00:00, 562.44it/s]


## BPIC14

In [95]:
log_path = 'data/real/BPIC14/Detail_incident_Activity.csv.gz'

df = pd.read_csv(log_path, delimiter=';', compression='gzip')
df

Unnamed: 0,Incident ID,DateStamp,IncidentActivity_Number,IncidentActivity_Type,Assignment Group,KM number,Interaction ID
0,IM0000004,07-01-2013 08:17:17,001A3689763,Reassignment,TEAM0001,KM0000553,SD0000007
1,IM0000004,04-11-2013 13:41:30,001A5852941,Reassignment,TEAM0002,KM0000553,SD0000007
2,IM0000004,04-11-2013 13:41:30,001A5852943,Update from customer,TEAM0002,KM0000553,SD0000007
3,IM0000004,04-11-2013 12:09:37,001A5849980,Operator Update,TEAM0003,KM0000553,SD0000007
4,IM0000004,04-11-2013 12:09:37,001A5849979,Assignment,TEAM0003,KM0000553,SD0000007
...,...,...,...,...,...,...,...
466732,IM0047056,31-03-2014 15:42:37,001A6917801,Closed,TEAM0191,KM0001287,SD0146986
466733,IM0047057,31-03-2014 17:24:50,001A6919138,Open,TEAM0199,KM0000182,SD0147088
466734,IM0047057,31-03-2014 17:25:07,001A6919139,Update,TEAM0199,KM0000182,SD0147088
466735,IM0047057,31-03-2014 22:47:33,001A6922086,Closed,TEAM0134,KM0000182,SD0147088


In [96]:
# === Convert to PM4Py event log ===
# PM4Py expects: case:concept:name, concept:name, time:timestamp
df['case:concept:name'] = df['Incident ID']
df['concept:name'] = df['IncidentActivity_Type']
df['time:timestamp'] = df['DateStamp']
# df['org:resource'] =

# Convert timestamp column
df = dataframe_utils.convert_timestamp_columns_in_df(df)
df

Unnamed: 0,Incident ID,DateStamp,IncidentActivity_Number,IncidentActivity_Type,Assignment Group,KM number,Interaction ID,case:concept:name,concept:name,time:timestamp
0,IM0000004,2013-07-01 08:17:17+00:00,001A3689763,Reassignment,TEAM0001,KM0000553,SD0000007,IM0000004,Reassignment,2013-07-01 08:17:17+00:00
1,IM0000004,2013-04-11 13:41:30+00:00,001A5852941,Reassignment,TEAM0002,KM0000553,SD0000007,IM0000004,Reassignment,2013-04-11 13:41:30+00:00
2,IM0000004,2013-04-11 13:41:30+00:00,001A5852943,Update from customer,TEAM0002,KM0000553,SD0000007,IM0000004,Update from customer,2013-04-11 13:41:30+00:00
3,IM0000004,2013-04-11 12:09:37+00:00,001A5849980,Operator Update,TEAM0003,KM0000553,SD0000007,IM0000004,Operator Update,2013-04-11 12:09:37+00:00
4,IM0000004,2013-04-11 12:09:37+00:00,001A5849979,Assignment,TEAM0003,KM0000553,SD0000007,IM0000004,Assignment,2013-04-11 12:09:37+00:00
...,...,...,...,...,...,...,...,...,...,...
466732,IM0047056,2014-03-31 15:42:37+00:00,001A6917801,Closed,TEAM0191,KM0001287,SD0146986,IM0047056,Closed,2014-03-31 15:42:37+00:00
466733,IM0047057,2014-03-31 17:24:50+00:00,001A6919138,Open,TEAM0199,KM0000182,SD0147088,IM0047057,Open,2014-03-31 17:24:50+00:00
466734,IM0047057,2014-03-31 17:25:07+00:00,001A6919139,Update,TEAM0199,KM0000182,SD0147088,IM0047057,Update,2014-03-31 17:25:07+00:00
466735,IM0047057,2014-03-31 22:47:33+00:00,001A6922086,Closed,TEAM0134,KM0000182,SD0147088,IM0047057,Closed,2014-03-31 22:47:33+00:00


In [97]:
df = df.sort_values(by="time:timestamp").reset_index(drop=True)

# === Convert to PM4Py event log ===
event_log = log_converter.apply(df, variant=log_converter.Variants.TO_EVENT_LOG)

# === Export to XES.GZ ===
output_path = "data/real/BPIC14/BPIC14.xes.gz"
xes_exporter.apply(event_log, output_path, parameters={"gzip": True})

exporting log, completed traces :: 100%|██████████| 46616/46616 [02:51<00:00, 272.29it/s]


## CALL
The log has been split into test and train. Each contains only completed traces. Hence, some traces that start in the train data and finish in the test data have been removed. Because we want to avoid detecting drifts due to these removed traces, we only include the train dataset.

In [98]:
# === Load and combine CSV files ===
log_path = 'data/real/CALL/CallCenter_train.csv.gz'
df = pd.read_csv(log_path, delimiter=',')
df

Unnamed: 0,case_id,activity,resource,start_time,end_time,departure_time
0,1826000166,Overseas,44416,2008-01-01T00:06:34.000,2008-01-01T00:07:57.000,2008-01-01T00:07:57.000
1,1826000176,Overseas,44006,2008-01-01T00:05:19.000,2008-01-01T00:09:07.000,2008-01-01T00:09:07.000
2,1826000324,Overseas,44416,2008-01-01T00:11:15.000,2008-01-01T00:11:19.000,2008-01-01T00:11:19.000
3,1826000469,Overseas,44006,2008-01-01T00:15:23.000,2008-01-01T00:19:47.000,2008-01-01T00:19:47.000
4,1826001153,Overseas,44416,2008-01-01T00:43:16.000,2008-01-01T00:45:43.000,2008-01-01T00:45:43.000
...,...,...,...,...,...,...
445562,1833087592,Financial,861,2008-01-08T14:03:57.000,2008-01-08T14:14:55.000,2008-01-08T14:14:55.000
445563,1833086679,Technical,1276,2008-01-08T14:02:05.000,2008-01-08T14:16:55.000,2008-01-08T14:16:55.000
445564,1833088119,Private,43127,2008-01-08T13:50:01.000,2008-01-08T14:20:55.000,2008-01-08T14:20:55.000
445565,1833086198,Financial,869,2008-01-08T13:41:13.000,2008-01-08T14:33:55.000,2008-01-08T14:33:55.000


In [99]:
# === Convert to PM4Py event log ===
# PM4Py expects: case:concept:name, concept:name, time:timestamp

df['case:concept:name'] = df['case_id']
df['concept:name'] = df['activity']
df['time:timestamp'] = df['end_time']
df['org:resource'] = df['resource']


# Convert timestamp column
df = dataframe_utils.convert_timestamp_columns_in_df(df)

# sort by time
df = df.sort_values(by="time:timestamp").reset_index(drop=True)
df

Unnamed: 0,case_id,activity,resource,start_time,end_time,departure_time,case:concept:name,concept:name,time:timestamp,org:resource
0,1826000166,Overseas,44416,2008-01-01 00:06:34+00:00,2008-01-01 00:07:57+00:00,2008-01-01 00:07:57+00:00,1826000166,Overseas,2008-01-01 00:07:57+00:00,44416
1,1826000176,Overseas,44006,2008-01-01 00:05:19+00:00,2008-01-01 00:09:07+00:00,2008-01-01 00:09:07+00:00,1826000176,Overseas,2008-01-01 00:09:07+00:00,44006
2,1826000324,Overseas,44416,2008-01-01 00:11:15+00:00,2008-01-01 00:11:19+00:00,2008-01-01 00:11:19+00:00,1826000324,Overseas,2008-01-01 00:11:19+00:00,44416
3,1826000469,Overseas,44006,2008-01-01 00:15:23+00:00,2008-01-01 00:19:47+00:00,2008-01-01 00:19:47+00:00,1826000469,Overseas,2008-01-01 00:19:47+00:00,44006
4,1826001153,Overseas,44416,2008-01-01 00:43:16+00:00,2008-01-01 00:45:43+00:00,2008-01-01 00:45:43+00:00,1826001153,Overseas,2008-01-01 00:45:43+00:00,44416
...,...,...,...,...,...,...,...,...,...,...
445562,1833087592,Financial,861,2008-01-08 14:03:57+00:00,2008-01-08 14:14:55+00:00,2008-01-08 14:14:55+00:00,1833087592,Financial,2008-01-08 14:14:55+00:00,861
445563,1833086679,Technical,1276,2008-01-08 14:02:05+00:00,2008-01-08 14:16:55+00:00,2008-01-08 14:16:55+00:00,1833086679,Technical,2008-01-08 14:16:55+00:00,1276
445564,1833088119,Private,43127,2008-01-08 13:50:01+00:00,2008-01-08 14:20:55+00:00,2008-01-08 14:20:55+00:00,1833088119,Private,2008-01-08 14:20:55+00:00,43127
445565,1833086198,Financial,869,2008-01-08 13:41:13+00:00,2008-01-08 14:33:55+00:00,2008-01-08 14:33:55+00:00,1833086198,Financial,2008-01-08 14:33:55+00:00,869


In [100]:
# === Convert to PM4Py event log ===
event_log = log_converter.apply(df, variant=log_converter.Variants.TO_EVENT_LOG)

# === Export to XES.GZ ===
output_path = "data/real/CALL/CALL.xes.gz"
xes_exporter.apply(event_log, output_path, parameters={"gzip": True})

exporting log, completed traces :: 100%|██████████| 260889/260889 [03:06<00:00, 1396.46it/s]


## ITHD

In [101]:
# === Load and combine CSV files ===
log_path = 'data/real/ITHD/finale.csv.gz'
df = pd.read_csv(log_path, delimiter=',')
df

Unnamed: 0,Case ID,Activity,Resource,Complete Timestamp,Variant,Variant index,Variant.1,seriousness,customer,product,responsible_section,seriousness_2,service_level,service_type,support_section,workgroup
0,Case 1,Assign seriousness,Value 1,2012/10/09 14:50:17.000,Variant 12,12,Variant 12,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
1,Case 1,Take in charge ticket,Value 1,2012/10/09 14:51:01.000,Variant 12,12,Variant 12,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
2,Case 1,Take in charge ticket,Value 2,2012/10/12 15:02:56.000,Variant 12,12,Variant 12,Value 1,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1
3,Case 1,Resolve ticket,Value 1,2012/10/25 11:54:26.000,Variant 12,12,Variant 12,Value 1,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1
4,Case 1,Closed,Value 3,2012/11/09 12:54:39.000,Variant 12,12,Variant 12,Value 1,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21343,Case 4579,Closed,Value 5,2010/09/02 10:11:00.000,Variant 1,1,Variant 1,Value 1,Value 71,Value 3,Value 1,Value 1,Value 3,Value 1,Value 1,Value 1
21344,Case 4580,Take in charge ticket,Value 6,2012/01/03 09:33:43.000,Variant 18,18,Variant 18,Value 1,Value 92,Value 3,Value 1,Value 2,Value 2,Value 2,Value 1,Value 1
21345,Case 4580,Wait,Value 6,2012/01/10 15:30:11.000,Variant 18,18,Variant 18,Value 1,Value 92,Value 3,Value 1,Value 2,Value 2,Value 2,Value 1,Value 1
21346,Case 4580,Resolve ticket,Value 6,2012/01/10 17:07:40.000,Variant 18,18,Variant 18,Value 1,Value 92,Value 3,Value 1,Value 2,Value 2,Value 2,Value 1,Value 1


In [102]:
# === Convert to PM4Py event log ===
# PM4Py expects: case:concept:name, concept:name, time:timestamp

df['case:concept:name'] = df['Case ID']
df['concept:name'] = df['Activity']
df['time:timestamp'] = df['Complete Timestamp']
df['org:resource'] = df['Resource']


# Convert timestamp column
df = dataframe_utils.convert_timestamp_columns_in_df(df)

# sort by time
df = df.sort_values(by="time:timestamp").reset_index(drop=True)
df

Unnamed: 0,Case ID,Activity,Resource,Complete Timestamp,Variant,Variant index,Variant.1,seriousness,customer,product,responsible_section,seriousness_2,service_level,service_type,support_section,workgroup,case:concept:name,concept:name,time:timestamp,org:resource
0,Case 3608,Assign seriousness,Value 2,2010-01-13 08:40:25+00:00,Variant 33,33,Variant 33,Value 1,Value 63,Value 3,Value 5,Value 1,Value 2,Value 1,Value 4,Value 3,Case 3608,Assign seriousness,2010-01-13 08:40:25+00:00,Value 2
1,Case 2748,Assign seriousness,Value 2,2010-01-13 12:26:04+00:00,Variant 1,1,Variant 1,Value 1,Value 52,Value 3,Value 7,Value 1,Value 2,Value 1,Value 3,Value 1,Case 2748,Assign seriousness,2010-01-13 12:26:04+00:00,Value 2
2,Case 4284,Assign seriousness,Value 2,2010-01-13 12:30:37+00:00,Variant 219,219,Variant 219,Value 1,Value 9,Value 3,Value 2,Value 2,Value 2,Value 2,Value 3,Value 1,Case 4284,Assign seriousness,2010-01-13 12:30:37+00:00,Value 2
3,Case 1534,Assign seriousness,Value 2,2010-01-13 13:09:31+00:00,Variant 33,33,Variant 33,Value 1,Value 176,Value 3,Value 4,Value 1,Value 2,Value 1,Value 3,Value 3,Case 1534,Assign seriousness,2010-01-13 13:09:31+00:00,Value 2
4,Case 406,Assign seriousness,Value 2,2010-01-13 17:25:25+00:00,Variant 4,4,Variant 4,Value 1,Value 143,Value 3,Value 4,Value 1,Value 2,Value 1,Value 3,Value 1,Case 406,Assign seriousness,2010-01-13 17:25:25+00:00,Value 2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21343,Case 1156,Closed,Value 3,2014-01-03 10:02:38+00:00,Variant 2,2,Variant 2,Value 1,Value 22,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1,Case 1156,Closed,2014-01-03 10:02:38+00:00,Value 3
21344,Case 3013,Closed,Value 3,2014-01-03 10:03:58+00:00,Variant 28,28,Variant 28,Value 1,Value 20,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1,Case 3013,Closed,2014-01-03 10:03:58+00:00,Value 3
21345,Case 3857,Closed,Value 3,2014-01-03 10:08:18+00:00,Variant 2,2,Variant 2,Value 1,Value 124,Value 6,Value 1,Value 1,Value 3,Value 1,Value 1,Value 1,Case 3857,Closed,2014-01-03 10:08:18+00:00,Value 3
21346,Case 2585,Closed,Value 3,2014-01-03 13:19:48+00:00,Variant 2,2,Variant 2,Value 1,Value 24,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1,Value 1,Case 2585,Closed,2014-01-03 13:19:48+00:00,Value 3


In [103]:
# === Convert to PM4Py event log ===
event_log = log_converter.apply(df, variant=log_converter.Variants.TO_EVENT_LOG)

# === Export to XES.GZ ===
output_path = "data/real/ITHD/ITHD.xes.gz"
xes_exporter.apply(event_log, output_path, parameters={"gzip": True})

exporting log, completed traces :: 100%|██████████| 4580/4580 [00:11<00:00, 389.31it/s]


## MOBIS

In [104]:
# === Load and combine CSV files ===
log_path = 'data/real/MOBIS/10258_mobis_challenge_log_2019.csv.gz'
df = pd.read_csv(log_path, delimiter=';')
df

Unnamed: 0,activity,case,start,end,type,user,travel_start,travel_end,cost
0,pay expenses,1,16.01.2017 13:29,16.01.2017 13:40,Accounting,FI12,,,16752
1,pay expenses,5,16.01.2017 08:38,16.01.2017 08:48,Accounting,JH2172,,,26211
2,calculate payments,6,04.01.2017 06:59,16.01.2017 09:40,Accounting,WE5108,,,41314
3,pay expenses,6,06.02.2017 09:27,06.02.2017 09:36,Accounting,WE5108,,,41314
4,send original documents to archive,7,01.01.2017 03:46,09.01.2017 06:23,Employee,UL2786,,,
...,...,...,...,...,...,...,...,...,...
83251,check if travel request needs preliminary pric...,7267,29.12.2017 18:41,29.12.2017 18:43,,,,,
83252,decide on approval requirements,7267,29.12.2017 18:43,29.12.2017 18:44,,,,,
83253,file travel request,7268,29.12.2017 19:52,29.12.2017 19:54,Employee,RT4514,,,45831
83254,check if travel request needs preliminary pric...,7268,29.12.2017 19:54,29.12.2017 19:56,,,,,


In [105]:
# === Convert to PM4Py event log ===
# PM4Py expects: case:concept:name, concept:name, time:timestamp
df['case:concept:name'] = df['case']
df['concept:name'] = df['activity']
df['time:timestamp'] = df['end']
# df['org:resource'] = df['Resource']


# Convert timestamp column
df["time:timestamp"] = pd.to_datetime(
    df["time:timestamp"], 
    format="%d.%m.%Y %H:%M", 
    utc=True
)
# sort by time
df = df.sort_values(by="time:timestamp").reset_index(drop=True)
df

Unnamed: 0,activity,case,start,end,type,user,travel_start,travel_end,cost,case:concept:name,concept:name,time:timestamp
0,file travel request,1476,02.01.2017 05:59,02.01.2017 06:02,Employee,RS7246,26.01.2017,28.01.2017,27611,1476,file travel request,2017-01-02 06:02:00+00:00
1,check if travel request needs preliminary pric...,1476,02.01.2017 06:02,02.01.2017 06:02,,,26.01.2017,28.01.2017,,1476,check if travel request needs preliminary pric...,2017-01-02 06:02:00+00:00
2,file travel request,1459,02.01.2017 05:59,02.01.2017 06:02,Employee,HA4851,,,43895,1459,file travel request,2017-01-02 06:02:00+00:00
3,check if travel request needs preliminary pric...,1459,02.01.2017 06:02,02.01.2017 06:03,,,,,,1459,check if travel request needs preliminary pric...,2017-01-02 06:03:00+00:00
4,decide on approval requirements,1476,02.01.2017 06:02,02.01.2017 06:03,,,26.01.2017,28.01.2017,,1476,decide on approval requirements,2017-01-02 06:03:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
83251,check booking proposal,6582,29.12.2017 20:44,29.12.2017 20:48,Employee,JQ8400,,,,6582,check booking proposal,2017-12-29 20:48:00+00:00
83252,check booking proposal,6584,29.12.2017 21:10,29.12.2017 21:17,Employee,XQ1540,,,,6584,check booking proposal,2017-12-29 21:17:00+00:00
83253,check if price inquiry is still needed and up ...,6292,29.12.2017 21:40,29.12.2017 21:55,Employee,AK7488,,,,6292,check if price inquiry is still needed and up ...,2017-12-29 21:55:00+00:00
83254,request update of the booking proposal,6292,29.12.2017 21:55,29.12.2017 21:55,Employee,AK7488,,,,6292,request update of the booking proposal,2017-12-29 21:55:00+00:00


In [106]:
# === Convert to PM4Py event log ===
event_log = log_converter.apply(df, variant=log_converter.Variants.TO_EVENT_LOG)

# === Export to XES.GZ ===
output_path = "data/real/MOBIS/MOBIS.xes.gz"
xes_exporter.apply(event_log, output_path, parameters={"gzip": True})

exporting log, completed traces :: 100%|██████████| 6555/6555 [00:33<00:00, 198.33it/s]
