In [1]:
%load_ext autoreload
import pandas as pd
import numpy as np

In [94]:
%autoreload
import xes

In [91]:
activities_xes = xes.xes_to_dataframes(
    filepath="data\Activities of daily living of several individuals_1_all\data\edited_hh102_labour.xes\edited_hh102_labour.xes",
    keep_original=True
)

In [31]:
activities_xes_py = xes.xes_to_python(
    filepath="data\Activities of daily living of several individuals_1_all\data\edited_hh102_labour.xes\edited_hh102_labour.xes"
)

In [105]:
def print_xes_info(xes):
    trace_lengths = np.array([len(trace["events"]) for trace in xes])
    events_count = sum(trace_lengths)
    df_merged = pd.concat([trace["events"] for trace in xes])

    print()
    print(
        "Traces: {0}, Events: {1}".format(
            len(xes),
            events_count
        )
    )

    print(
        "Events: Average: {0}, Min: {1}, Max: {2}".format(
            trace_lengths.mean(),
            trace_lengths.min(),
            trace_lengths.max(),
        )
    )

    column_counts = np.array([len(event) for trace in xes for event in trace["events_original"]])
    print(
        "Columns: Average: {0}, Min: {1}, Max: {2}".format(
            column_counts.mean(),
            column_counts.min(),
            column_counts.max(),
        )
    )

    print()
    concept_name_counts_per_trace = [sum("concept:name" in event for event in trace["events_original"])
                                     for trace in xes]
    concept_name_count = sum(concept_name_counts_per_trace)
    has_concept_name = concept_name_count > 0
    print(
        "Has \"concept:name\": {0} ({1:.2f}%)".format(
            has_concept_name,
            concept_name_count / events_count * 100
        )
    )

    if has_concept_name:
        top_concept_name_count = 5
        print("Top {0} \"concept:name\":".format(top_concept_name_count))
        print(df_merged["concept:name"].value_counts().nlargest(top_concept_name_count))

    print()
    lifecycle_transition_counts_per_trace = [sum("lifecycle:transition" in event for event in trace["events_original"])
                                     for trace in xes]
    lifecycle_transition_count = sum(lifecycle_transition_counts_per_trace)
    has_lifecycle_transition = lifecycle_transition_count > 0
    print(
        "Has \"lifecycle:transition\": {0} ({1:.2f}%)".format(
            has_lifecycle_transition,
            lifecycle_transition_count / events_count * 100
        )
    )

    if has_lifecycle_transition:
        top_lifecycle_transition_count = 5
        print("Top {0} \"lifecycle:transition\":".format(top_lifecycle_transition_count))
        print(df_merged["lifecycle:transition"].value_counts().nlargest(top_lifecycle_transition_count))

    print()
    time_timestamp_counts_per_trace = [sum("time:timestamp" in event for event in trace["events_original"])
                                     for trace in xes]
    time_timestamp_count = sum(time_timestamp_counts_per_trace)
    has_time_timestamp = time_timestamp_count > 0
    print(
        "Has \"time:timestamp\": {0} ({1:.2f}%)".format(
            has_time_timestamp,
            time_timestamp_count / events_count * 100
        )
    )

    if has_time_timestamp:
        print("Average: {0}, Min: {1}, Max: {2}".format(
            # df_merged["time:timestamp"].mean(),
            0,
            df_merged["time:timestamp"].min(),
            df_merged["time:timestamp"].max()
        ))

    # print(f"Has \"time:timestamp\": ")

In [87]:
print_xes_info(activities_xes)


Traces: 18, Events: 1152
Events: Average: 64.0, Min: 46, Max: 82
Columns: Average: 4.0, Min: 4, Max: 4

Has "concept:name": True (100.00%)
Top 5 "concept:name":
relax              190
personalhygiene    154
toilet             152
sleep              120
snack               84
Name: concept:name, dtype: int64

Has "lifecycle:transition": True (100.00%)
Top 5 "lifecycle:transition":
start       576
complete    576
Name: lifecycle:transition, dtype: int64

Has "time:timestamp": True (100.00%)
Average: 2011-06-27 09:57:20.849826560+02:00, Min: 2011-06-15 00:06:32+02:00, Max: 2011-07-09 06:55:29+02:00


In [72]:
df_merged = pd.concat([trace["events"] for trace in activities_xes])

df_merged["time:timestamp"].min()

Timestamp('2011-06-15 00:06:32+0200', tz='tzoffset(None, 7200)')

In [99]:
filepath = "data/NASA Crew Exploration Vehicle (CEV) Software Event Log_1_all/data/nasa-cev-1-10-single-trace.xes/nasa-cev-1-10-single-trace.xes"
xes_nasa = xes.xes_to_dataframes(filepath=filepath, keep_original=True)
print_xes_info(xes_nasa)


Traces: 1, Events: 54
Events: Average: 54.0, Min: 54, Max: 54
Columns: Average: 14.074074074074074, Min: 14, Max: 16

Has "concept:name": True (100.00%)
Top 5 "concept:name":
cev.Failures$Type(java.lang.String,int)      8
cev.CEV()                                    4
cev.ErrorLog()                               4
cev.Failures(cev.ErrorLog)                   4
cev.Spacecraft(cev.Failures,cev.ErrorLog)    4
Name: concept:name, dtype: int64

Has "lifecycle:transition": True (100.00%)
Top 5 "lifecycle:transition":
start       27
complete    27
Name: lifecycle:transition, dtype: int64

Has "time:timestamp": True (100.00%)
Average: 2017-02-10 13:55:00.415055616+01:00, Min: 2017-02-10 13:54:55.423000+01:00, Max: 2017-02-10 13:55:00.628000+01:00


In [100]:
filepath = "data/BPI Challenge 2012_1_all/BPI_Challenge_2012.xes/BPI_Challenge_2012.xes"
xes_bpi_2012 = xes.xes_to_dataframes(filepath=filepath, keep_original=True)


In [103]:
xes_bpi_2012[0]["events"]["time:timestamp"].mean()

Timestamp('2011-10-06 02:44:45.718269184+0200', tz='tzoffset(None, 7200)')

In [106]:
print_xes_info(xes_bpi_2012)


Traces: 13087, Events: 262200
Events: Average: 20.035149384885763, Min: 3, Max: 175
Columns: Average: 3.931311975591152, Min: 3, Max: 4

Has "concept:name": True (100.00%)
Top 5 "concept:name":
W_Completeren aanvraag            54850
W_Nabellen offertes               52016
W_Nabellen incomplete dossiers    25190
W_Valideren aanvraag              20809
W_Afhandelen leads                16566
Name: concept:name, dtype: int64

Has "lifecycle:transition": True (100.00%)
Top 5 "lifecycle:transition":
COMPLETE    164506
START        71376
SCHEDULE     26318
Name: lifecycle:transition, dtype: int64

Has "time:timestamp": True (100.00%)
Average: 0, Min: 2011-10-01 00:38:44.546000+02:00, Max: 2012-03-14 16:04:54.681000+01:00


In [107]:
filepath = "data/Real-life event logs - Hospital log_1_all/Hospital_log.xes\Hospital_log.xes"
xes_hospital = xes.xes_to_dataframes(filepath=filepath, keep_original=True)


In [108]:
print_xes_info(xes_hospital)



Traces: 1143, Events: 150291
Events: Average: 131.48818897637796, Min: 1, Max: 1814
Columns: Average: 8.999787079731986, Min: 7, Max: 9

Has "concept:name": True (100.00%)
Top 5 "concept:name":
aanname laboratoriumonderzoek               15353
ligdagen - alle spec.beh.kinderg.-reval.    10897
190205 klasse 3b        a205                 9351
ordertarief                                  9008
190101 bovenreg.toesl.  a101                 6241
Name: concept:name, dtype: int64

Has "lifecycle:transition": True (100.00%)
Top 5 "lifecycle:transition":
complete    150291
Name: lifecycle:transition, dtype: int64

Has "time:timestamp": True (100.00%)
Average: 0, Min: 2005-01-03 00:00:00+01:00, Max: 2008-03-20 00:00:00+01:00


In [113]:
np.array([len(trace["events"]) for trace in xes_hospital])[824]

1814

In [97]:
files = [
    "data/Activities of daily living of several individuals_1_all/data/edited_hh102_labour.xes/edited_hh102_labour.xes",
    # "data/BPI Challenge 2012_1_all/BPI_Challenge_2012.xes/BPI_Challenge_2012.xes",
    "data/NASA Crew Exploration Vehicle (CEV) Software Event Log_1_all/data/nasa-cev-1-10-single-trace.xes/nasa-cev-1-10-single-trace.xes",
    "data/Real-life event logs - Hospital log_1_all/Hospital_log.xes\Hospital_log.xes"
]

for file in files:
    xes_file = xes.xes_to_dataframes(filepath=file, keep_original=True)
    print()
    print()
    print(file)
    print_xes_info(xes_file)



data/Activities of daily living of several individuals_1_all/data/edited_hh102_labour.xes/edited_hh102_labour.xes

Traces: 18, Events: 1152
Events: Average: 64.0, Min: 46, Max: 82
Columns: Average: 4.0, Min: 4, Max: 4

Has "concept:name": True (100.00%)
Top 5 "concept:name":
relax              190
personalhygiene    154
toilet             152
sleep              120
snack               84
Name: concept:name, dtype: int64

Has "lifecycle:transition": True (100.00%)
Top 5 "lifecycle:transition":
start       576
complete    576
Name: lifecycle:transition, dtype: int64

Has "time:timestamp": True (100.00%)
Average: 2011-06-27 09:57:20.849826560+02:00, Min: 2011-06-15 00:06:32+02:00, Max: 2011-07-09 06:55:29+02:00


data/NASA Crew Exploration Vehicle (CEV) Software Event Log_1_all/data/nasa-cev-1-10-single-trace.xes/nasa-cev-1-10-single-trace.xes

Traces: 1, Events: 54
Events: Average: 54.0, Min: 54, Max: 54
Columns: Average: 14.074074074074074, Min: 14, Max: 16

Has "concept:name": True (

KeyboardInterrupt: 