In [1]:
%load_ext autoreload
import datetime
# import ete3
# import itertools
# import json
# import logging
import math
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import pandas as pd
import typing
# import re
import xml.etree.ElementTree as ET
import xml.dom.minidom
# import xmlschema

In [2]:
%autoreload
from maxes.xes_loader2 import XesLoader, XesLog
# from maxes.xes_file import XesFile
# from maxes.analyze_xes import AnalyzeXes
# import maxes.analyze_sequence
import maxes.serialization.serialize
# import maxes.graphs
import maxes.notebooks.utils
import maxes.utils

In [3]:
# One-time notebook setup; the recorded output shows that it loads the project config file.
# NOTE(review): why a second run is unsafe is not visible from this notebook — confirm in maxes.notebooks.utils.
maxes.notebooks.utils.init_notebook() # RUN ONLY ONCE

Loading config from: C:\vt\md\maxes\maxes\config.local.yml


In [4]:
import maxes.notebooks.load_files


# Path of the CCC19 XES log as provided by the project's path helper.
path = maxes.notebooks.load_files.get_pathes().ccc19
# Bare name on the last line so the notebook displays the resolved path.
path

'/vt/md/maxes/maxes/data\\Conformance Checking Challenge 2019 (CCC19)_1_all/data/CCC19 - Log XES.xes'

In [5]:
# Parse the XES file at `path` into an in-memory ElementTree.
xml_tree = ET.parse(path)
# Displayed as the cell result.
xml_tree

<xml.etree.ElementTree.ElementTree at 0x25ac869f830>

In [6]:
# Root element of the parsed document; the recorded output shows it is the
# namespaced XES `log` element.
log = xml_tree.getroot()
log

<Element '{http://www.xes-standard.org}log' at 0x0000025AC86F5FD0>

In [7]:
def xml_tag_without_namespace(tag: str):
    """Notebook-local shortcut for ``maxes.utils.xml_tag_without_namespace``.

    Passes ``tag`` through unchanged and returns whatever the project helper
    returns for it.
    """
    stripped_tag = maxes.utils.xml_tag_without_namespace(tag)
    return stripped_tag

In [8]:

def set_event_attribute_type_int(
        xml_tree: ET.ElementTree,
        attribute_name: str,
        xml_namespace: str = "{http://www.xes-standard.org}",
        ignore_missing: bool = False
        ) -> int:
    """Re-tag an event attribute as an XES ``int`` attribute, in place.

    Walks ``log -> trace -> event`` and, for every direct child of an event
    whose ``key`` equals ``attribute_name``, converts its ``value`` with
    ``int()``, stores the normalised ``str(int(...))`` back and changes the
    element tag to ``f"{xml_namespace}int"``.

    Args:
        xml_tree: Parsed XES document; it is modified in place.
        attribute_name: Value of the ``key`` attribute to look for.
        xml_namespace: Namespace prefix (in ``{uri}`` form) for the new tag.
        ignore_missing: Currently unused. Kept so existing callers keep
            working; events that lack the attribute are always skipped
            silently.

    Returns:
        Number of attribute elements that were converted.

    Raises:
        ValueError: If a matching attribute's value is not an integer literal.
            The offending element is left untouched.
        KeyError: If a matching attribute has no ``value``.
    """
    int_tag = f"{xml_namespace}int"
    changes_counter = 0

    for xml_trace in xml_tree.getroot():
        if xml_tag_without_namespace(xml_trace.tag) != "trace":
            continue

        for xml_event in xml_trace:
            if xml_tag_without_namespace(xml_event.tag) != "event":
                continue

            for xml_attribute in xml_event:
                # .get() so that children without a "key" are skipped instead
                # of aborting the whole run with a KeyError.
                key = xml_attribute.get("key")
                if key is None or key != attribute_name:
                    continue

                # Convert first: if the value is not an integer, the element
                # must not end up half-converted (int tag, non-int value).
                raw_value = xml_attribute.attrib["value"]
                try:
                    value = int(raw_value)
                except ValueError as error:
                    raise ValueError(
                        f"Attribute {attribute_name!r} has non-integer value {raw_value!r}"
                    ) from error

                xml_attribute.tag = int_tag
                xml_attribute.attrib["value"] = str(value)
                changes_counter += 1

    return changes_counter

def remove_event_attribute(
        xml_tree: ET.ElementTree,
        attribute_name: str
    ) -> int:
    """Remove an attribute from every event of an XES document, in place.

    Walks ``log -> trace -> event`` and deletes every direct child of an
    event whose ``key`` equals ``attribute_name``. Children without a ``key``
    and attributes outside of events are left alone.

    Args:
        xml_tree: Parsed XES document; it is modified in place.
        attribute_name: Value of the ``key`` attribute to remove.

    Returns:
        Number of attribute elements that were removed.
    """
    changes_counter = 0

    for xml_trace in xml_tree.getroot():
        if xml_tag_without_namespace(xml_trace.tag) != "trace":
            continue

        for xml_event in xml_trace:
            if xml_tag_without_namespace(xml_event.tag) != "event":
                continue

            # Iterate over a snapshot: removing from the live element while
            # iterating it skips the sibling that follows each removed child,
            # so consecutive matches would survive.
            for xml_attribute in list(xml_event):
                if xml_attribute.get("key") != attribute_name:
                    continue

                xml_event.remove(xml_attribute)
                changes_counter += 1

    return changes_counter


In [9]:
import maxes.notebooks.load_files
import maxes.notebooks.utils


# Pre-process the raw CCC19 log: VIDEOSTART / VIDEOEND become XES int
# attributes, EVENTID is dropped, and the result is written to the project's
# processed-data folder.
print("Reading")
path = maxes.notebooks.load_files.get_pathes().ccc19
xml_tree = ET.parse(path)

print("Replacing")
set_event_attribute_type_int(xml_tree, "VIDEOSTART")
set_event_attribute_type_int(xml_tree, "VIDEOEND")
remove_event_attribute(xml_tree, "EVENTID")

print("Writing")
# Register XES as the default namespace so tags are written without a prefix.
ET.register_namespace("", "http://www.xes-standard.org")
ET.indent(xml_tree)

destination_file_path = os.path.join(maxes.notebooks.utils.get_project_path(), "output/data_processed/ccc19.xes")
# A fresh checkout may not have the output folder yet.
os.makedirs(os.path.dirname(destination_file_path), exist_ok=True)
# Let ElementTree encode the file itself: explicit UTF-8 plus an XML
# declaration, instead of a text-mode handle that uses the platform's locale
# encoding (e.g. cp1252 on Windows) and writes no declaration.
xml_tree.write(destination_file_path, encoding="utf-8", xml_declaration=True)

Reading
Replacing
Writing


In [29]:
import maxes.notebooks.load_files


# Load the CCC19 log through the project loader; the recorded output shows an XesLog instance.
# NOTE(review): whether this reads the processed file written by the earlier
# preprocessing cell is not visible here — confirm in maxes.notebooks.load_files.
log_ccc19 = maxes.notebooks.load_files.load_ccc19()
log_ccc19

<maxes.xes_loader2.XesLog at 0x1c7a07a2ff0>

In [37]:
# XesGenerator1 is only referenced by the disabled code further down in this cell.
from maxes.generators.xes_generator.xes_generator1 import XesGenerator1

# GENERATE AND SAVE
# Destination of the generated log, resolved under the project root.
ccc_19_generated_path = os.path.join(maxes.notebooks.utils.get_project_path(), "output/ccc19_generated.xes")

# The commented-out statements below are a manual pipeline (fit generator ->
# generate -> serialize -> indent -> write). They are disabled; only the helper
# call at the end of this cell runs.
# NOTE(review): presumably generate_xes_for_log performs the same steps — confirm in maxes.notebooks.utils.

# generator = XesGenerator1(debug=True).fit(log_ccc19)
# generated_ccc_19 = generator.generate()
# generated_ccc_19

# generated_log_ET = maxes.serialization.serialize.Serializer().serialize(generated_ccc_19, xml_log_skeleton=log_ccc19.loader.xml_log_skeleton)

# ET.indent(generated_log_ET)

# ET.register_namespace("", "http://www.xes-standard.org")
# with open(ccc_19_generated_path, "w") as file:
#     generated_log_ET.write(file, encoding="unicode")

# Hand the loaded log and the destination path to the project helper.
maxes.notebooks.utils.generate_xes_for_log(log_ccc19, ccc_19_generated_path)

In [38]:
# READ FROM FILE
# Load the log from ccc_19_generated_path (the path passed to
# generate_xes_for_log above), so later cells work on the file on disk.
generated_ccc_19 = XesLoader().load(ccc_19_generated_path)
generated_ccc_19

<maxes.xes_loader2.XesLog at 0x1c7a045ca10>

In [39]:
import maxes.metrics.mean_levenstein_distance

# Compare the original and the generated log with the project's
# mean-Levenshtein metric ("levenstein" is the project's own spelling).
distance_result = maxes.metrics.mean_levenstein_distance.mean_levenstein_distance(
    original_log=log_ccc19,
    generated_log=generated_ccc_19
)
# Iterating the result yields pairs; keep only (original index, generated index)
# of each pair's sequences.
trace_mappings = [(pair.original_sequence.index, pair.generated_sequence.index) for pair in distance_result]
trace_mappings


[(10, 19),
 (11, 13),
 (8, 0),
 (15, 2),
 (1, 12),
 (6, 18),
 (14, 7),
 (17, 14),
 (19, 17),
 (3, 5),
 (13, 6),
 (5, 8),
 (0, 16),
 (12, 3),
 (16, 10),
 (4, 9),
 (9, 11),
 (7, 15),
 (2, 1),
 (18, 4)]

In [42]:
import maxes.metrics.mean_error

# FIXME: as recorded in this cell's output, the call below raises
#   TypeError: calculate_mean_log_error() missing 1 required positional argument: 'attribute_weights'
# An `attribute_weights` argument has to be supplied; its expected type and
# content are not visible from this notebook — check maxes.metrics.mean_error.
error = maxes.metrics.mean_error.calculate_mean_log_error(
    original_log=log_ccc19,
    generated_log=generated_ccc_19,
    trace_mapping=trace_mappings
)
error

TypeError: calculate_mean_log_error() missing 1 required positional argument: 'attribute_weights'