Merge branch 'release' into github-release
# Conflicts:
#	.gitlab-ci.yml
#	pm4py/objects/log/importer/xes/variants/__init__.py
#	requirements_stable.txt
fit-sebastiaan-van-zelst committed May 12, 2022
2 parents 96cc277 + 2bb85d9 commit f230c29
Showing 34 changed files with 454 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -135,3 +135,5 @@ dmypy.json
# Pyre type checker
.pyre/

# Renovate bot
renovate.json5
65 changes: 63 additions & 2 deletions CHANGELOG.md
@@ -1,4 +1,64 @@
## pm4py 2.2.20
# Changelog of pm4py

## pm4py 2.2.21 (2022.05.12)

### Added
* 65ff8ae3d9bca71f0cf7be507c9e0eba68b85c42
* added a chunk-based XES importer (CHUNK_REGEX); a usage sketch follows below
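
A minimal usage sketch of the new importer variant, assuming it is exposed on the XES importer facade as `Variants.CHUNK_REGEX` (only the variant name is taken from this changelog; its options are not shown here):

```python
# Sketch only: the CHUNK_REGEX variant name comes from the entry above; its
# exposure on the facade and its options are assumptions, not verified here.
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply(
    "running-example.xes",                      # any XES file path
    variant=xes_importer.Variants.CHUNK_REGEX,  # assumed enum member for the new importer
)
print(len(log))
```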

### Changed
* d982c534aac373c347a083739b68fd3ac2b29e42
* changed the dimensions of endpoint (start/end event) nodes in the BPMN model layout
* 7473a72877e29261780adf746d134b406a912dd7
* changes to increase pm4py's compatibility across different platforms

### Deprecated

### Fixed
* 882aa20b20ec593e0a7d01e027a6f1afa8d44f84
* fixed the XES line-by-line importer for boolean attributes
* f6542cd12413f073eb51173804f68502e3026f46
* fixed XES line-by-line deserialization
* 363580b757c027ff583d33dcff83e00b3be97659
* fixed issues with the usage of Pandas dataframe indexes in the library
* 58a763b4099b40c67f23a6eb45c621d1b9a9d324
* fixed OCEL default constructor to set default columns in the dataframes
* 8470f22047667d1d30415a08965af1015d66adbb
* fixed a division-by-zero error in alignment-based fitness (side case for an empty trace/model combination)

### Removed

### Other

---

## pm4py 2.2.20.1 (2022.04.10)

### Added

### Changed
* 344fb7258df17ce0d4ffe7425b678943f6f2ff11
* Minor refactoring of the management of inhibitor/reset arcs (importing)

### Deprecated

### Fixed
* ad2cba1d8f9487dbb03ec418643b329b30e80ee0
* Minor fixes to parameter retrieval in several parts of the code
* 65e1f1b0bbd0747fe81eb049780874608a395d6e
* Fixed bug in eventually follows filter (simplified interface)
* 60cd060edeeaa17c8b5bdaba7bb1035fc385d514
* Fixed XES exporting when attribute value type is a Numpy type (numpy.int64, numpy.float64, numpy.datetime64)
* cd5e55e712697a28cbfe0182e96556531b520667
* Bug fix in feature selection and extraction on Pandas dataframes

### Removed

### Other

---

## pm4py 2.2.20 (2022.04.01)

### Added

@@ -26,8 +86,9 @@

### Other

---

## pm4py 2.2.19.2
## pm4py 2.2.19.2 (2022.03.04)

### Added

4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.9
FROM python:3.10

RUN apt-get update
RUN apt-get -y upgrade
@@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake
RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake
RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev
RUN pip install -U wheel six pytest
RUN pip install asttokens==2.0.5 backcall==0.2.0 colorama==0.4.4 cycler==0.11.0 decorator==5.1.1 deprecation==2.1.0 executing==0.8.3 fonttools==4.31.2 graphviz==0.19.1 intervaltree==3.1.0 ipython==8.2.0 jedi==0.18.1 jinja2==3.1.1 jsonpickle==2.1.0 kiwisolver==1.4.2 lxml==4.8.0 MarkupSafe==2.1.1 matplotlib==3.5.1 matplotlib-inline==0.1.3 mpmath==1.2.1 networkx==2.7.1 numpy==1.22.3 packaging==21.3 pandas==1.4.1 parso==0.8.3 pickleshare==0.7.5 pillow==9.0.1 prompt-toolkit==3.0.28 pure-eval==0.2.2 pydotplus==2.0.2 pygments==2.11.2 pyparsing==3.0.7 python-dateutil==2.8.2 pytz==2022.1 pyvis==0.1.9 scipy==1.8.0 setuptools==61.3.0 six==1.16.0 sortedcontainers==2.4.0 stack-data==0.2.0 stringdist==1.0.9 sympy==1.10.1 tqdm==4.63.1 traitlets==5.1.1 wcwidth==0.2.5
RUN pip install asttokens==2.0.5 backcall==0.2.0 colorama==0.4.4 cycler==0.11.0 decorator==5.1.1 deprecation==2.1.0 executing==0.8.3 fonttools==4.32.0 graphviz==0.19.2 intervaltree==3.1.0 ipython==8.2.0 jedi==0.18.1 jinja2==3.1.1 jsonpickle==2.1.0 kiwisolver==1.4.2 lxml==4.8.0 MarkupSafe==2.1.1 matplotlib==3.5.1 matplotlib-inline==0.1.3 mpmath==1.2.1 networkx==2.8 numpy==1.22.3 packaging==21.3 pandas==1.4.2 parso==0.8.3 pickleshare==0.7.5 pillow==9.1.0 prompt-toolkit==3.0.29 pure-eval==0.2.2 pydotplus==2.0.2 pygments==2.11.2 pyparsing==3.0.8 python-dateutil==2.8.2 pytz==2022.1 pyvis==0.1.9 scipy==1.8.0 setuptools==62.0.0 six==1.16.0 sortedcontainers==2.4.0 stack-data==0.2.0 stringdist==1.0.9 sympy==1.10.1 tqdm==4.64.0 traitlets==5.1.1 wcwidth==0.2.5

COPY . /app
RUN cd /app && python setup.py install
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = '2.2'
# The full version, including alpha/beta/rc tags
release = '2.2.19.3'
release = '2.2.21'

# -- General configuration ---------------------------------------------------

6 changes: 2 additions & 4 deletions examples/inhibitor_reset_arcs.py
@@ -36,11 +36,9 @@ def execute_script():
add_arc_from_to(p2, trans_C, net)
add_arc_from_to(trans_C, sink, net)
add_arc_from_to(trans_inhibitor, p_inhibitor, net)
inhibitor_arc = add_arc_from_to(p_inhibitor, trans_B, net)
inhibitor_arc.properties["arctype"] = "inhibitor"
inhibitor_arc = add_arc_from_to(p_inhibitor, trans_B, net, type="inhibitor")
add_arc_from_to(trans_free, p_reset, net)
reset_arc = add_arc_from_to(p_reset, trans_C, net)
reset_arc.properties["arctype"] = "reset"
reset_arc = add_arc_from_to(p_reset, trans_C, net, type="reset")
im = Marking({source: 1})
fm = Marking({sink: 1})
pm4py.view_petri_net(net, im, fm, format="svg")
4 changes: 0 additions & 4 deletions pm4py/__init__.py
@@ -64,7 +64,3 @@
from pm4py.ocel import ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count

time.clock = time.process_time

# this package is available only for Python >= 3.5
if sys.version_info >= (3, 5):
from pm4py import streaming
11 changes: 7 additions & 4 deletions pm4py/algo/conformance/alignments/petri_net/algorithm.py
@@ -28,8 +28,6 @@
import sys
from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY, PARAMETER_CONSTANT_CASEID_KEY
import pkgutil
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from typing import Optional, Dict, Any, Union, Tuple
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from pm4py.objects.petri_net.obj import PetriNet, Marking
@@ -135,8 +133,9 @@ def apply_trace(trace, petri_net, initial_marking, final_marking, parameters=Non

ltrace_bwc = trace_cost_function_sum + best_worst_cost

fitness = 1 - (ali['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (
ltrace_bwc // align_utils.STD_MODEL_LOG_MOVE_COST) if ltrace_bwc > 0 else 0
fitness_num = ali['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
fitness_den = ltrace_bwc // align_utils.STD_MODEL_LOG_MOVE_COST
fitness = 1 - fitness_num / fitness_den if fitness_den > 0 else 0

# other possibility: avoid integer division but proceed to rounding.
# could lead to small differences with respect to the adopted-since-now fitness
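
A small standalone sketch of the guarded fitness computation above; the value of the cost constant is an assumption (pm4py defines it in `align_utils`, it is not restated in this diff):

```python
# Sketch of the division-by-zero guard introduced above; constant value assumed.
STD_MODEL_LOG_MOVE_COST = 10000

def alignment_fitness(cost, ltrace_bwc):
    fitness_num = cost // STD_MODEL_LOG_MOVE_COST
    fitness_den = ltrace_bwc // STD_MODEL_LOG_MOVE_COST
    # fitness_den is 0 in the empty trace / empty model side case,
    # which previously raised ZeroDivisionError
    return 1 - fitness_num / fitness_den if fitness_den > 0 else 0

print(alignment_fitness(20000, 50000))  # 0.6
print(alignment_fitness(0, 0))          # 0, instead of an exception
```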
@@ -244,13 +243,17 @@ def apply_multiprocessing(log, petri_net, initial_marking, final_marking, parame
if parameters is None:
parameters = {}

import multiprocessing

num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)

best_worst_cost = __get_best_worst_cost(petri_net, initial_marking, final_marking, variant, parameters)
variants_idxs, one_tr_per_var = __get_variants_structure(log, parameters)
parameters[Parameters.BEST_WORST_COST_INTERNAL] = best_worst_cost

all_alignments = []

from concurrent.futures import ProcessPoolExecutor
with ProcessPoolExecutor(max_workers=num_cores) as executor:
futures = []
for trace in one_tr_per_var:
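The change in this file (and in the alignment variants below) follows one pattern: the `multiprocessing` and `concurrent.futures` imports move from module level into the multiprocessing code path, so that importing the module itself stays lightweight on platforms where those imports are problematic. A generic sketch of the pattern, with illustrative names that are not pm4py API:

```python
# Generic sketch of the deferred-import + process-pool pattern shown above;
# apply_multiprocessing_sketch and worker are illustrative names only.
def apply_multiprocessing_sketch(items, worker, num_cores=None):
    # imports deferred until the multiprocessing path is actually taken
    import multiprocessing
    from concurrent.futures import ProcessPoolExecutor

    if num_cores is None:
        num_cores = max(1, multiprocessing.cpu_count() - 2)

    results = []
    with ProcessPoolExecutor(max_workers=num_cores) as executor:
        futures = [executor.submit(worker, item) for item in items]
        for future in futures:
            results.append(future.result())
    return results
```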
@@ -16,9 +16,7 @@
'''
import copy
import heapq
import multiprocessing
import pkgutil
from concurrent.futures import ProcessPoolExecutor
from enum import Enum
from typing import List, Any, Optional

@@ -303,6 +301,8 @@ def apply_multiprocessing(obj: Union[EventLog, Trace], pt: ProcessTree, paramete
if parameters is None:
parameters = {}

import multiprocessing

leaves = frozenset(pt_util.get_leaves_as_tuples(pt))
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)
@@ -311,6 +311,8 @@ def apply_multiprocessing(obj: Union[EventLog, Trace], pt: ProcessTree, paramete
variant = tuple(x[activity_key] for x in obj)
return align_variant(variant, leaves, pt)
else:
from concurrent.futures import ProcessPoolExecutor

with ProcessPoolExecutor(max_workers=num_cores) as executor:
ret = []
best_worst_cost = align_variant([], leaves, pt)["cost"]
4 changes: 2 additions & 2 deletions pm4py/algo/discovery/batches/variants/pandas.py
@@ -79,10 +79,10 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
xes_constants.DEFAULT_TIMESTAMP_KEY)
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
xes_constants.DEFAULT_TIMESTAMP_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
timestamp_key)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

log = log[list({activity_key, resource_key, start_timestamp_key, timestamp_key, case_id_key})]
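The reordering above changes only the fallback: the start-timestamp key now defaults to the resolved timestamp key instead of the fixed XES default. A short sketch of the resulting behavior (the `end_time` column name is illustrative):

```python
# Sketch: with only a custom TIMESTAMP_KEY supplied, START_TIMESTAMP_KEY now
# inherits it instead of falling back to xes_constants.DEFAULT_TIMESTAMP_KEY.
from pm4py.util import exec_utils, xes_constants
from pm4py.algo.discovery.batches.variants.pandas import Parameters  # module shown in this diff

parameters = {Parameters.TIMESTAMP_KEY: "end_time"}  # illustrative column name
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                           xes_constants.DEFAULT_TIMESTAMP_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                 timestamp_key)
print(start_timestamp_key)  # "end_time"
```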
@@ -25,8 +25,6 @@
from pm4py.util import exec_utils
from pm4py.util import xes_constants
import pkgutil
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from enum import Enum
from pm4py.util import constants
from typing import Optional, Dict, Any, Union, Tuple
@@ -208,6 +206,7 @@ def align_fake_log_stop_marking(fake_log, net, marking, final_marking, parameter
"""
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
multiprocessing = exec_utils.get_param_value(Parameters.MULTIPROCESSING, parameters, False)

@@ -264,6 +263,9 @@ def __align_log_with_multiprocessing_stop_marking(fake_log, net, marking, final_
if parameters is not None:
parameters = {}

import multiprocessing
from concurrent.futures import ProcessPoolExecutor

num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)
align_intermediate_result = []
with ProcessPoolExecutor(max_workers=num_cores) as executor:
@@ -27,6 +27,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
BETA = "beta"


@@ -59,11 +60,13 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
beta = exec_utils.get_param_value(Parameters.BETA, parameters, 0)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}

variants_occ = {x["variant"]: x["case:concept:name"] for x in
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
@@ -27,6 +27,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
N = "n"


@@ -58,10 +59,12 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
n = exec_utils.get_param_value(Parameters.N, parameters, 2)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
variants_occ = {x["variant"]: x["case:concept:name"] for x in
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
@@ -28,6 +28,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
METRIC_NORMALIZATION = "metric_normalization"


@@ -56,10 +57,12 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A
from pm4py.statistics.traces.generic.pandas import case_statistics

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
variants_occ = {x["variant"]: x["case:concept:name"] for x in
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
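The three resource-metric variants above share the same change: the case-id column is read from a new `CASE_ID_KEY` parameter (defaulting to `constants.CASE_CONCEPT_NAME`, i.e. `"case:concept:name"`) and forwarded to the variant-statistics call, instead of indexing `x["case:concept:name"]` directly. A minimal sketch of the lookup, with the variant's own `Parameters` enum passed in for illustration:

```python
# Generic sketch of the parameter resolution added above; the helper name is illustrative.
from pm4py.util import constants, exec_utils

def resolve_case_id_key(parameters_enum, parameters):
    # parameters_enum is the variant's Parameters enum, extended with CASE_ID_KEY in this commit
    return exec_utils.get_param_value(parameters_enum.CASE_ID_KEY, parameters,
                                      constants.CASE_CONCEPT_NAME)
```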
1 change: 0 additions & 1 deletion pm4py/filtering.py
@@ -392,7 +392,6 @@ def filter_eventually_follows_relation(log: Union[EventLog, pd.DataFrame], relat
cases = cases.intersection(this_traces)
return log[log[constants.CASE_CONCEPT_NAME].isin(cases)]
else:
from pm4py.objects.log.obj import EventLog
from pm4py.algo.filtering.log.ltl import ltl_checker
parameters[ltl_checker.Parameters.POSITIVE] = retain
if retain:
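For context, a usage sketch of the simplified-interface filter implemented in this file (the eventually-follows fix listed in the changelog); the file path and activity labels are illustrative:

```python
# Usage sketch of pm4py.filter_eventually_follows_relation; path and labels illustrative.
import pm4py

log = pm4py.read_xes("running-example.xes")
filtered = pm4py.filter_eventually_follows_relation(
    log, [("register request", "pay compensation")], retain=True
)
print(len(filtered))
```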
2 changes: 1 addition & 1 deletion pm4py/meta.py
@@ -15,7 +15,7 @@
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
__name__ = 'pm4py'
VERSION = '2.2.20'
VERSION = '2.2.21'
__version__ = VERSION
__doc__ = 'Process Mining for Python (PM4Py)'
__author__ = 'Fraunhofer Institute for Applied Technology'
7 changes: 6 additions & 1 deletion pm4py/objects/bpmn/layout/variants/graphviz.py
@@ -124,6 +124,7 @@ def apply(bpmn_graph, parameters=None):
points = node.split("points=\"")[1].split("\"")[0]
nodes_pos[inv_nodes_dict[this_id]] = points

endpoints_wh = exec_utils.get_param_value(Parameters.TASK_WH, parameters, 30)
task_wh = exec_utils.get_param_value(Parameters.TASK_WH, parameters, 60)

# add node positions to BPMN nodes
@@ -134,12 +135,16 @@
pos_y = float(node_pos[1])
n.set_x(pos_x)
n.set_y(pos_y)
n.set_height(task_wh)
if isinstance(n, BPMN.Task):
this_width = min(round(2 * task_wh), round(2 * (len(n.get_name()) + 7) * task_wh / 22.0))
n.set_width(this_width)
n.set_height(task_wh)
elif isinstance(n, BPMN.StartEvent) or isinstance(n, BPMN.EndEvent):
n.set_width(endpoints_wh)
n.set_height(endpoints_wh)
else:
n.set_width(task_wh)
n.set_height(task_wh)

max_x = max(1, max(abs(node.get_x()) for node in nodes))
max_y = max(1, max(abs(node.get_y()) for node in nodes))
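A numeric sketch of the sizing rules above, using the defaults visible in this hunk (60 for tasks and other nodes, 30 for start/end events); the labels are illustrative:

```python
# Numeric sketch of the layout sizing above: task boxes scale with the label length
# (capped at 2 * task_wh), while start/end events get the smaller endpoints_wh square.
task_wh = 60       # default from the hunk above
endpoints_wh = 30  # default from the hunk above

def task_width(label):
    return min(round(2 * task_wh), round(2 * (len(label) + 7) * task_wh / 22.0))

print(task_width("check"))             # 65  -> short labels get a narrower box
print(task_width("register request"))  # 120 -> long labels hit the 2 * task_wh cap
print((endpoints_wh, endpoints_wh))    # (30, 30) for start and end events
```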
5 changes: 4 additions & 1 deletion pm4py/objects/log/exporter/xes/variants/etree_xes_exp.py
@@ -48,7 +48,10 @@ class Parameters(Enum):
"datetime": xes_util.TAG_DATE,
"Timestamp": xes_util.TAG_DATE,
"bool": xes_util.TAG_BOOLEAN,
"dict": xes_util.TAG_LIST
"dict": xes_util.TAG_LIST,
"numpy.int64": xes_util.TAG_INT,
"numpy.float64": xes_util.TAG_FLOAT,
"numpy.datetime64": xes_util.TAG_DATE
}
# if a type is not found in the previous list, then default to string
__DEFAULT_TYPE = xes_util.TAG_STRING
5 changes: 4 additions & 1 deletion pm4py/objects/log/exporter/xes/variants/line_by_line.py
@@ -38,7 +38,10 @@ class Parameters(Enum):
"datetime": xes_util.TAG_DATE,
"Timestamp": xes_util.TAG_DATE,
"bool": xes_util.TAG_BOOLEAN,
"dict": xes_util.TAG_LIST
"dict": xes_util.TAG_LIST,
"numpy.int64": xes_util.TAG_INT,
"numpy.float64": xes_util.TAG_FLOAT,
"numpy.datetime64": xes_util.TAG_DATE
}
# if a type is not found in the previous list, then default to string
__DEFAULT_TYPE = xes_util.TAG_STRING
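A minimal sketch of what the extended type mapping enables: exporting a log whose attribute values are Numpy scalars now yields typed XES attributes instead of the string fallback. Attribute names and file path are illustrative, and the public facade is used rather than this variant directly:

```python
# Sketch: Numpy scalar attributes are exported as typed XES attributes
# (int/float/date) rather than defaulting to string; names are illustrative.
import numpy as np
import pm4py
from pm4py.objects.log.obj import Event, EventLog, Trace

event = Event({"concept:name": "register request",
               "amount": np.int64(100),     # exported as an <int> attribute
               "score": np.float64(0.5)})   # exported as a <float> attribute
log = EventLog([Trace([event])])
pm4py.write_xes(log, "numpy_attributes_example.xes")
```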
