Merge branch 'release' into github-release
# Conflicts:
#	.gitlab-ci.yml
#	pm4py/objects/log/importer/xes/variants/__init__.py
#	requirements_stable.txt
fit-sebastiaan-van-zelst committed May 12, 2022
2 parents 96cc277 + 2bb85d9 commit f230c29
Showing 34 changed files with 454 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -135,3 +135,5 @@ dmypy.json
# Pyre type checker
.pyre/

# Renovate bot
renovate.json5
65 changes: 63 additions & 2 deletions CHANGELOG.md
@@ -1,4 +1,64 @@
## pm4py 2.2.20
# Changelog of pm4py

## pm4py 2.2.21 (2022.05.12)

### Added
* 65ff8ae3d9bca71f0cf7be507c9e0eba68b85c42
* added a chunk-based XES importer (CHUNK_REGEX); a usage sketch follows below
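
A minimal usage sketch of the new importer variant, assuming it is exposed on the XES importer facade as `Variants.CHUNK_REGEX` (only the variant name is taken from this changelog; its options are not shown here):

```python
# Sketch only: the CHUNK_REGEX variant name comes from the entry above; its
# exposure on the facade and its options are assumptions, not verified here.
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply(
    "running-example.xes",                      # any XES file path
    variant=xes_importer.Variants.CHUNK_REGEX,  # assumed enum member for the new importer
)
print(len(log))
```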

### Changed
* d982c534aac373c347a083739b68fd3ac2b29e42
* changed the dimensions of endpoint (start/end event) nodes in the BPMN model layout
* 7473a72877e29261780adf746d134b406a912dd7
* changes to increase pm4py's compatibility across different platforms

### Deprecated

### Fixed
* 882aa20b20ec593e0a7d01e027a6f1afa8d44f84
* fixed the XES line-by-line importer for boolean attributes
* f6542cd12413f073eb51173804f68502e3026f46
* fixed XES line-by-line deserialization
* 363580b757c027ff583d33dcff83e00b3be97659
* fixed issues with the usage of Pandas dataframe indexes in the library
* 58a763b4099b40c67f23a6eb45c621d1b9a9d324
* fixed OCEL default constructor to set default columns in the dataframes
* 8470f22047667d1d30415a08965af1015d66adbb
* fixed a division-by-zero error in alignment-based fitness (side case for an empty trace/model combination)

### Removed

### Other

---

## pm4py 2.2.20.1 (2022.04.10)

### Added

### Changed
* 344fb7258df17ce0d4ffe7425b678943f6f2ff11
* Minor refactoring of the management of inhibitor/reset arcs (importing)

### Deprecated

### Fixed
* ad2cba1d8f9487dbb03ec418643b329b30e80ee0
* Minor fixes to parameter retrieval in several parts of the code
* 65e1f1b0bbd0747fe81eb049780874608a395d6e
* Fixed bug in eventually follows filter (simplified interface)
* 60cd060edeeaa17c8b5bdaba7bb1035fc385d514
* Fixed XES exporting when attribute value type is a Numpy type (numpy.int64, numpy.float64, numpy.datetime64)
* cd5e55e712697a28cbfe0182e96556531b520667
* Bug fix in feature selection and extraction on Pandas dataframes

### Removed

### Other

---

## pm4py 2.2.20 (2022.04.01)

### Added

@@ -26,8 +86,9 @@

### Other

---

## pm4py 2.2.19.2
## pm4py 2.2.19.2 (2022.03.04)

### Added

4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.9
FROM python:3.10

RUN apt-get update
RUN apt-get -y upgrade
@@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake
RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake
RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev
RUN pip install -U wheel six pytest
RUN pip install asttokens==2.0.5 backcall==0.2.0 colorama==0.4.4 cycler==0.11.0 decorator==5.1.1 deprecation==2.1.0 executing==0.8.3 fonttools==4.31.2 graphviz==0.19.1 intervaltree==3.1.0 ipython==8.2.0 jedi==0.18.1 jinja2==3.1.1 jsonpickle==2.1.0 kiwisolver==1.4.2 lxml==4.8.0 MarkupSafe==2.1.1 matplotlib==3.5.1 matplotlib-inline==0.1.3 mpmath==1.2.1 networkx==2.7.1 numpy==1.22.3 packaging==21.3 pandas==1.4.1 parso==0.8.3 pickleshare==0.7.5 pillow==9.0.1 prompt-toolkit==3.0.28 pure-eval==0.2.2 pydotplus==2.0.2 pygments==2.11.2 pyparsing==3.0.7 python-dateutil==2.8.2 pytz==2022.1 pyvis==0.1.9 scipy==1.8.0 setuptools==61.3.0 six==1.16.0 sortedcontainers==2.4.0 stack-data==0.2.0 stringdist==1.0.9 sympy==1.10.1 tqdm==4.63.1 traitlets==5.1.1 wcwidth==0.2.5
RUN pip install asttokens==2.0.5 backcall==0.2.0 colorama==0.4.4 cycler==0.11.0 decorator==5.1.1 deprecation==2.1.0 executing==0.8.3 fonttools==4.32.0 graphviz==0.19.2 intervaltree==3.1.0 ipython==8.2.0 jedi==0.18.1 jinja2==3.1.1 jsonpickle==2.1.0 kiwisolver==1.4.2 lxml==4.8.0 MarkupSafe==2.1.1 matplotlib==3.5.1 matplotlib-inline==0.1.3 mpmath==1.2.1 networkx==2.8 numpy==1.22.3 packaging==21.3 pandas==1.4.2 parso==0.8.3 pickleshare==0.7.5 pillow==9.1.0 prompt-toolkit==3.0.29 pure-eval==0.2.2 pydotplus==2.0.2 pygments==2.11.2 pyparsing==3.0.8 python-dateutil==2.8.2 pytz==2022.1 pyvis==0.1.9 scipy==1.8.0 setuptools==62.0.0 six==1.16.0 sortedcontainers==2.4.0 stack-data==0.2.0 stringdist==1.0.9 sympy==1.10.1 tqdm==4.64.0 traitlets==5.1.1 wcwidth==0.2.5

COPY . /app
RUN cd /app && python setup.py install
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = '2.2'
# The full version, including alpha/beta/rc tags
release = '2.2.19.3'
release = '2.2.21'

# -- General configuration ---------------------------------------------------

6 changes: 2 additions & 4 deletions examples/inhibitor_reset_arcs.py
@@ -36,11 +36,9 @@ def execute_script():
add_arc_from_to(p2, trans_C, net)
add_arc_from_to(trans_C, sink, net)
add_arc_from_to(trans_inhibitor, p_inhibitor, net)
inhibitor_arc = add_arc_from_to(p_inhibitor, trans_B, net)
inhibitor_arc.properties["arctype"] = "inhibitor"
inhibitor_arc = add_arc_from_to(p_inhibitor, trans_B, net, type="inhibitor")
add_arc_from_to(trans_free, p_reset, net)
reset_arc = add_arc_from_to(p_reset, trans_C, net)
reset_arc.properties["arctype"] = "reset"
reset_arc = add_arc_from_to(p_reset, trans_C, net, type="reset")
im = Marking({source: 1})
fm = Marking({sink: 1})
pm4py.view_petri_net(net, im, fm, format="svg")
4 changes: 0 additions & 4 deletions pm4py/__init__.py
@@ -64,7 +64,3 @@
from pm4py.ocel import ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count

time.clock = time.process_time

# this package is available only for Python >= 3.5
if sys.version_info >= (3, 5):
from pm4py import streaming
11 changes: 7 additions & 4 deletions pm4py/algo/conformance/alignments/petri_net/algorithm.py
@@ -28,8 +28,6 @@
import sys
from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY, PARAMETER_CONSTANT_CASEID_KEY
import pkgutil
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from typing import Optional, Dict, Any, Union, Tuple
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from pm4py.objects.petri_net.obj import PetriNet, Marking
@@ -135,8 +133,9 @@ def apply_trace(trace, petri_net, initial_marking, final_marking, parameters=Non

ltrace_bwc = trace_cost_function_sum + best_worst_cost

fitness = 1 - (ali['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST) / (
ltrace_bwc // align_utils.STD_MODEL_LOG_MOVE_COST) if ltrace_bwc > 0 else 0
fitness_num = ali['cost'] // align_utils.STD_MODEL_LOG_MOVE_COST
fitness_den = ltrace_bwc // align_utils.STD_MODEL_LOG_MOVE_COST
fitness = 1 - fitness_num / fitness_den if fitness_den > 0 else 0

# other possibility: avoid integer division but proceed to rounding.
# could lead to small differences with respect to the adopted-since-now fitness
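
A small standalone sketch of the guarded fitness computation above; the value of the cost constant is an assumption (pm4py defines it in `align_utils`, it is not restated in this diff):

```python
# Sketch of the division-by-zero guard introduced above; constant value assumed.
STD_MODEL_LOG_MOVE_COST = 10000

def alignment_fitness(cost, ltrace_bwc):
    fitness_num = cost // STD_MODEL_LOG_MOVE_COST
    fitness_den = ltrace_bwc // STD_MODEL_LOG_MOVE_COST
    # fitness_den is 0 in the empty trace / empty model side case,
    # which previously raised ZeroDivisionError
    return 1 - fitness_num / fitness_den if fitness_den > 0 else 0

print(alignment_fitness(20000, 50000))  # 0.6
print(alignment_fitness(0, 0))          # 0, instead of an exception
```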
@@ -244,13 +243,17 @@ def apply_multiprocessing(log, petri_net, initial_marking, final_marking, parame
if parameters is None:
parameters = {}

import multiprocessing

num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)

best_worst_cost = __get_best_worst_cost(petri_net, initial_marking, final_marking, variant, parameters)
variants_idxs, one_tr_per_var = __get_variants_structure(log, parameters)
parameters[Parameters.BEST_WORST_COST_INTERNAL] = best_worst_cost

all_alignments = []

from concurrent.futures import ProcessPoolExecutor
with ProcessPoolExecutor(max_workers=num_cores) as executor:
futures = []
for trace in one_tr_per_var:
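The change in this file (and in the alignment variants below) follows one pattern: the `multiprocessing` and `concurrent.futures` imports move from module level into the multiprocessing code path, so that importing the module itself stays lightweight on platforms where those imports are problematic. A generic sketch of the pattern, with illustrative names that are not pm4py API:

```python
# Generic sketch of the deferred-import + process-pool pattern shown above;
# apply_multiprocessing_sketch and worker are illustrative names only.
def apply_multiprocessing_sketch(items, worker, num_cores=None):
    # imports deferred until the multiprocessing path is actually taken
    import multiprocessing
    from concurrent.futures import ProcessPoolExecutor

    if num_cores is None:
        num_cores = max(1, multiprocessing.cpu_count() - 2)

    results = []
    with ProcessPoolExecutor(max_workers=num_cores) as executor:
        futures = [executor.submit(worker, item) for item in items]
        for future in futures:
            results.append(future.result())
    return results
```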
@@ -16,9 +16,7 @@
'''
import copy
import heapq
import multiprocessing
import pkgutil
from concurrent.futures import ProcessPoolExecutor
from enum import Enum
from typing import List, Any, Optional

@@ -303,6 +301,8 @@ def apply_multiprocessing(obj: Union[EventLog, Trace], pt: ProcessTree, paramete
if parameters is None:
parameters = {}

import multiprocessing

leaves = frozenset(pt_util.get_leaves_as_tuples(pt))
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)
@@ -311,6 +311,8 @@ def apply_multiprocessing(obj: Union[EventLog, Trace], pt: ProcessTree, paramete
variant = tuple(x[activity_key] for x in obj)
return align_variant(variant, leaves, pt)
else:
from concurrent.futures import ProcessPoolExecutor

with ProcessPoolExecutor(max_workers=num_cores) as executor:
ret = []
best_worst_cost = align_variant([], leaves, pt)["cost"]
4 changes: 2 additions & 2 deletions pm4py/algo/discovery/batches/variants/pandas.py
@@ -79,10 +79,10 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
xes_constants.DEFAULT_TIMESTAMP_KEY)
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
xes_constants.DEFAULT_TIMESTAMP_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
timestamp_key)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

log = log[list({activity_key, resource_key, start_timestamp_key, timestamp_key, case_id_key})]
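The reordering above changes only the fallback: the start-timestamp key now defaults to the resolved timestamp key instead of the fixed XES default. A short sketch of the resulting behavior (the `end_time` column name is illustrative):

```python
# Sketch: with only a custom TIMESTAMP_KEY supplied, START_TIMESTAMP_KEY now
# inherits it instead of falling back to xes_constants.DEFAULT_TIMESTAMP_KEY.
from pm4py.util import exec_utils, xes_constants
from pm4py.algo.discovery.batches.variants.pandas import Parameters  # module shown in this diff

parameters = {Parameters.TIMESTAMP_KEY: "end_time"}  # illustrative column name
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                           xes_constants.DEFAULT_TIMESTAMP_KEY)
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                 timestamp_key)
print(start_timestamp_key)  # "end_time"
```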
@@ -25,8 +25,6 @@
from pm4py.util import exec_utils
from pm4py.util import xes_constants
import pkgutil
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from enum import Enum
from pm4py.util import constants
from typing import Optional, Dict, Any, Union, Tuple
@@ -208,6 +206,7 @@ def align_fake_log_stop_marking(fake_log, net, marking, final_marking, parameter
"""
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
multiprocessing = exec_utils.get_param_value(Parameters.MULTIPROCESSING, parameters, False)

@@ -264,6 +263,9 @@ def __align_log_with_multiprocessing_stop_marking(fake_log, net, marking, final_
if parameters is not None:
parameters = {}

import multiprocessing
from concurrent.futures import ProcessPoolExecutor

num_cores = exec_utils.get_param_value(Parameters.CORES, parameters, multiprocessing.cpu_count() - 2)
align_intermediate_result = []
with ProcessPoolExecutor(max_workers=num_cores) as executor:
@@ -27,6 +27,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
BETA = "beta"


@@ -59,11 +60,13 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
beta = exec_utils.get_param_value(Parameters.BETA, parameters, 0)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}

variants_occ = {x["variant"]: x["case:concept:name"] for x in
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
@@ -27,6 +27,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
N = "n"


@@ -58,10 +59,12 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
n = exec_utils.get_param_value(Parameters.N, parameters, 2)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
variants_occ = {x["variant"]: x["case:concept:name"] for x in
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
@@ -28,6 +28,7 @@
class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
METRIC_NORMALIZATION = "metric_normalization"


@@ -56,10 +57,12 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A
from pm4py.statistics.traces.generic.pandas import case_statistics

resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_variants = {case_statistics.Parameters.ACTIVITY_KEY: resource_key,
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key}
variants_occ = {x["variant"]: x["case:concept:name"] for x in
case_statistics.Parameters.ATTRIBUTE_KEY: resource_key,
case_statistics.Parameters.CASE_ID_KEY: case_id_key}
variants_occ = {x["variant"]: x[case_id_key] for x in
case_statistics.get_variant_statistics(log, parameters=parameters_variants)}
variants_resources = list(variants_occ.keys())
resources = [variants_util.get_activities_from_variant(y) for y in variants_resources]
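The three resource-metric variants above share the same change: the case-id column is read from a new `CASE_ID_KEY` parameter (defaulting to `constants.CASE_CONCEPT_NAME`, i.e. `"case:concept:name"`) and forwarded to the variant-statistics call, instead of indexing `x["case:concept:name"]` directly. A minimal sketch of the lookup, with the variant's own `Parameters` enum passed in for illustration:

```python
# Generic sketch of the parameter resolution added above; the helper name is illustrative.
from pm4py.util import constants, exec_utils

def resolve_case_id_key(parameters_enum, parameters):
    # parameters_enum is the variant's Parameters enum, extended with CASE_ID_KEY in this commit
    return exec_utils.get_param_value(parameters_enum.CASE_ID_KEY, parameters,
                                      constants.CASE_CONCEPT_NAME)
```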
1 change: 0 additions & 1 deletion pm4py/filtering.py
@@ -392,7 +392,6 @@ def filter_eventually_follows_relation(log: Union[EventLog, pd.DataFrame], relat
cases = cases.intersection(this_traces)
return log[log[constants.CASE_CONCEPT_NAME].isin(cases)]
else:
from pm4py.objects.log.obj import EventLog
from pm4py.algo.filtering.log.ltl import ltl_checker
parameters[ltl_checker.Parameters.POSITIVE] = retain
if retain:
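For context, a usage sketch of the simplified-interface filter implemented in this file (the eventually-follows fix listed in the changelog); the file path and activity labels are illustrative:

```python
# Usage sketch of pm4py.filter_eventually_follows_relation; path and labels illustrative.
import pm4py

log = pm4py.read_xes("running-example.xes")
filtered = pm4py.filter_eventually_follows_relation(
    log, [("register request", "pay compensation")], retain=True
)
print(len(filtered))
```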
2 changes: 1 addition & 1 deletion pm4py/meta.py
@@ -15,7 +15,7 @@
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
__name__ = 'pm4py'
VERSION = '2.2.20'
VERSION = '2.2.21'
__version__ = VERSION
__doc__ = 'Process Mining for Python (PM4Py)'
__author__ = 'Fraunhofer Institute for Applied Technology'
7 changes: 6 additions & 1 deletion pm4py/objects/bpmn/layout/variants/graphviz.py
@@ -124,6 +124,7 @@ def apply(bpmn_graph, parameters=None):
points = node.split("points=\"")[1].split("\"")[0]
nodes_pos[inv_nodes_dict[this_id]] = points

endpoints_wh = exec_utils.get_param_value(Parameters.TASK_WH, parameters, 30)
task_wh = exec_utils.get_param_value(Parameters.TASK_WH, parameters, 60)

# add node positions to BPMN nodes
@@ -134,12 +135,16 @@
pos_y = float(node_pos[1])
n.set_x(pos_x)
n.set_y(pos_y)
n.set_height(task_wh)
if isinstance(n, BPMN.Task):
this_width = min(round(2 * task_wh), round(2 * (len(n.get_name()) + 7) * task_wh / 22.0))
n.set_width(this_width)
n.set_height(task_wh)
elif isinstance(n, BPMN.StartEvent) or isinstance(n, BPMN.EndEvent):
n.set_width(endpoints_wh)
n.set_height(endpoints_wh)
else:
n.set_width(task_wh)
n.set_height(task_wh)

max_x = max(1, max(abs(node.get_x()) for node in nodes))
max_y = max(1, max(abs(node.get_y()) for node in nodes))
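A numeric sketch of the sizing rules above, using the defaults visible in this hunk (60 for tasks and other nodes, 30 for start/end events); the labels are illustrative:

```python
# Numeric sketch of the layout sizing above: task boxes scale with the label length
# (capped at 2 * task_wh), while start/end events get the smaller endpoints_wh square.
task_wh = 60       # default from the hunk above
endpoints_wh = 30  # default from the hunk above

def task_width(label):
    return min(round(2 * task_wh), round(2 * (len(label) + 7) * task_wh / 22.0))

print(task_width("check"))             # 65  -> short labels get a narrower box
print(task_width("register request"))  # 120 -> long labels hit the 2 * task_wh cap
print((endpoints_wh, endpoints_wh))    # (30, 30) for start and end events
```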
5 changes: 4 additions & 1 deletion pm4py/objects/log/exporter/xes/variants/etree_xes_exp.py
@@ -48,7 +48,10 @@ class Parameters(Enum):
"datetime": xes_util.TAG_DATE,
"Timestamp": xes_util.TAG_DATE,
"bool": xes_util.TAG_BOOLEAN,
"dict": xes_util.TAG_LIST
"dict": xes_util.TAG_LIST,
"numpy.int64": xes_util.TAG_INT,
"numpy.float64": xes_util.TAG_FLOAT,
"numpy.datetime64": xes_util.TAG_DATE
}
# if a type is not found in the previous list, then default to string
__DEFAULT_TYPE = xes_util.TAG_STRING
5 changes: 4 additions & 1 deletion pm4py/objects/log/exporter/xes/variants/line_by_line.py
@@ -38,7 +38,10 @@ class Parameters(Enum):
"datetime": xes_util.TAG_DATE,
"Timestamp": xes_util.TAG_DATE,
"bool": xes_util.TAG_BOOLEAN,
"dict": xes_util.TAG_LIST
"dict": xes_util.TAG_LIST,
"numpy.int64": xes_util.TAG_INT,
"numpy.float64": xes_util.TAG_FLOAT,
"numpy.datetime64": xes_util.TAG_DATE
}
# if a type is not found in the previous list, then default to string
__DEFAULT_TYPE = xes_util.TAG_STRING
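A minimal sketch of what the extended type mapping enables: exporting a log whose attribute values are Numpy scalars now yields typed XES attributes instead of the string fallback. Attribute names and file path are illustrative, and the public facade is used rather than this variant directly:

```python
# Sketch: Numpy scalar attributes are exported as typed XES attributes
# (int/float/date) rather than defaulting to string; names are illustrative.
import numpy as np
import pm4py
from pm4py.objects.log.obj import Event, EventLog, Trace

event = Event({"concept:name": "register request",
               "amount": np.int64(100),     # exported as an <int> attribute
               "score": np.float64(0.5)})   # exported as a <float> attribute
log = EventLog([Trace([event])])
pm4py.write_xes(log, "numpy_attributes_example.xes")
```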
