# Notebook for experimentation and testing

In [1]:
from pathlib import Path

from pm4py.objects.log.importer.xes import importer as xes_importer

In [3]:
# load a log

# data settings
# Build the path from its components so the separator matches the host OS;
# the previous hard-coded backslashes only resolved on Windows.
# Kept as a plain string, exactly as before, for the importer call below.
data_file_path = str(
    Path('data', 'synthetic', 'maardji et al 2013_xes_attributes', 'logs', 'cb', 'cb2.5k.xes')
)

# load event log
event_log = xes_importer.apply(data_file_path)

parsing log, completed traces :: 100%|██████████| 2500/2500 [00:02<00:00, 1078.20it/s]


Test the runs feature

In [14]:
from processdrift import features
import networkx as nx

def get_runs(traces):
    """Build one run (a string-encoded edge list) per trace.

    For every trace the edges returned by
    ``features.get_alpha_direct_relationships`` are expanded to their
    transitive closure, all edges contained in the log-wide concurrency
    relation are removed, and the remaining graph is transitively reduced
    when it is acyclic.

    Args:
        traces: Re-iterable collection of traces (it is traversed twice: once
            by ``features.get_concurrency`` and once here). Each trace is
            presumably a sequence of activity labels -- confirm against
            ``processdrift.features``.

    Returns:
        list[str]: One entry per trace, in input order. Each entry is
        ``str()`` of the sorted edge list of that trace's graph, so equal
        runs compare equal and can be collected in a ``set``.
    """
    # Concurrency is determined once over all traces and reused for each trace.
    global_concurrency = features.get_concurrency(traces)

    runs = []
    for trace in traces:
        # Edges of this single trace (the helper expects a list of traces).
        trace_edges_precede = features.get_alpha_direct_relationships([trace], direction='precedes')
        trace_graph = nx.DiGraph(trace_edges_precede)

        # apply transitive closure (expands the graph)
        closure_graph = nx.transitive_closure(trace_graph, reflexive=False)

        # remove the global concurrency relations
        closure_graph.remove_edges_from(global_concurrency)

        # Transitive reduction is only defined for acyclic graphs; networkx
        # raises NetworkXError otherwise, in which case the unreduced graph
        # is used as the run.
        try:
            reduced_graph = nx.transitive_reduction(closure_graph)
        except nx.NetworkXError:
            reduced_graph = closure_graph

        # Sorting makes the string independent of edge insertion order.
        runs.append(str(sorted(reduced_graph.edges)))

    return runs

In [15]:
# Two traces that differ only in the order of b and c should result in the
# same run.
example_traces = [['a', 'b', 'c', 'd'], ['a', 'c', 'b', 'd']]

runs = get_runs(example_traces)

# Check the expectation explicitly instead of relying on reading the output.
assert len(set(runs)) == 1, f"expected a single run, got {len(set(runs))}"
set(runs)

{"[('a', 'b'), ('a', 'c'), ('b', 'd'), ('c', 'd')]"}

In [16]:
# Same check against the library implementation, with longer traces.
# Note: `list * n` repeats the activities *inside* one trace
# (a, b, c, d, a, b, c, d, ...); it does not add n copies of the trace.
example_traces = [['a', 'b', 'c', 'd'] * 2, ['a', 'c', 'b', 'd'] * 3]

runs = features.get_runs(example_traces)

# Check the expectation explicitly instead of relying on reading the output.
assert len(set(runs)) == 1, f"expected a single run, got {len(set(runs))}"
len(set(runs))

1

In [29]:
# Concurrency relation computed on the traces of the loaded event log.
log_traces = features._get_traces(event_log)
features.get_concurrency(log_traces)

{('Appraise_property', 'Appraise_property'),
 ('Appraise_property', 'Assess_loan_risk'),
 ('Appraise_property', 'Check_credit_history'),
 ('Approve_application', 'Approve_application'),
 ('Assess_eligibility', 'Assess_eligibility'),
 ('Assess_loan_risk', 'Appraise_property'),
 ('Assess_loan_risk', 'Assess_loan_risk'),
 ('Cancel_application', 'Cancel_application'),
 ('Check__application__form_completeness',
  'Check__application__form_completeness'),
 ('Check_credit_history', 'Appraise_property'),
 ('Check_credit_history', 'Check_credit_history'),
 ('Check_if_home_insurance_quote_is_requested',
  'Check_if_home_insurance_quote_is_requested'),
 ('Loan__application_approved', 'Loan__application_approved'),
 ('Loan__application_canceled', 'Loan__application_canceled'),
 ('Loan__application_received', 'Loan__application_received'),
 ('Loan_application_rejected', 'Loan_application_rejected'),
 ('Prepare_acceptance_pack', 'Prepare_acceptance_pack'),
 ('Receive_updated_application', 'Receive_u

In [24]:
# A more complex test case over the activities a, b, c, d, e, f:
#   * b sometimes occurs parallel to c
#   * e sometimes occurs parallel to f
# Expectation: all traces map to a single run.
#
# `list(...) * n` repeats the activity sequence inside ONE trace; it does not
# produce n copies of the trace.
# NOTE(review): when this cell was last executed it reported 3 distinct runs,
# so the expectation above is not met. If n copies of each trace were
# intended, `[trace] * n` would be the way to build them -- TODO confirm.
example_traces = [
    list('abcdef') * 2,
    list('acbdef') * 3,
    list('acbdfe') * 3,
    list('acdef') * 3,
    list('abdef') * 3,
    list('abbdef'),
]
runs = features.get_runs(example_traces)
print(features.get_concurrency(example_traces))
print(set(runs))
print(len(set(runs)))

{('c', 'b'), ('e', 'f'), ('f', 'e'), ('b', 'b'), ('b', 'c')}
{"[('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('c', 'b'), ('c', 'd'), ('d', 'e'), ('d', 'f'), ('e', 'a'), ('e', 'f'), ('f', 'a'), ('f', 'e')]", "[('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('b', 'd'), ('c', 'b'), ('c', 'd'), ('d', 'e'), ('d', 'f'), ('e', 'f'), ('f', 'e')]", "[('a', 'b'), ('a', 'c'), ('b', 'd'), ('c', 'd'), ('d', 'e'), ('d', 'f'), ('e', 'a'), ('e', 'f'), ('f', 'a'), ('f', 'e')]"}
3


In [None]:
# Compute the runs for the loaded event log: extract its traces first, then
# feed them to the library implementation. Nothing is displayed by this cell;
# the results stay in `traces` and `runs`.
traces = features._get_traces(event_log)
runs = features.get_runs(traces)