In [1]:
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
from pycelonis import get_celonis
from pycelonis.celonis_api.pql.pql import PQL, PQLColumn, PQLFilter
from pyinsights import Connector
import itertools

In [2]:
celonis_url = "https://christian-fiedler1-rwth-aachen-de.training.celonis.cloud/"
api_token = "MzdhNWNlNDItOTJhNC00ZTE1LThlMGMtOTc4MGVmOWNjYjIyOjVTcW8wSlVmbFVkMG84bFZTRUw4bTJDZVNIazVZWlJsZWQ2bTUzbWtLSDJM"
connector = Connector(api_token=api_token, url=celonis_url, key_type="USER_KEY")

# choose data model
print("Available datamodels:")
print(connector.celonis.datamodels)
print("Input id of datamodel:")

connector.set_parameters(model_id="bf7dfa1e-5e86-470e-9f7c-5672e8b1637f")#, end_timestamp="END_DATE")

datamodel = connector.datamodel
activity_table = connector.activity_table()
case_col = connector.case_col()
act_col = connector.activity_col()
timestamp = connector.timestamp()
transition_mode = "ANY_OCCURRENCE[] TO ANY_OCCURRENCE[]"


2022-12-06 14:28:02.691 INFO    pycelonis.Celonis: Initial connect successful! Hello Christian Fiedler. PyCelonis Version: 1.7.3


Available datamodels:
[
<Datamodel, id 32b0abb8-bbcf-4700-8123-d11443e57bdd, name deviation_test_small>,
<Datamodel, id 376145f1-790d-4deb-8e20-083a4dfd7ca7, name bac>,
<Datamodel, id 5bafe966-eae5-455d-8435-1a3f51f71179, name sepsis-cases>,
<Datamodel, id 799586e2-4d1f-4e5d-85ff-2bb17a491d1b, name bpi-12>,
<Datamodel, id 99e62af6-ce51-445d-a1e6-634aaeafff11, name waiting_test_small>,
<Datamodel, id be94a068-0970-4400-b2a1-c604b174503e, name bpi-challenge-2017>,
<Datamodel, id 01184974-3604-49fc-b410-ad88143f9802, name mlapm-bac-sampled>,
<Datamodel, id bf7dfa1e-5e86-470e-9f7c-5672e8b1637f, name mlapm-running-example>,
]
Input id of datamodel:


In [3]:
query = PQL()
query.add(PQLColumn(name=case_col,
                        query=f"""DISTINCT "{activity_table}"."{case_col}"  """))
query.add(PQLColumn(name=act_col,
                        query=f""" "{activity_table}"."{act_col}"  """))
query.add(PQLColumn(
        name="max nr", query=f"""
        PU_MAX( DOMAIN_TABLE("{activity_table}"."{case_col}", "{activity_table}"."{act_col}"),
        ACTIVATION_COUNT ( "{activity_table}"."{act_col}" ) ) """))
query += PQLFilter(f"""
    PU_MAX( DOMAIN_TABLE("{activity_table}"."{case_col}", "{activity_table}"."{act_col}"),
        ACTIVATION_COUNT ( "{activity_table}"."{act_col}" ) ) >= 1
    """)
df = datamodel.get_data_frame(query)
print(df.to_string())


2022-12-06 14:28:05.486 INFO    pycelonis.ComputeNode: PQL Export started...
2022-12-06 14:28:06.599 INFO    pycelonis.ComputeNode: PQL Export status: DONE


    CASE:CONCEPT:NAME            ACTIVITY  max nr
0                   1        check ticket       1
1                   1              decide       1
2                   1  examine thoroughly       1
3                   1    register request       1
4                   1      reject request       1
5                   2        check ticket       1
6                   2              decide       1
7                   2    examine casually       1
8                   2    pay compensation       1
9                   2    register request       1
10                  3        check ticket       2
11                  3              decide       2
12                  3    examine casually       1
13                  3  examine thoroughly       1
14                  3    pay compensation       1
15                  3    register request       1
16                  3  reinitiate request       1
17                  4        check ticket       1
18                  4              decide       1


In [4]:
df_toy = pd.DataFrame({case_col: [1,1,2], act_col: ['a','b','a'], "max nr":[1,1,1]})
grouped = df_toy.groupby(by=[case_col, "max nr"], axis=0)
act_list = df_toy[act_col].unique()
pairs = itertools.combinations(act_list,2)
print(list(pairs))
print(grouped.groups)

[('a', 'b')]
{(1, 1): [0, 1], (2, 1): [2]}


In [5]:
singletons = []
relation = []
groups = grouped.groups
for cases in grouped.groups.values():
    if len(cases) < 2:
        singletons.append(df_toy.loc[cases, act_col].unique()[0])

    else:
        activities = df_toy.loc[cases, act_col]
        activities.drop(labels=singletons)
        pairs = itertools.combinations(activities,2)
        for pair in pairs:
            relation.append(pair)

print(singletons)
print(relation)


['a']
[('a', 'b')]


In [19]:
grouped2 = df.groupby(by=[act_col], axis=0)
groups = grouped2.groups
print(groups)
d2 = {k: df.loc[v, [case_col, "max nr"]] for k, v in groups.items()}
print(d2)
combs = itertools.permutations(d2.keys(),2)
list2 = set()
for pair in combs:

    if len(d2[pair[0]].merge(d2[pair[1]])) == len(d2[pair[0]]):
        if tuple((pair[1], pair[0])) not in list2:
            list2.add(pair)

print(set(list2))



{'check ticket': [0, 5, 10, 17, 22, 28], 'decide': [1, 6, 11, 18, 23, 29], 'examine casually': [7, 12, 24, 30], 'examine thoroughly': [2, 13, 19], 'pay compensation': [8, 14, 31], 'register request': [3, 9, 15, 20, 25, 32], 'reinitiate request': [16, 26], 'reject request': [4, 21, 27]}
{'check ticket':     CASE:CONCEPT:NAME  max nr
0                   1       1
5                   2       1
10                  3       2
17                  4       1
22                  5       3
28                  6       1, 'decide':     CASE:CONCEPT:NAME  max nr
1                   1       1
6                   2       1
11                  3       2
18                  4       1
23                  5       3
29                  6       1, 'examine casually':     CASE:CONCEPT:NAME  max nr
7                   2       1
12                  3       1
24                  5       3
30                  6       1, 'examine thoroughly':     CASE:CONCEPT:NAME  max nr
2                   1       1
13         

In [11]:
from pm4py.algo.discovery.log_skeleton import algorithm as lsk_discovery
import pm4py
log = pm4py.read_xes("../../event-logs/running-example.xes")

skeleton = lsk_discovery.apply(log, parameters={lsk_discovery.Variants.CLASSIC.value.Parameters.NOISE_THRESHOLD: 0.0})
skeleton


ModuleNotFoundError: No module named 'pm4py.algo'; 'pm4py' is not a package