In [1]:
import itertools
import os

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from pm4py.algo.conformance.temporal_profile import algorithm as temporal_profile_conformance
from pm4py.algo.discovery.temporal_profile import algorithm as temporal_profile_discovery
from pycelonis import get_celonis
from pycelonis.celonis_api.pql.pql import PQL, PQLColumn, PQLFilter
from pyinsights import Connector
from pyinsights.conformance import alignment_scores
from pyinsights.organisational_profiling import ResourceProfiler
from pyinsights.temporal_profiling import TemporalProfiler

In [63]:
# --- Celonis connection -------------------------------------------------------
# The API token is a credential and must not be stored in the notebook, so it is
# read from the CELONIS_API_TOKEN environment variable.
# NOTE(review): a token was previously committed in this cell; it should be
# revoked and replaced.
celonis_url = "https://christian-fiedler1-rwth-aachen-de.training.celonis.cloud/"
api_token = os.environ.get("CELONIS_API_TOKEN")
if not api_token:
    raise RuntimeError(
        "CELONIS_API_TOKEN is not set; export your Celonis user API key "
        "before running this cell."
    )
connector = Connector(api_token=api_token, url=celonis_url, key_type="USER_KEY")

# choose data model
print("Available datamodels:")
print(connector.celonis.datamodels)
print("Input id of datamodel:")

# The id below is the one listed as "mlapm-running-example" in the datamodel
# overview printed above.
# (optional, currently disabled: end_timestamp="END_DATE")
connector.set_parameters(model_id="bf7dfa1e-5e86-470e-9f7c-5672e8b1637f")

# Handles and column names reused by the following cells.
datamodel = connector.datamodel
activity_table = connector.activity_table()
case_col = connector.case_col()
act_col = connector.activity_col()
timestamp = connector.timestamp()
transition_mode = "ANY_OCCURRENCE[] TO ANY_OCCURRENCE[]"


2022-12-06 12:28:44.406 INFO    pycelonis.Celonis: Initial connect successful! Hello Christian Fiedler. PyCelonis Version: 1.7.3


Available datamodels:
[
<Datamodel, id 32b0abb8-bbcf-4700-8123-d11443e57bdd, name deviation_test_small>,
<Datamodel, id 376145f1-790d-4deb-8e20-083a4dfd7ca7, name bac>,
<Datamodel, id 5bafe966-eae5-455d-8435-1a3f51f71179, name sepsis-cases>,
<Datamodel, id 799586e2-4d1f-4e5d-85ff-2bb17a491d1b, name bpi-12>,
<Datamodel, id 99e62af6-ce51-445d-a1e6-634aaeafff11, name waiting_test_small>,
<Datamodel, id be94a068-0970-4400-b2a1-c604b174503e, name bpi-challenge-2017>,
<Datamodel, id 01184974-3604-49fc-b410-ad88143f9802, name mlapm-bac-sampled>,
<Datamodel, id bf7dfa1e-5e86-470e-9f7c-5672e8b1637f, name mlapm-running-example>,
]
Input id of datamodel:


In [65]:
# Assemble the PQL building blocks first, then attach them to the query.
# Each result row is a distinct (case, activity) combination.
case_column = PQLColumn(
    name=case_col,
    query=f"""DISTINCT "{activity_table}"."{case_col}"  """,
)
activity_column = PQLColumn(
    name=act_col,
    query=f""" "{activity_table}"."{act_col}"  """,
)
# "max nr": PU_MAX of ACTIVATION_COUNT over the (case, activity) domain table.
# NOTE(review): presumably the number of times the activity occurs in the
# case - confirm against the PQL documentation.
max_nr_column = PQLColumn(name="max nr", query=f"""
        PU_MAX( DOMAIN_TABLE("{activity_table}"."{case_col}", "{activity_table}"."{act_col}"),
        ACTIVATION_COUNT ( "{activity_table}"."{act_col}" ) ) """)
# Keep only rows whose "max nr" expression is at least 1.
occurrence_filter = PQLFilter(f"""
    PU_MAX( DOMAIN_TABLE("{activity_table}"."{case_col}", "{activity_table}"."{act_col}"),
        ACTIVATION_COUNT ( "{activity_table}"."{act_col}" ) ) >= 1
    """)

query = PQL()
for column in (case_column, activity_column, max_nr_column):
    query.add(column)
query += occurrence_filter

# Run the export against the selected datamodel and show the full result.
df = datamodel.get_data_frame(query)
print(df.to_string())


2022-12-06 12:29:24.346 INFO    pycelonis.ComputeNode: PQL Export started...
2022-12-06 12:29:24.490 INFO    pycelonis.ComputeNode: PQL Export status: DONE


    CASE:CONCEPT:NAME            ACTIVITY  max nr
0                   1        check ticket       1
1                   1              decide       1
2                   1  examine thoroughly       1
3                   1    register request       1
4                   1      reject request       1
5                   2        check ticket       1
6                   2              decide       1
7                   2    examine casually       1
8                   2    pay compensation       1
9                   2    register request       1
10                  3        check ticket       2
11                  3              decide       2
12                  3    examine casually       1
13                  3  examine thoroughly       1
14                  3    pay compensation       1
15                  3    register request       1
16                  3  reinitiate request       1
17                  4        check ticket       1
18                  4              decide       1


In [20]:
# Toy frame with the same three columns as the Celonis export (case id,
# activity, "max nr"); used to prototype the grouping logic on a tiny input.
df_toy = pd.DataFrame({case_col: [1, 1, 2], act_col: ['a', 'b', 'a'], "max nr": [1, 1, 1]})

# Group rows that share both the case id and the "max nr" value.
# `axis=0` is omitted: it is the default, and the `axis` keyword of
# DataFrame.groupby is deprecated in recent pandas releases.
grouped = df_toy.groupby(by=[case_col, "max nr"])

act_list = df_toy[act_col].unique()
# Materialise the combinations so `pairs` is still usable after printing;
# a bare itertools.combinations iterator would be exhausted by list().
pairs = list(itertools.combinations(act_list, 2))
print(pairs)
print(grouped.groups)

[('a', 'b')]
{(1, 1): [0, 1], (2, 1): [2]}


In [54]:
# Split the groups of `grouped` into:
#   singletons - the activity of every group that contains a single row
#   relation   - all 2-combinations of activities that share a group
singletons = []
relation = []
groups = grouped.groups
for cases in groups.values():
    # `cases` holds the df_toy row labels that belong to one group.
    activities = df_toy.loc[cases, act_col]
    if len(cases) < 2:
        singletons.append(activities.unique()[0])
    else:
        # Leave out activities already recorded as singletons. Filtering must be
        # done by value and the result re-assigned: Series.drop() matches index
        # labels (not activity names) and returns a new Series, so calling it
        # without assignment either has no effect or raises KeyError.
        # NOTE(review): this only excludes singletons found in *earlier*
        # groups, so the result depends on group order - confirm that is intended.
        activities = activities[~activities.isin(singletons)]
        relation.extend(itertools.combinations(activities, 2))

print(singletons)
print(relation)


['a']
[('a', 'b')]


In [93]:
# Same grouping as in the toy example, applied to the exported Celonis data:
# rows sharing case id and "max nr" end up in one group.
# `axis=0` is omitted: it is the default, and the `axis` keyword of
# DataFrame.groupby is deprecated in recent pandas releases.
grouped2 = df.groupby(by=[case_col, "max nr"])

# Mapping from (case id, "max nr") to the row labels of that group.
# NOTE(review): the output saved with this cell shows sets of activities, which
# these statements do not print - the cell appears to have been edited after
# it was last run.
groups = grouped2.groups


{(1, 1): {'register request', 'reject request', 'decide', 'check ticket', 'examine thoroughly'}, (2, 1): {'examine casually', 'register request', 'pay compensation', 'decide', 'check ticket'}, (3, 1): {'examine casually', 'register request', 'pay compensation', 'reinitiate request', 'examine thoroughly'}, (3, 2): {'decide', 'check ticket'}, (4, 1): {'register request', 'reject request', 'decide', 'check ticket', 'examine thoroughly'}, (5, 1): {'register request', 'reject request'}, (5, 2): {'reinitiate request'}, (5, 3): {'examine casually', 'decide', 'check ticket'}, (6, 1): {'examine casually', 'register request', 'pay compensation', 'decide', 'check ticket'}}
{'reject request', 'decide', 'register request', 'check ticket', 'examine thoroughly'}
{'reject request', 'examine thoroughly'}
{'reject request'}
{'reject request'}
set()
set()
set()
set()
set()
set()
set()


In [83]:
import pm4py
from pm4py.algo.discovery.log_skeleton import algorithm as lsk_discovery

# Reference result: discover the log skeleton of the running-example log with
# pm4py, using a noise threshold of 0.0.
log = pm4py.read_xes("../../event-logs/running-example.xes")

noise_threshold_key = lsk_discovery.Variants.CLASSIC.value.Parameters.NOISE_THRESHOLD
skeleton = lsk_discovery.apply(log, parameters={noise_threshold_key: 0.0})
skeleton


parsing log, completed traces ::   0%|          | 0/6 [00:00<?, ?it/s]

{'equivalence': {('check ticket', 'decide'),
  ('decide', 'check ticket'),
  ('examine thoroughly', 'register request'),
  ('pay compensation', 'examine casually'),
  ('pay compensation', 'register request'),
  ('reject request', 'register request')},
 'always_after': {('check ticket', 'decide'),
  ('examine casually', 'check ticket'),
  ('examine casually', 'decide'),
  ('examine thoroughly', 'decide'),
  ('register request', 'check ticket'),
  ('register request', 'decide'),
  ('register request', 'examine casually'),
  ('reinitiate request', 'check ticket'),
  ('reinitiate request', 'decide'),
  ('reinitiate request', 'examine casually')},
 'always_before': {('check ticket', 'register request'),
  ('decide', 'check ticket'),
  ('decide', 'examine casually'),
  ('decide', 'register request'),
  ('examine casually', 'register request'),
  ('examine thoroughly', 'register request'),
  ('pay compensation', 'check ticket'),
  ('pay compensation', 'decide'),
  ('pay compensation', 'examin