# Attribute importance ranking for synthetic data with known change points
The process change points are known. Hence, no change point detection algorithm needs to be used.

In [1]:
import pm4py
import helper
import pandas as pd

In [2]:
# Path of the XES event log analyzed in this notebook.
# The commented-out assignments are alternative datasets; swap the active line to switch.
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/ROI/ROI2.5k.xes'
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/cb/cb10k.xes'
data_file_path = 'data/synthetic/generated/new_attribute_values/2500_sudden_5_00.xes'

## Load an event log

In [3]:
# Load the data info for the selected log and keep the known change points
data_info = helper.get_data_information(data_file_path)
change_points = data_info['change_points']

# Display the data info (a bare expression is only rendered as the last line of a cell)
data_info

In [4]:
# load the event log into pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
event_log = xes_importer.apply(data_file_path)

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|██████████| 5000/5000 [00:05<00:00, 996.56it/s] 


In [5]:
from processdrift.framework import drift_detection
from processdrift.framework import drift_explanation
from processdrift.framework import feature_extraction
from processdrift.framework import pop_comparison
from processdrift.framework import windowing

In [6]:
# The change points are known for this synthetic log, so the primary detector is
# built directly from them instead of running a change point detection algorithm.
primary_process_drift_detector = drift_detection.DriftDetectorTrueKnown(change_points)

In [7]:
# build the secondary process drift detectors
# NOTE(review): presumably one detector per event-log attribute, skipping the
# excluded ones — confirm against get_all_attribute_drift_detectors.
secondary_window_generator = windowing.AdaptiveWindowGenerator(initial_window_size=100)
secondary_population_comparer = pop_comparison.HellingerDistanceComparer()
secondary_threshold = 0.7
secondary_exclude_attributes = ['concept:name']
secondary_drift_detectors = drift_detection.get_all_attribute_drift_detectors(
    event_log,
    secondary_window_generator,
    secondary_population_comparer,
    threshold=secondary_threshold,
    exclude_attributes=secondary_exclude_attributes,
)

In [8]:
# Combine the primary detector and the secondary detectors in a single explainer
drift_explainer = drift_explanation.DriftExplainer(primary_process_drift_detector, secondary_drift_detectors)

In [9]:
# Compute the explanations per primary change point and show them.
# NOTE(review): max_distance=100 presumably bounds how far a secondary change point
# may lie from a primary one to be reported — confirm. The evaluation further down
# uses max_distance=40, so explanations further away than that are not matched there.
observed_change_point_explanations = drift_explainer.attribute_importance_per_primary_change_point(event_log, max_distance=100)
observed_change_point_explanations

{250: [{'detector': 'relevant_attribute_1',
   'change_point': 223,
   'distance': -27}],
 500: [{'detector': 'relevant_attribute_2',
   'change_point': 438,
   'distance': -62}],
 750: [{'detector': 'relevant_attribute_3',
   'change_point': 724,
   'distance': -26}],
 1000: [{'detector': 'relevant_attribute_4',
   'change_point': 981,
   'distance': -19},
  {'detector': 'relevant_attribute_4', 'change_point': 972, 'distance': -28}],
 1250: [{'detector': 'relevant_attribute_5',
   'change_point': 1241,
   'distance': -9}],
 1500: [],
 1750: [],
 2000: [],
 2250: []}

## Evaluate the explanations

In [10]:
from processdrift.framework import evaluation

In [11]:
# Show the observed explanations followed by the ground truth stored in the data info
display(observed_change_point_explanations)
true_change_point_explanations = data_info['change_point_explanations']
display(true_change_point_explanations)

{250: [{'detector': 'relevant_attribute_1',
   'change_point': 223,
   'distance': -27}],
 500: [{'detector': 'relevant_attribute_2',
   'change_point': 438,
   'distance': -62}],
 750: [{'detector': 'relevant_attribute_3',
   'change_point': 724,
   'distance': -26}],
 1000: [{'detector': 'relevant_attribute_4',
   'change_point': 981,
   'distance': -19},
  {'detector': 'relevant_attribute_4', 'change_point': 972, 'distance': -28}],
 1250: [{'detector': 'relevant_attribute_5',
   'change_point': 1241,
   'distance': -9}],
 1500: [],
 1750: [],
 2000: [],
 2250: []}

[{'attribute_name': 'relevant_attribute_1',
  'base_distribution': [0.15507678534396094,
   0.4605471280640772,
   0.3843760865919619],
  'explain_change_point': 250,
  'change_point': 250,
  'drift_type': 'sudden'},
 {'attribute_name': 'relevant_attribute_2',
  'base_distribution': [0.16487550177139298,
   0.7426848905124938,
   0.09243960771611343],
  'explain_change_point': 500,
  'change_point': 500,
  'drift_type': 'sudden'},
 {'attribute_name': 'relevant_attribute_3',
  'base_distribution': [0.7104320528086568,
   0.2323879020103484,
   0.05718004518099477],
  'explain_change_point': 750,
  'change_point': 750,
  'drift_type': 'sudden'},
 {'attribute_name': 'relevant_attribute_4',
  'base_distribution': [0.6376810784231896,
   0.32200858055524667,
   0.04031034102156378],
  'explain_change_point': 1000,
  'change_point': 1000,
  'drift_type': 'sudden'},
 {'attribute_name': 'relevant_attribute_5',
  'base_distribution': [0.37358488274714935, 0.6264151172528507, 0],
  'explain_chan

In [14]:
def get_simple_change_point_format_from_data_info(data_info):
    """Reduce the explanations stored in ``data_info`` to simple tuples.

    Args:
        data_info: Mapping with a ``'change_point_explanations'`` entry, an
            iterable of dicts that each provide ``'change_point'`` and
            ``'attribute_name'``.

    Returns:
        A list of ``(change_point, attribute_name)`` tuples in input order.
    """
    return [
        (explanation['change_point'], explanation['attribute_name'])
        for explanation in data_info['change_point_explanations']
    ]

In [20]:
def get_simple_change_point_list_from_explainer(change_point_explanations):
    """Flatten the explainer output into a list of simple tuples.

    Args:
        change_point_explanations: Mapping whose values are iterables of dicts
            that each provide ``'change_point'`` and ``'detector'``.

    Returns:
        A list of ``(change_point, detector)`` tuples, ordered by the mapping's
        iteration order and then by position within each value.
    """
    # A nested comprehension flattens in linear time; sum(values, []) copies the
    # accumulated list on every addition and only works for list values.
    return [
        (cp_explanation['change_point'], cp_explanation['detector'])
        for cp_explanations in change_point_explanations.values()
        for cp_explanation in cp_explanations
    ]

In [23]:
# Bring ground truth and detections into the same (change point, attribute) tuple form
true_change_point_explanations = get_simple_change_point_format_from_data_info(data_info)
detected_change_point_explanations = get_simple_change_point_list_from_explainer(
    observed_change_point_explanations
)

# Score the detected explanations against the ground truth
evaluation.evaluate_explanations(
    true_change_point_explanations,
    detected_change_point_explanations,
    max_distance=40,
)

{'precision': 0.6666666666666666,
 'recall': 0.8,
 'f1_score': 0.7272727272727273,
 'mean_lag': 22.5,
 'all_lags': [27, 26, 28, 9]}