# Attribute importance ranking for synthetic data with known change points
The process change points are known. Hence, no change point detection algorithm needs to be used.

In [1]:
import pm4py
import helper
import pandas as pd

In [2]:
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/ROI/ROI2.5k.xes'
data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/cb/cb10k.xes'

## Load an event log

In [3]:
# Display the data info
data_info = helper.get_data_information(data_file_path)
data_info

change_points = data_info['change_points']

In [4]:
# load the event log into pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
event_log = xes_importer.apply(data_file_path)

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|█████████████████████████████████████████| 10000/10000 [00:08<00:00, 1129.45it/s]


In [5]:
from concept_drift import framework
from concept_drift import drift_point_detectors
from concept_drift import window_creators
from concept_drift import importance_measurers

In [6]:
drift_point_detector = drift_point_detectors.DriftPointDetectorTrueKnown(change_points)

In [7]:
window_size = 300
window_creator = window_creators.WindowCreator(window_size=window_size)

In [8]:
# threshold = 0.03
# difference_measure = importance_measurers.DifferenceMeasureChiSquare(threshold)

In [9]:
difference_measure = importance_measurers.DifferenceMeasureHellinger()

In [10]:
start_before_change_point = window_size + 100
attribute_importance_measurer = importance_measurers.AttributeImportanceMeasurer(window_creator, start_before_change_point, difference_measure)

In [11]:
concept_drift_explainer = framework.ConceptDriftExplainer(drift_point_detector, attribute_importance_measurer)

In [12]:
attribute_importance_per_changepoint = concept_drift_explainer.get_attribute_importance_per_changepoint(event_log)
attribute_importance_per_changepoint

{1000: concept:name              1.000000
 relevant_attribute_1      0.109996
 relevant_attribute_2      0.046328
 relevant_attribute_3      0.042032
 irrelevant_attribute_1    0.034365
 irrelevant_attribute_2    0.041559
 irrelevant_attribute_3    0.040423
 dtype: float64,
 2000: concept:name              1.000000
 relevant_attribute_1      0.044729
 relevant_attribute_2      0.198724
 relevant_attribute_3      0.026353
 irrelevant_attribute_1    0.062430
 irrelevant_attribute_2    0.026691
 irrelevant_attribute_3    0.042188
 dtype: float64,
 3000: concept:name              1.000000
 relevant_attribute_1      0.051015
 relevant_attribute_2      0.037972
 relevant_attribute_3      0.412242
 irrelevant_attribute_1    0.038421
 irrelevant_attribute_2    0.051659
 irrelevant_attribute_3    0.042854
 dtype: float64,
 4000: concept:name              1.000000
 relevant_attribute_1      0.019136
 relevant_attribute_2      0.039838
 relevant_attribute_3      0.095872
 irrelevant_attribute_1  

In [13]:
concept_drift_explainer.get_attribute_importance(event_log)

concept:name              9.000000
relevant_attribute_3      0.778594
relevant_attribute_2      0.529606
irrelevant_attribute_1    0.425065
relevant_attribute_1      0.417058
irrelevant_attribute_3    0.352276
irrelevant_attribute_2    0.333302
dtype: float64