# Attribute importance ranking for synthetic data with known change points
The process change points are known in advance, so no change point detection algorithm is needed.

In [1]:
import pm4py
import helper
import pandas as pd

In [2]:
# Path of the XES event log used in this notebook. The commented-out lines are
# alternative logs; swap which line is active to run on a different data set.
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/ROI/ROI2.5k.xes'
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/cb/cb10k.xes'
data_file_path = 'data/synthetic/generated/new_attribute_values/2500_sudden_5_00.xes'

## Load an event log

In [3]:
# Look up the information the helper module provides for the selected log.
data_info = helper.get_data_information(data_file_path)

# The known change points are taken from that information.
change_points = data_info['change_points']

# Display the data info. Jupyter only renders a bare expression when it is the
# last statement of the cell, so it must come after the assignment above.
data_info

In [4]:
# Load the event log into pm4py.
# The XES importer is applied to the log file selected above; the result is
# kept in `event_log` and passed to the concept drift explainer further down.
from pm4py.objects.log.importer.xes import importer as xes_importer
event_log = xes_importer.apply(data_file_path)

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|███████████████████████████████████████████| 2500/2500 [00:01<00:00, 1524.55it/s]


In [5]:
from concept_drift import framework
from concept_drift import drift_point_detectors
from concept_drift import window_creators
from concept_drift import importance_measurers
from concept_drift import feature_extractors

In [None]:
# NOTE(review): this binds the FeatureExtractor class itself (there is no
# call), and `feature_extractor` is not used by any other cell shown here.
# Confirm whether an instance was intended or whether this cell can go.
feature_extractor = feature_extractors.FeatureExtractor

In [6]:
# The change points are known, so the detector is simply given them instead of
# running a change point detection algorithm.
drift_point_detector = drift_point_detectors.DriftPointDetectorTrueKnown(change_points)

In [8]:
# Windowing set-up. The window size lives in its own variable because the
# importance measurer set-up further down reuses it.
window_size = 50
window_creator = window_creators.WindowCreator(
    window_size=window_size,
    slide_by=10,
)

In [9]:
# threshold = 0.03
# difference_measure = importance_measurers.DifferenceMeasureChiSquare(threshold)

In [10]:
# Difference measure handed to the attribute importance measurer below
# (the Hellinger variant, going by the class name).
difference_measure = importance_measurers.DifferenceMeasureHellinger()

In [11]:
# Offset handed to the importance measurer: the window size plus 100.
# NOTE(review): presumably how far before each change point the measurement
# starts — confirm against AttributeImportanceMeasurer.
start_before_change_point = window_size + 100
attribute_importance_measurer = importance_measurers.AttributeImportanceMeasurer(
    window_creator,
    start_before_change_point,
    difference_measure,
)

In [12]:
# Wire the drift point detector and the attribute importance measurer into the
# explainer that the following cells query.
concept_drift_explainer = framework.ConceptDriftExplainer(drift_point_detector, attribute_importance_measurer)

In [13]:
# Attribute importance for each change point. As the output below shows, the
# result is keyed by change point (250, 500, ...) and holds one value per
# attribute; `concept:name` comes back as None.
attribute_importance_per_changepoint = concept_drift_explainer.get_attribute_importance_per_changepoint(event_log)
# Bare last expression so the notebook renders the result.
attribute_importance_per_changepoint

{250: concept:name                  None
 relevant_attribute_1      0.532692
 relevant_attribute_2      0.142622
 relevant_attribute_3      0.166103
 relevant_attribute_4      0.116633
 relevant_attribute_5      0.112231
 irrelevant_attribute_1    0.148712
 irrelevant_attribute_2    0.158225
 irrelevant_attribute_3    0.226481
 irrelevant_attribute_4    0.096482
 irrelevant_attribute_5    0.153222
 dtype: object,
 500: concept:name                  None
 relevant_attribute_1      0.307393
 relevant_attribute_2      0.351641
 relevant_attribute_3      0.150742
 relevant_attribute_4       0.14426
 relevant_attribute_5      0.094913
 irrelevant_attribute_1    0.100316
 irrelevant_attribute_2    0.173867
 irrelevant_attribute_3    0.292974
 irrelevant_attribute_4    0.166651
 irrelevant_attribute_5    0.197426
 dtype: object,
 750: concept:name                  None
 relevant_attribute_1      0.289806
 relevant_attribute_2      0.122003
 relevant_attribute_3      0.577314
 relevant_attribu

In [14]:
# One overall importance value per attribute; the output below lists them in
# descending order.
# NOTE(review): presumably aggregated over the change points — confirm.
concept_drift_explainer.get_attribute_importance(event_log)

relevant_attribute_1      1.979416
relevant_attribute_3      1.818820
irrelevant_attribute_3    1.762033
relevant_attribute_4      1.680535
relevant_attribute_2      1.657604
irrelevant_attribute_5    1.604038
irrelevant_attribute_1    1.514850
relevant_attribute_5      1.479807
irrelevant_attribute_4    1.463294
irrelevant_attribute_2    1.454516
concept:name              0.000000
dtype: float64