# Attribute importance ranking for synthetic data with known change points
The process change points are known. Hence, no change point detection algorithm needs to be used.

In [1]:
import pm4py
import helper
import pandas as pd

In [2]:
# Path of the XES event log analyzed by this notebook.
# The commented-out assignments are alternative synthetic logs; enable exactly one.
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/ROI/ROI2.5k.xes'
# data_file_path = 'data/synthetic/maardji et al 2013_xes_attributes/logs/cb/cb10k.xes'
data_file_path = 'data/synthetic/generated/new_attribute_values/2500_sudden_5_00.xes'

## Load an event log

In [3]:
# Read the information that accompanies the selected event log
data_info = helper.get_data_information(data_file_path)

# The change points are taken from the data information instead of being detected
change_points = data_info['change_points']

# Display the data info. A notebook only renders the value of a cell's LAST
# expression, so this line has to come after the assignment above.
data_info

In [4]:
# load the event log into pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
# Parse the XES file at data_file_path; later cells pass event_log to the framework
event_log = xes_importer.apply(data_file_path)

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|███████████████████████████████████████████| 2500/2500 [00:02<00:00, 1154.76it/s]


In [5]:
from concept_drift import framework

In [6]:
# Feature extractor handed to the process-level DriftDetector in a later cell
feature_extractor = framework.RelationalEntropyFeatureExtractor()

In [7]:
# Window generator used by the process-level detector and reused for the attribute-level detectors
# NOTE(review): presumably window_size and slide_by are counted in traces — confirm in framework
window_generator = framework.FixedSizeWindowGenerator(window_size=100, slide_by=10)

In [8]:
# Population comparer handed to the process-level DriftDetector in a later cell
# NOTE(review): the class name suggests a Kolmogorov-Smirnov test — confirm in framework
population_comparer = framework.KSTestPopulationComparer()

In [9]:
# Process-level drift detector assembled from the extractor, window generator and comparer
# created in the previous cells; it is later passed to DriftExplainer
process_drift_detector = framework.DriftDetector(feature_extractor, window_generator, population_comparer, threshold=0.6)

In [10]:
# process_change_series = process_drift_detector.get_change_series(event_log)
# process_change_series.plot()

In [11]:
# Comparer used by the attribute-level detectors
secondary_population_comparer = framework.HellingerDistanceComparer()
# Reuse the very same window generator object as the process-level detector
secondary_window_generator = window_generator

# Build the attribute-level drift detectors for the event log,
# leaving out the 'concept:name' attribute
attribute_drift_detectors = framework.get_all_attribute_drift_detectors(
    event_log,
    secondary_window_generator,
    secondary_population_comparer,
    threshold=0.6,
    exclude_attributes=['concept:name'],
)

In [12]:
# Combine the process-level detector with the attribute-level detectors
drift_explainer = framework.DriftExplainer(process_drift_detector, attribute_drift_detectors)
# Last expression of the cell, so the notebook shows the object's repr
drift_explainer

<concept_drift.framework.DriftExplainer at 0x21864f4f550>

In [None]:
# Compute the primary and secondary change series for the event log.
# plot() reads the result as: element 0 = primary change series,
# element 1 = mapping of attribute name -> secondary change series.
attribute_importances = drift_explainer.get_primary_and_secondary_change_series(event_log)
# Last expression of the cell, so the notebook renders the result
attribute_importances

In [None]:
import matplotlib.pyplot as plt

def plot(attribute_importances):
    """Plot the primary change series together with every secondary one.

    The primary series is drawn in red; each secondary series is drawn with
    matplotlib's default colors and labelled with its attribute name. The
    figure is shown immediately.

    Args:
        attribute_importances: Indexable pair whose element 0 is the primary
            change series and whose element 1 is a mapping from attribute
            name to that attribute's secondary change series.
    """
    primary_change = attribute_importances[0]
    secondary_change_series_dict = attribute_importances[1]

    # Plot the primary series in red. It is labelled so that the legend
    # identifies it and always has at least one entry to show.
    plt.plot(primary_change, color='red', label='primary change')

    for attribute_name, secondary_change_series in secondary_change_series_dict.items():
        plt.plot(secondary_change_series, label=attribute_name)

    plt.legend()
    plt.show()

In [None]:
# Draw the primary and secondary change series computed above in one figure
plot(attribute_importances)