# Secondary Drift Detection for Simple Configuration

## Setup
### Imports

In [1]:
# imports
import itertools
import os
import time

from pm4py.objects.log.importer.xes import importer as xes_importer

import helper
from processdrift.framework import drift_detection
from processdrift.framework import drift_explanation
from processdrift.framework import evaluation
from processdrift.framework import feature_extraction
from processdrift.framework import pop_comparison
from processdrift.framework import windowing

### Data settings and event log loading

In [2]:
# Dataset selection: `size` also names the dataset sub-directory below.
size = 10000
number_relevant_attributes = 5

# Paths are assembled with os.path.join so the notebook is not tied to the
# Windows '\' separator; on Windows the resulting strings are unchanged.
input_path = os.path.join('data', 'synthetic', 'attribute_drift', 'simple', str(size))
output_path = os.path.join('results', 'synthetic', 'attribute_drift', 'simple', str(size))

# CSV file that receives one row per (configuration, event log) evaluation.
results_path = os.path.join('results', 'simple', str(size), 'results.csv')
# When True, an existing results file is removed before the evaluation starts.
delete_if_results_exists = True

# Parameter grid: every combination of the values below is evaluated.
window_generator_types = ['fixed', 'adaptive']
window_sizes = [100, 200]
population_comparers = [pop_comparison.HellingerDistanceComparer()]
thresholds = [0.5, 0.8]
max_distances = [200]
slide_bys = [5, 10]

# always exclude the concept name as an attribute
exclude_attributes = ['concept:name']

In [3]:
# Get the true change points for the configured log size (`size` instead of a
# repeated literal, so the ground truth follows the data settings).
true_change_points = helper.get_change_points_maardji_et_al_2013(size)

# Pair the i-th true change point with the attribute named
# 'relevant_attribute_XX' (1-based, zero-padded to two digits).
# Assumes at least `number_relevant_attributes` change points are returned.
true_change_point_explanations = [
    (true_change_points[i], f'relevant_attribute_{i+1:02d}')
    for i in range(number_relevant_attributes)
]

In [4]:
# Gather the event log file paths found under the input path; the helper is
# asked to include files in subdirectories as well.
event_log_file_paths = helper.get_all_files_in_dir(
    input_path,
    include_files_in_subdirs=True,
)

In [5]:
# One primary detector is shared by all configurations; it is constructed
# from the true change points.
primary_process_drift_detector = drift_detection.DriftDetectorTrueKnown(
    true_change_points
)

In [6]:
# Build all possible configurations as the Cartesian product of the
# parameter lists. The dict order below fixes both the key order of every
# configuration and the iteration order (first axis varies slowest).
configuration_axes = {
    'window_generator_type': window_generator_types,
    'window_size': window_sizes,
    'population_comparer': population_comparers,
    'threshold': thresholds,
    'max_distance': max_distances,
    'slide_by': slide_bys,
}

configurations = [
    dict(zip(configuration_axes, combination))
    for combination in itertools.product(*configuration_axes.values())
]

In [7]:
# Cache of parsed event logs, keyed by file path, so that every log is only
# parsed once even though it is evaluated under many configurations.
loaded_event_logs = {}

# Start from a clean results file if requested. Removing is guarded by an
# existence check so that a first run (no results file yet) does not fail.
if delete_if_results_exists and os.path.exists(results_path):
    os.remove(results_path)

In [8]:
# For each configuration, get the results over all datasets and append one
# result row per (configuration, event log) pair to the results file.
for configuration in configurations:
    window_generator_type = configuration['window_generator_type']
    window_size = configuration['window_size']
    population_comparer = configuration['population_comparer']
    threshold = configuration['threshold']
    max_distance = configuration['max_distance']
    slide_by = configuration['slide_by']

    print(f'\nEvaluating configuration {configuration}')

    # iterate all datasets with according settings
    for i, event_log_file_path in enumerate(event_log_file_paths):
        print(f'Event log {i}')
        # perf_counter is a monotonic clock meant for measuring durations.
        # The measured time includes parsing the log when it is not cached yet.
        start_time = time.perf_counter()

        # parse the event log with pm4py only if it is not in the cache yet
        if event_log_file_path not in loaded_event_logs:
            loaded_event_logs[event_log_file_path] = xes_importer.apply(event_log_file_path)
        event_log = loaded_event_logs[event_log_file_path]

        # build the window generator for the secondary drift detectors
        if window_generator_type == 'fixed':
            window_generator = windowing.FixedSizeWindowGenerator(window_size, slide_by=slide_by)
        elif window_generator_type == 'adaptive':
            window_generator = windowing.AdaptiveWindowGenerator(window_size, slide_by=slide_by)
        else:
            # fail early instead of handing None to the drift detectors
            raise ValueError(f'Unknown window generator type: {window_generator_type!r}')

        # one secondary drift detector per attribute, except the excluded ones
        secondary_drift_detectors = drift_detection.get_all_attribute_drift_detectors(
            event_log,
            window_generator,
            population_comparer,
            threshold=threshold,
            exclude_attributes=exclude_attributes,
        )

        drift_explainer = drift_explanation.DriftExplainer(primary_process_drift_detector, secondary_drift_detectors)

        # calculate the drift explanations
        observed_changes = drift_explainer.get_primary_and_secondary_changes(event_log, max_distance)
        observed_drift_point_explanations = drift_explanation.attribute_importance_per_primary_change_point(
            observed_changes, max_distance
        )

        # evaluate the change point explanations
        observed_drift_point_explanations_simple = helper.get_simple_change_point_list_from_explainer(
            observed_drift_point_explanations
        )

        # NOTE(review): the evaluation tolerance is window_size, not the
        # configuration's max_distance used above - confirm this is intended.
        result = evaluation.evaluate_explanations(
            true_change_point_explanations,
            observed_drift_point_explanations_simple,
            max_distance=window_size,
        )

        # elapsed wall-clock time in (fractional) seconds for this event log
        compute_time = time.perf_counter() - start_time

        # write the configuration results to file
        helper.append_config_results(results_path, event_log_file_path, configuration, result, compute_time)

  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|██████████| 10000/10000 [00:11<00:00, 867.10it/s]
parsing log, completed traces :: 100%|██████████| 10000/10000 [00:16<00:00, 624.85it/s]


KeyboardInterrupt: 