# Secondary Drift Detection for Simple Configuration

## Setup
### Imports

In [1]:
# imports
import helper
from pm4py.objects.log.importer.xes import importer as xes_importer

from processdrift.framework import drift_detection
from processdrift.framework import drift_explanation
from processdrift.framework import feature_extraction
from processdrift.framework import population_comparison
from processdrift.framework import windowing
from processdrift.framework import evaluation
from processdrift.framework import change_point_extraction


import time
import os
import math

### Data settings and event log loading

In [2]:
# original settings
# `size` is the single source of truth for the log size; both paths below are derived from it
# so the data directory, the results file and the ground truth cannot get out of sync.
size = 10000
# number of (change point, attribute) pairs that make up the ground truth explanations
number_relevant_attributes = 5

# Paths are assembled with os.path.join instead of hard-coded backslashes: on Windows this yields
# exactly the same strings as before, on other platforms it yields valid paths.
input_path = os.path.join('data', 'synthetic', 'attribute_drift', 'sudden_3_attribute_values', str(size))
results_path = os.path.join('results', 'sudden_3_attribute_values', str(size), '100_iter_results.csv')
# when True, an existing results file is removed before the evaluation starts
delete_if_results_exists = True

# Parameter grid: every combination of the following lists is evaluated.
# window generator variants; 'fixed' and 'adaptive' are the two names the evaluation loop understands
window_generator_types = ['fixed', 'adaptive']
window_sizes = [100, 150, 200]
# Only the G-test comparer is active. Previously tried alternatives (older API names):
# ChiSquaredComparer, KSTestPopComparer, HellingerDistanceComparer.
population_comparers = [population_comparison.GTestPC()]
# handed to the change point extractor together with phi and rho
thresholds = [0.05]
# handed to the drift explainer when computing possible drift explanations
max_distances = [300]
# step width of the window generators
slide_bys = [10, 20]
# phi is derived per configuration as ceil(proportional_phi * window_size / slide_by)
proportional_phis = [0.25, 0.5, 1]
# not part of the grid: the same rho is used for every configuration
rho = 1

# always exclude the concept name as an attribute
exclude_attributes = ['concept:name']

# limit iterations (None = process every event log found under input_path)
limit_iterations = None

In [3]:
# Alternative settings, kept disabled (everything in this cell is commented out).
# NOTE(review): this block is stale compared to the active settings: it refers to
# `pop_comparison.GTestComparer()` while the notebook imports `population_comparison` and uses
# `GTestPC()`, and it assigns `proportional_phi` while the configuration loop iterates over
# `proportional_phis`. Fix those names before re-enabling any of it.
# input_path = r'data\synthetic\attribute_drift\sudden_3_attribute_values\10000'
# size = 10000
# number_relevant_attributes = 5

# results_path = r'results\sudden_3_attribute_values\10000\100_iter_results.csv'
# delete_if_results_exists = False

# window_generator_types = ['adaptive']
# window_sizes= [150] # [100, 200]
# population_comparers = [pop_comparison.GTestComparer()] #[pop_comparison.ChiSquaredComparer(), pop_comparison.KSTestPopComparer()] # pop_comparison.HellingerDistanceComparer()]# pop_comparison.KSTestPopComparer(), pop_comparison.ChiSquaredComparer()]
# thresholds = [0.05]
# max_distances = [300]
# slide_bys = [1, 15, 20, 25, 30] # , 10]
# proportional_phi = [1]
# rho = 1

# # always exclude the concept name as an attribute
# exclude_attributes = ['concept:name']

# # limit iterations
# limit_iterations = None

In [4]:
# Ground truth for the evaluation.
# The change points are requested for the configured `size` rather than a repeated literal, so
# changing `size` in the settings also changes the ground truth.
true_change_points = helper.get_change_points_maardji_et_al_2013(size)
# Pair the i-th change point with the attribute name 'relevant_attribute_01', '_02', ...
# (1-based, zero-padded to two digits) for the first `number_relevant_attributes` change points.
true_change_point_explanations = [
    (true_change_points[i], f'relevant_attribute_{i + 1:02d}')
    for i in range(number_relevant_attributes)
]

In [5]:
# Collect the paths of all event log files below `input_path`, subdirectories included.
# Only the paths are gathered here; the logs themselves are imported later, one at a time.
event_log_file_paths = helper.get_all_files_in_dir(
    input_path,
    include_files_in_subdirs=True,
)

In [6]:
# The primary detector is shared by every configuration: it is built once from the known
# true change points, so only the secondary (attribute) detectors vary between runs.
primary_process_drift_detector = drift_detection.TrueKnownDD(
    true_change_points
)

In [7]:
# Build every possible configuration: the full cross product of all setting lists.
# The `for` clauses run outermost-first in the order listed, so the resulting list is ordered by
# window generator type, then window size, comparer, threshold, max distance, slide-by and
# finally proportional phi (the fastest-changing value).
configurations = [
    {
        'window_generator_type': wg_type,
        'window_size': w_size,
        'population_comparer': comparer,
        'threshold': thresh,
        'max_distance': max_dist,
        'slide_by': step,
        'proportional_phi': prop_phi,
    }
    for wg_type in window_generator_types
    for w_size in window_sizes
    for comparer in population_comparers
    for thresh in thresholds
    for max_dist in max_distances
    for step in slide_bys
    for prop_phi in proportional_phis
]

In [8]:
# Start from a clean results file when requested: remove the file of a previous run if it exists.
# The existence check is only evaluated when deletion is enabled (short-circuit `and`).
if delete_if_results_exists and os.path.exists(results_path):
    os.remove(results_path)

In [9]:
# Evaluate every configuration on every event log and append one result row per
# (event log, configuration) pair to the results file.
for i, event_log_file_path in enumerate(event_log_file_paths):
    # optional cap on the number of processed event logs (None = no cap)
    if limit_iterations is not None and i >= limit_iterations:
        break

    print(f'Event log {i}')
    # the log is imported once and reused for all configurations
    event_log = xes_importer.apply(event_log_file_path)

    for configuration in configurations:
        print(f'\nEvaluating configuration {configuration}')

        # perf_counter is monotonic, so the measured duration cannot be distorted by
        # system clock adjustments during a long run
        start_time = time.perf_counter()

        window_generator_type = configuration['window_generator_type']
        window_size = configuration['window_size']
        population_comparer = configuration['population_comparer']
        threshold = configuration['threshold']
        max_distance = configuration['max_distance']
        slide_by = configuration['slide_by']
        proportional_phi = configuration['proportional_phi']

        # build the secondary drift detector
        if window_generator_type == 'fixed':
            window_generator = windowing.FixedWG(window_size, slide_by=slide_by)
        elif window_generator_type == 'adaptive':
            window_generator = windowing.AdaptiveWG(window_size, slide_by=slide_by)
        else:
            # fail right away instead of passing None on to the detectors
            raise ValueError(f'Unknown window generator type: {window_generator_type!r}')

        # phi scales with the number of slide steps that fit into one window, rounded up
        phi = math.ceil(proportional_phi * window_size / slide_by)

        change_point_extractor = change_point_extraction.PhiFilterCPE(threshold, phi, rho)

        # one secondary detector per attribute of the log, minus the excluded attributes
        secondary_drift_detectors = drift_detection.get_all_attribute_drift_detectors(
            event_log,
            window_generator,
            population_comparer,
            change_point_extractor=change_point_extractor,
            exclude_attributes=exclude_attributes,
        )

        drift_explainer = drift_explanation.DriftExplainer(
            primary_process_drift_detector,
            secondary_drift_detectors,
        )

        # calculate the drift explanations
        drift_explanation_result = drift_explainer.get_possible_drift_explanations(event_log, max_distance)

        # evaluate the change point explanations
        observed_drift_point_explanations_simple = helper.get_simple_change_point_list_from_dictonary(
            drift_explanation_result.possible_drift_explanations
        )

        # NOTE(review): the evaluation tolerance is the window size, whereas the explainer above
        # uses the configured max_distance - confirm this difference is intended.
        result = evaluation.evaluate_explanations(
            true_change_point_explanations,
            observed_drift_point_explanations_simple,
            max_distance=window_size,
        )

        # compute time in seconds for this configuration (detector setup through evaluation)
        compute_time = time.perf_counter() - start_time

        # write the configuration results to file
        helper.append_config_results(results_path, event_log_file_path, configuration, result, compute_time)

Event log 0


  from .autonotebook import tqdm as notebook_tqdm
parsing log, completed traces :: 100%|██████████| 10000/10000 [00:08<00:00, 1144.07it/s]



Evaluating configuration {'window_generator_type': 'fixed', 'window_size': 100, 'population_comparer': GTestPC, 'threshold': 0.05, 'max_distance': 300, 'slide_by': 10, 'proportional_phi': 0.25}

Evaluating configuration {'window_generator_type': 'fixed', 'window_size': 100, 'population_comparer': GTestPC, 'threshold': 0.05, 'max_distance': 300, 'slide_by': 10, 'proportional_phi': 0.5}

Evaluating configuration {'window_generator_type': 'fixed', 'window_size': 100, 'population_comparer': GTestPC, 'threshold': 0.05, 'max_distance': 300, 'slide_by': 10, 'proportional_phi': 1}

Evaluating configuration {'window_generator_type': 'fixed', 'window_size': 100, 'population_comparer': GTestPC, 'threshold': 0.05, 'max_distance': 300, 'slide_by': 20, 'proportional_phi': 0.25}

Evaluating configuration {'window_generator_type': 'fixed', 'window_size': 100, 'population_comparer': GTestPC, 'threshold': 0.05, 'max_distance': 300, 'slide_by': 20, 'proportional_phi': 0.5}

Evaluating configuration {'wi

KeyboardInterrupt: 

In [None]:
# Audible notification that the run has finished.
# winsound only exists on Windows; the import is guarded so this cell does not raise
# ImportError when the notebook is run on another platform.
try:
    import winsound
except ImportError:
    winsound = None

if winsound is not None:
    # nine short beeps: 500 Hz, 200 ms each
    for _ in range(9):
        winsound.Beep(500, 200)
else:
    print('Run finished (audible notification is only available on Windows).')