# Single Prefix Inference

In [1]:
import importlib
import sys
import torch
import pickle
import os
import numpy as np

# Prepend the four nearest ancestor directories ('..' up to '../../../..') to the
# import search path; the deepest ancestor is inserted last and therefore ends up
# first. Presumably one of these holds the project packages imported below.
for levels_up in range(1, 5):
    sys.path.insert(0, '/'.join(['..'] * levels_up))

from model.dropout_uncertainty_enc_dec_LSTM.dropout_uncertainty_model import DropoutUncertaintyEncoderDecoderLSTM

import event_log_loader.new_event_log_loader
importlib.reload(event_log_loader.new_event_log_loader)
from event_log_loader.new_event_log_loader import EventLogLoader, EventLogDataset

import warnings
# Silence FutureWarning messages so they do not drown out the cell outputs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Seed every random number generator this notebook relies on. Seeding NumPy alone
# is not enough: the suffix samples are drawn through a PyTorch model, so the
# Torch generator has to be seeded as well for runs to be repeatable.
np.random.seed(17)
torch.manual_seed(17)

Preprocessing

In [2]:
# CSV file containing the single case (prefix) that inference is run on.
single_case_location = '../../../data/test_case_helpdesk.csv'

# File the encoded single-case dataset is written to; the inference cell reads
# the same path back.
encoded_dataset_location = '../../../encoded_data/temporary/temporary.pkl'


# Column names and encoding options handed to EventLogLoader.
event_log_properties = {
    'case_name' : 'Case ID',
    'concept_name' : 'Activity',
    'timestamp_name' : 'Complete Timestamp',
    'date_format' : '%Y/%m/%d %H:%M:%S.%f',
    'time_since_case_start_column' : 'case_elapsed_time',
    'time_since_last_event_column' : 'event_elapsed_time',
    'day_in_week_column' : 'day_in_week',
    'seconds_in_day_column' : 'seconds_in_day',
    'min_suffix_size' : 5,
    'train_validation_size' : 0.15,
    'test_validation_size' : 0.2,
    'window_size' : 'auto',
    'categorical_columns' : ['Activity', 'Resource', 'Variant index', 'seriousness', 'customer', 'product', 'responsible_section', 'seriousness_2', 'service_level', 'service_type', 'support_section', 'workgroup'],
    'continuous_columns' : ['case_elapsed_time', 'event_elapsed_time', 'day_in_week', 'seconds_in_day'],
    'continuous_positive_columns' : [],
}


# The loader:
# 1) loads the event log
# 2) adds EOS to cases
# 3) normalizes numerical features
# 4) imputes features (replaces n.a. categories with a new class for categorical
#    features and with average values for numerical features)
#
# The instance is deliberately NOT called `event_log_loader`: that name is the
# imported package, and rebinding it would shadow the module for the rest of the
# session.
#
# NOTE(review): the vocabulary printed below is built from this single case only
# (e.g. 4 'Activity' classes, column 'Variant index'), whereas the output printed
# when the model is loaded shows 16 'Activity' classes and a column named
# 'VariantIndex'. The integer codes therefore probably do not line up with the
# model's embeddings, which may be related to the `KeyError: 3` raised during
# evaluation -- TODO confirm and encode the case with the training vocabulary.
single_case_loader = EventLogLoader(single_case_location, event_log_properties)

dataset = single_case_loader.get_dataset('all')

# Create the target directory first so a fresh checkout does not fail on save.
os.makedirs(os.path.dirname(encoded_dataset_location), exist_ok=True)
torch.save(dataset, encoded_dataset_location)
print(dataset.all_categories)


categorical tensors:   0%|          | 0/12 [00:00<?, ?it/s]

Activity:   0%|          | 0/1 [00:00<?, ?it/s]

Resource:   0%|          | 0/1 [00:00<?, ?it/s]

Variant index:   0%|          | 0/1 [00:00<?, ?it/s]

seriousness:   0%|          | 0/1 [00:00<?, ?it/s]

customer:   0%|          | 0/1 [00:00<?, ?it/s]

product:   0%|          | 0/1 [00:00<?, ?it/s]

responsible_section:   0%|          | 0/1 [00:00<?, ?it/s]

seriousness_2:   0%|          | 0/1 [00:00<?, ?it/s]

service_level:   0%|          | 0/1 [00:00<?, ?it/s]

service_type:   0%|          | 0/1 [00:00<?, ?it/s]

support_section:   0%|          | 0/1 [00:00<?, ?it/s]

workgroup:   0%|          | 0/1 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/4 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/1 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/1 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/1 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/1 [00:00<?, ?it/s]

([('Activity', 4, {'Assign seriousness': 1, 'EOS': 2, 'Take in charge ticket': 3}), ('Resource', 4, {'EOS': 1, 'Value 1': 2, 'Value 2': 3}), ('Variant index', 3, {'12.0': 1, nan: 2}), ('seriousness', 3, {'EOS': 1, 'Value 1': 2}), ('customer', 3, {'EOS': 1, 'Value 1': 2}), ('product', 3, {'EOS': 1, 'Value 1': 2}), ('responsible_section', 3, {'EOS': 1, 'Value 1': 2}), ('seriousness_2', 3, {'EOS': 1, 'Value 1': 2}), ('service_level', 4, {'EOS': 1, 'Value 1': 2, 'Value 2': 3}), ('service_type', 3, {'EOS': 1, 'Value 1': 2}), ('support_section', 3, {'EOS': 1, 'Value 1': 2}), ('workgroup', 3, {'EOS': 1, 'Value 1': 2})], [('case_elapsed_time', 1, {}), ('event_elapsed_time', 1, {}), ('day_in_week', 1, {}), ('seconds_in_day', 1, {})])


Inference

In [3]:
# Locations of the trained model checkpoint and of the encoded single-case
# dataset written by the preprocessing cell.
file_path_model = (
    '../../../src/notebooks/training_variational_dropout/Helpdesk/'
    'Helpdesk_full_grad_norm_new_4layer.pkl'
)
file_path_data_set = '../../../encoded_data/temporary/temporary.pkl'

# Restore the model from its checkpoint, passing dropout=0.1 to the loader.
model = DropoutUncertaintyEncoderDecoderLSTM.load(file_path_model, dropout=0.1)

# weights_only=False unpickles arbitrary Python objects, so only point this at
# files produced by this project (here: the file saved by the preprocessing cell).
helpdesk_test_dataset = torch.load(file_path_data_set, weights_only=False)

Data set categories:  ([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23}), ('VariantIndex', 175, {'1.0': 1, '10.0': 2, '100.0': 3, '103.0': 4, '104.0': 5, '107.0': 6, '109.0': 7, '11.0': 8, '110.0': 9, '112.0': 10, '113.0': 11, '114.0': 12, '115.0': 13, '117.0': 14, '118.0': 15, '12.0': 16, '120.0': 17, '122.0': 18, '123.0': 19, '124.0': 20, '125.0': 21, '126.0': 22, '12

In [4]:
import evaluation.probabilistic_evaluation
importlib.reload(evaluation.probabilistic_evaluation)
from evaluation.probabilistic_evaluation import ProbabilisticEvaluation

# Settings for the probabilistic evaluation, gathered in one place and forwarded
# verbatim as keyword arguments.
evaluation_settings = {
    'model': model,
    'dataset': helpdesk_test_dataset,
    'concept_name': 'Activity',
    'num_processes': 16,
    # alternative previously tried: growing_num_values = []
    'growing_num_values': ['case_elapsed_time'],
    'samples_per_case': 1000,
    'sample_argmax': False,
    'use_variance_cat': True,
    'use_variance_num': True,
    'all_cat': ['Activity', 'Resource'],
    'all_num': ['case_elapsed_time', 'event_elapsed_time'],
}

new_eval = ProbabilisticEvaluation(**evaluation_settings)

In [5]:

def save_chunk(results, i, out_dir=None):
    """Pickle one batch of evaluation results to ``results_part_NNN.pkl``.

    Args:
        results: Picklable, sized object to store (the caller passes a dict).
        i: Zero-based index of the last result in the batch. The number in the
            file name is ``i + 1``, zero-padded to three digits.
        out_dir: Directory to write into. When omitted, the module-level
            ``output_dir`` is used, which keeps existing two-argument calls
            working unchanged.

    The target directory is created when it does not exist yet, so the first
    save cannot fail with ``FileNotFoundError`` and discard a finished batch.
    """
    target_dir = output_dir if out_dir is None else out_dir
    os.makedirs(target_dir, exist_ok=True)

    chunk_number = i + 1
    filename = os.path.join(target_dir, f'results_part_{chunk_number:03d}.pkl')
    with open(filename, 'wb') as f:
        pickle.dump(results, f)
    print(f"Saved {len(results)} results to {filename}")

# Directory the pickled result batches are written to (read by save_chunk).
output_dir = '../../../../../../../evaluation_results/Helpdesk'

# Number of results collected before a batch is flushed to disk.
save_every = 10

# Results of the current, not yet saved batch, keyed by (case name, prefix length).
results = {}

evaluation_stream = new_eval.evaluate(random_order=True)
for idx, record in enumerate(evaluation_stream):
    case_name, prefix_len, prefix, predicted_suffixes, suffix, mean_prediction = record

    print("pred: ", mean_prediction)
    print("pred length: ", len(mean_prediction))
    print("suffix: ", suffix)
    print("suffix length: ", len(suffix))

    # Every (case, prefix length) pair may appear only once per batch.
    result_key = (case_name, prefix_len)
    assert result_key not in results
    results[result_key] = (prefix, suffix, mean_prediction, predicted_suffixes)
    print(prefix_len, len(suffix))

    # Flush a full batch and start collecting the next one.
    processed = idx + 1
    if processed % save_every == 0:
        save_chunk(results, idx)
        results = {}

# Write whatever is left over after the last full batch.
if results:
    save_chunk(results, idx)

  0%|          | 0/1 [00:00<?, ?it/s]

KeyError: 3