In [3]:
import importlib
import sys
import torch
import numpy as np

# Put the parent and grandparent directories at the very front of the module
# search path (in that order) so project-local packages can be imported.
sys.path[:0] = ['..', '../..']

In [4]:
# Import the loader module under an alias so the package name
# `event_log_loader` is never bound in the notebook namespace: that name is
# used for the EventLogLoader instance created at the end of this cell, and
# binding it to the package first would make one name mean two different things.
import event_log_loader.new_event_log_loader as loader_module
# Reload before the from-import so the classes imported below reflect the
# current on-disk version of the module without restarting the kernel.
importlib.reload(loader_module)
from event_log_loader.new_event_log_loader import EventLogLoader, EventLogDataset

import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

# Seed NumPy's global random number generator.
# NOTE(review): only NumPy is seeded here; if the loader also draws randomness
# from torch or the stdlib `random` module, seed those as well - confirm.
np.random.seed(17)

# Location of the raw event log, and the prefix used in the names of the
# encoded dataset files written by the later cells.
event_log_location = '../../../data/helpdesk.csv'
result_name = 'helpdesk_all'

# Settings passed to EventLogLoader together with the event log location.
event_log_properties = dict(
    # Identifier / activity / timestamp names and the timestamp format.
    case_name='CaseID',
    concept_name='Activity',
    timestamp_name='CompleteTimestamp',
    date_format='%Y/%m/%d %H:%M:%S.%f',
    # Names of the time-related columns.
    time_since_case_start_column='case_elapsed_time',
    time_since_last_event_column='event_elapsed_time',
    day_in_week_column='day_in_week',
    seconds_in_day_column='seconds_in_day',
    # Suffix length, split sizes and window size.
    min_suffix_size=5,
    train_validation_size=0.15,
    test_validation_size=0.2,
    window_size='auto',
    # Feature columns grouped by type.
    # NOTE(review): if 'VariantIndex' is derived from the complete trace it may
    # leak information about the suffix being predicted - confirm.
    categorical_columns=[
        'Activity',
        'Resource',
        'VariantIndex',
        'seriousness',
        'customer',
        'product',
        'responsible_section',
        'seriousness_2',
        'service_level',
        'service_type',
        'support_section',
        'workgroup',
    ],
    continuous_columns=[
        'case_elapsed_time',
        'event_elapsed_time',
        'day_in_week',
        'seconds_in_day',
    ],
    continuous_positive_columns=[],
)


# Constructing the loader performs the following steps (as stated by the
# original author; the implementation lives in new_event_log_loader):
# 1) loads the event log
# 2) adds EOS to the cases
# 3) normalizes numerical features
# 4) imputes features (n.a. categories are replaced with a new class for
#    categorical features, and with average values for numerical features)
event_log_loader = EventLogLoader(event_log_location, event_log_properties)

In [5]:
# Show the window size held by the loader's encoder/decoder. The configuration
# requests 'window_size': 'auto', so this is presumably the value the loader
# derived from the data - confirm against the loader implementation.
print(event_log_loader.encoder_decoder.window_size)

18


In [6]:
# Build the 'train' split, persist it, and print its category information.
# The file name is <result_name>_<min_suffix_size>_train.pkl.
train_dataset = event_log_loader.get_dataset('train')
torch.save(
    train_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size}_train.pkl',
)
print(train_dataset.all_categories)

categorical tensors:   0%|          | 0/12 [00:00<?, ?it/s]

Activity:   0%|          | 0/2977 [00:00<?, ?it/s]

Resource:   0%|          | 0/2977 [00:00<?, ?it/s]

VariantIndex:   0%|          | 0/2977 [00:00<?, ?it/s]

seriousness:   0%|          | 0/2977 [00:00<?, ?it/s]

customer:   0%|          | 0/2977 [00:00<?, ?it/s]

product:   0%|          | 0/2977 [00:00<?, ?it/s]

responsible_section:   0%|          | 0/2977 [00:00<?, ?it/s]

seriousness_2:   0%|          | 0/2977 [00:00<?, ?it/s]

service_level:   0%|          | 0/2977 [00:00<?, ?it/s]

service_type:   0%|          | 0/2977 [00:00<?, ?it/s]

support_section:   0%|          | 0/2977 [00:00<?, ?it/s]

workgroup:   0%|          | 0/2977 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/4 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/2977 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/2977 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/2977 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/2977 [00:00<?, ?it/s]

([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23}), ('VariantIndex', 175, {'1.0': 1, '10.0': 2, '100.0': 3, '103.0': 4, '104.0': 5, '107.0': 6, '109.0': 7, '11.0': 8, '110.0': 9, '112.0': 10, '113.0': 11, '114.0': 12, '115.0': 13, '117.0': 14, '118.0': 15, '12.0': 16, '120.0': 17, '122.0': 18, '123.0': 19, '124.0': 20, '125.0': 21, '126.0': 22, '127.0': 23, '129.0': 24,

In [7]:
# Build the 'test' split and persist it as
# <result_name>_<min_suffix_size>_test.pkl.
test_dataset = event_log_loader.get_dataset('test')
torch.save(
    test_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size}_test.pkl',
)

categorical tensors:   0%|          | 0/12 [00:00<?, ?it/s]

Activity:   0%|          | 0/916 [00:00<?, ?it/s]

Resource:   0%|          | 0/916 [00:00<?, ?it/s]

VariantIndex:   0%|          | 0/916 [00:00<?, ?it/s]

seriousness:   0%|          | 0/916 [00:00<?, ?it/s]

customer:   0%|          | 0/916 [00:00<?, ?it/s]

product:   0%|          | 0/916 [00:00<?, ?it/s]

responsible_section:   0%|          | 0/916 [00:00<?, ?it/s]

seriousness_2:   0%|          | 0/916 [00:00<?, ?it/s]

service_level:   0%|          | 0/916 [00:00<?, ?it/s]

service_type:   0%|          | 0/916 [00:00<?, ?it/s]

support_section:   0%|          | 0/916 [00:00<?, ?it/s]

workgroup:   0%|          | 0/916 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/4 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/916 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/916 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/916 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/916 [00:00<?, ?it/s]

In [8]:
# Build the 'val' split and persist it as
# <result_name>_<min_suffix_size>_val.pkl.
val_dataset = event_log_loader.get_dataset('val')
torch.save(
    val_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size}_val.pkl',
)

categorical tensors:   0%|          | 0/12 [00:00<?, ?it/s]

Activity:   0%|          | 0/687 [00:00<?, ?it/s]

Resource:   0%|          | 0/687 [00:00<?, ?it/s]

VariantIndex:   0%|          | 0/687 [00:00<?, ?it/s]

seriousness:   0%|          | 0/687 [00:00<?, ?it/s]

customer:   0%|          | 0/687 [00:00<?, ?it/s]

product:   0%|          | 0/687 [00:00<?, ?it/s]

responsible_section:   0%|          | 0/687 [00:00<?, ?it/s]

seriousness_2:   0%|          | 0/687 [00:00<?, ?it/s]

service_level:   0%|          | 0/687 [00:00<?, ?it/s]

service_type:   0%|          | 0/687 [00:00<?, ?it/s]

support_section:   0%|          | 0/687 [00:00<?, ?it/s]

workgroup:   0%|          | 0/687 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/4 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/687 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/687 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/687 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/687 [00:00<?, ?it/s]