In [1]:
"""
Use of same experimental setup as for our Probabilistic Suffix Prediction.

Reimplementation for comparison: 
- Paper: Weytjens, Hans, and Jochen De Weerdt. "Learning uncertainty with artificial neural networks for predictive process monitoring." Applied Soft Computing 125 (2022): 109134.
- Github (code) from: https://github.com/hansweytjens/uncertainty-remaining_time/blob/main/LSTM.ipynb
"""

'\nUse of same experimental setup as for our Probabilistic Suffix Prediction.\n\nReimplementation for comparison: \n- Paper: Weytjens, Hans, and Jochen De Weerdt. "Learning uncertainty with artificial neural networks for predictive process monitoring." Applied Soft Computing 125 (2022): 109134.\n- Github (code) from: https://github.com/hansweytjens/uncertainty-remaining_time/blob/main/LSTM.ipynb\n'

# Imports

In [2]:
import importlib
import sys
import torch

# Put the five nearest ancestor directories at the front of the module search
# path so the project-local packages imported further down can be resolved.
# Every entry is inserted at position 0, so the last one listed ends up first.
for _ancestor_dir in ('..', '../..', '../../..', '../../../..', '../../../../..'):
    sys.path.insert(0, _ancestor_dir)

# Data

### Load Data Files

In [3]:
def _load_split(path):
    """Load one dataset split saved with torch.save and print its type.

    weights_only=False lets torch.load unpickle arbitrary Python objects, so
    this must only be pointed at trusted files.
    """
    dataset = torch.load(path, weights_only=False)
    print(type(dataset))
    return dataset


# Training split
file_path_train = '../../../../../../encoded_data/compare_weytjens/helpdesk_all_5_train.pkl'
helpdesk_train_dataset = _load_split(file_path_train)

# Validation split
file_path_val = '../../../../../../encoded_data/compare_weytjens/helpdesk_all_5_val.pkl'
helpdesk_val_dataset = _load_split(file_path_val)


<class 'event_log_loader.new_event_log_loader.EventLogDataset'>
<class 'event_log_loader.new_event_log_loader.EventLogDataset'>


### Train Data Insights

In [4]:
# Feature metadata stored on the training dataset: element 0 describes the
# categorical features, element 1 the numerical features.
helpdesk_all_categories = helpdesk_train_dataset.all_categories

helpdesk_all_categories_cat = helpdesk_all_categories[0]
print(helpdesk_all_categories_cat)

helpdesk_all_categories_num = helpdesk_all_categories[1]
print(helpdesk_all_categories_num)

# Each entry is indexed as [0] feature name, [1] a count, [2] label -> id mapping.
for i, cat in enumerate(helpdesk_all_categories_cat):
    print(f"Helpdesk (5) Categorical feature: {cat[0]}, Index position in categorical data list: {i}")
    print(f"Helpdesk (5) Total Amount of Category labels: {cat[1]}")

print('\n')

for i, num in enumerate(helpdesk_all_categories_num):
    # The index printed here refers to the numerical list, not the categorical one.
    print(f"Helpdesk (5) Numerical feature: {num[0]}, Index position in numerical data list: {i}")
    print(f"Helpdesk (5) Amount Numerical: {num[1]}")

# Position of the activity (concept name) feature within the categorical list.
concept_name = 'Activity'
concept_name_id = next(
    (idx for idx, entry in enumerate(helpdesk_all_categories_cat) if entry[0] == concept_name),
    None,
)
if concept_name_id is None:
    # Fail with a clear message instead of an opaque IndexError.
    raise ValueError(f"Categorical feature {concept_name!r} not found in dataset categories")

print("ID concet name in cat list: ", concept_name_id)

# Id assigned to the end-of-sequence label in the activity label mapping.
# A direct lookup raises KeyError naming the label if it is absent.
eos_value = 'EOS'
eos_id = helpdesk_all_categories_cat[concept_name_id][2][eos_value]

print("ID EOS in concept name tensor: ", eos_id)


[('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15})]
[('case_elapsed_time', 1, {})]
Helpdesk (5) Categorical feature: Activity, Index position in categorical data list: 0
Helpdesk (5) Total Amount of Category labels: 16


Helpdesk (5) Numerical feature: case_elapsed_time, Index position in categorical data list: 0
Helpdesk (5) Amount Numerical: 1
ID concet name in cat list:  0
ID EOS in concept name tensor:  5


### Input Features for Encoder and Decoder

In [5]:
# Names of the dataset features used as model input, grouped by type.
model_feat_cat, model_feat_num = ['Activity'], ['case_elapsed_time']

# Combined specification: categorical list first, numerical list second.
model_feat = [model_feat_cat, model_feat_num]

print("Features model: ", model_feat)


Features model:  [['Activity'], ['case_elapsed_time']]


# Model

In [6]:
# Import the project-local model module and reload it so that source edits are
# picked up without restarting the kernel; bind the class only after the reload.
import stochasticLSTM.model
importlib.reload(stochasticLSTM.model)
from stochasticLSTM.model import StochasticLSTMWeytjens

# Specific model parameters from paper:

# Size hidden layer
hidden_size = 10

# Number of LSTM cells
num_layers = 2

# One numerical output to predict
# NOTE(review): the keyword is named `input_size` although this comment talks
# about the number of outputs -- confirm against StochasticLSTMWeytjens.
input_size = 1

# Fixed Dropout probability
p_fix = 0.1

# Lambda for L2 (weight, bias, dropout) regularization: According to formula: 1/2N
# NOTE(review): the value is fixed to 0.1 here; confirm how it relates to 1/(2N).
regularization_term = 0.1

# Fall back to the CPU when CUDA is unavailable, using the same availability
# check as the training device, instead of unconditionally requesting 'cuda'.
model_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hans Weytjens LSTM model
model = StochasticLSTMWeytjens(data_set_categories=helpdesk_all_categories,
                               model_feat=model_feat,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               input_size=input_size,
                               weight_reg=regularization_term,
                               p_fix=p_fix,
                               device=model_device)

Data set categories:  ([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15})], [('case_elapsed_time', 1, {})])
Model input features:  [['Activity'], ['case_elapsed_time']]


Embeddings:  ModuleList(
  (0): Embedding(16, 8)
)
Total embedding feature size:  8
Input feature size:  9
Cells hidden size:  10
Number of LSTM layer:  2
Dropout rate:  0.1


Output feature list of dicts (featue name, tensor index in dataset):  {'case_elapsed_time': 0}


# Loss Object Creation

In [7]:
# Import the project-local loss module, then reload it so that edits made to
# its source since the first import are picked up without restarting the kernel.
import loss_hans.losses
importlib.reload(loss_hans.losses)
# Bind the class name only after the reload so it refers to the reloaded definition.
from loss_hans.losses import Loss

# Loss object, constructed with no arguments.
loss_obj = Loss()

# Training Configuration

In [8]:
# Import the project-local training module and reload it so that source edits
# are picked up without restarting the kernel.
import training.train
importlib.reload(training.train)
from training.train import Training

from torch.optim.lr_scheduler import ReduceLROnPlateau

from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer; `comment` is appended to the auto-generated run directory name.
writer = SummaryWriter(comment="Full_helpdesk_weytjens_rem_time")

# device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Parameter of Probabilistic Suffix Prediction experimental design, to ensure fair comparison:

# Start learning rate
learning_rate = 1e-5

# Optimizer and Scheduler
# weight_decay is 0 here; presumably regularization is applied through the
# model's own weight_reg setting -- confirm in the model/loss code.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=0)
# Multiply the learning rate by 0.1 (never below 1e-10) when the monitored
# value has stopped decreasing, with a patience of 4 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, min_lr=1e-10)

# Epochs
num_epochs = 100

# Batch of model input
batch_size = 128

# shuffle data
shuffle = True

# Optimization settings handed to the trainer as a single dict.
optimize_values = {"optimizer": optimizer,
                   "scheduler": scheduler,
                   "epochs": num_epochs,
                   "mini_batches": batch_size,
                   "shuffle": shuffle}

# Number of feature groups (categorical, numerical) in the model input specification.
number_tasks = len(model_feat)

trainer = Training(model=model,
                   device=device,
                   data_train=helpdesk_train_dataset,
                   data_val=helpdesk_val_dataset,
                   concept_name_id=concept_name_id,
                   eos_id=eos_id,
                   loss_obj=loss_obj,
                   optimize_values=optimize_values,
                   writer=writer,
                   save_model_n_th_epoch=1,
                   saving_path="Helpdesk_weytjens_rem_time_1_suffix_length5.pkl")

# Train the model. Always close the TensorBoard writer afterwards so buffered
# events are flushed and the event file is released, even if training is
# interrupted or raises. SummaryWriter.close() ignores a repeated close.
try:
    trainer.train()
finally:
    writer.close()

Device:  cuda
Optimizer:  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    weight_decay: 0
)
Scheduler:  <torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x7fc8acc28d70>
Epochs:  100
Mini baches:  128
Shuffle batched dataset:  True


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch [1/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 15.9828
Validation: Avg Standard Validation Loss: 6.4970
Validation: Avg Attenuated Validation Loss: 4.4784
Validation Loss for Scheduler: 6.4970
saving model
Epoch [2/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 15.9513
Validation: Avg Standard Validation Loss: 6.4723
Validation: Avg Attenuated Validation Loss: 4.4484
Validation Loss for Scheduler: 6.4723
saving model
Epoch [3/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 15.9155
Validation: Avg Standard Validation Loss: 6.4161
Validation: Avg Attenuated Validation Loss: 4.4006
Validation Loss for Scheduler: 6.4161
saving model
Epoch [4/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 15.8829
Validation: Avg Standard Validation Loss: 6.4026
Validation: Avg Attenuated Validation Loss: 4.3786
Validation Loss for Scheduler: 6.4026
saving model
Epoch [5/100], Learning Rate: 1e-05
Training: Avg Attenuated