In [1]:
"""
Use of same experimental setup as for our Probabilistic Suffix Prediction.

Reimplementation for comparison: 
- Paper: Camargo, Manuel, Marlon Dumas, and Oscar González-Rojas. "Learning accurate LSTM models of business processes." International Conference on Business Process Management. Cham: Springer International Publishing, 2019.
- Github (code) from: https://github.com/AdaptiveBProcess/GenerativeLSTM/tree/master/
"""

'\nUse of same experimental setup as for our Probabilistic Suffix Prediction.\n\nReimplementation for comparison: \n- Paper: Camargo, Manuel, Marlon Dumas, and Oscar González-Rojas. "Learning accurate LSTM models of business processes." International Conference on Business Process Management. Cham: Springer International Publishing, 2019.\n- Github (code) from: https://github.com/AdaptiveBProcess/GenerativeLSTM/tree/master/\n'

# Imports

In [2]:
import importlib
import sys
import torch

# Make the parent directories (one to five levels up) importable.
# Prepending the list in this order reproduces five successive
# sys.path.insert(0, ...) calls: the outermost directory ends up first.
sys.path[:0] = [
    '../../../../..',
    '../../../..',
    '../../..',
    '../..',
    '..',
]

# Data

### Load Data Files

In [3]:
def _load_and_report(path):
    """Load a dataset saved with torch.save from `path`, print its type, and return it.

    NOTE(review): weights_only=False performs full unpickling, which can execute
    arbitrary code; only use this with files from a trusted source.
    """
    dataset = torch.load(path, weights_only=False)
    print(type(dataset))
    return dataset


# Training split (pickle file written with torch.save)
file_path_train = '../../../../../../encoded_data/compare_camargo/repair_shop_5_train.pkl'
repair_train_dataset = _load_and_report(file_path_train)

# Validation split (pickle file written with torch.save)
file_path_val = '../../../../../../encoded_data/compare_camargo/repair_shop_5_val.pkl'
repair_val_dataset = _load_and_report(file_path_val)


<class 'event_log_loader.new_event_log_loader.EventLogDataset'>
<class 'event_log_loader.new_event_log_loader.EventLogDataset'>


### Train Data Insights

In [4]:
# Feature metadata of the repair training set. Index 0 holds the categorical
# features and index 1 the numerical ones; each entry is indexed below as
# entry[0] = feature name, entry[1] = size, entry[2] = label -> id mapping.
repair_all_categories = repair_train_dataset.all_categories

repair_all_categories_cat = repair_all_categories[0]
print(repair_all_categories_cat)

repair_all_categories_num = repair_all_categories[1]
print(repair_all_categories_num)

for i, cat in enumerate(repair_all_categories_cat):
    print(f"repair (1) Categorical feature: {cat[0]}, Index position in categorical data list: {i}")
    print(f"repair (1) Total Amount of Category labels: {cat[1]}")

print('\n')

for i, num in enumerate(repair_all_categories_num):
    # Label fixed: this index refers to the numerical list, not the categorical one.
    print(f"repair (1) Numerical feature: {num[0]}, Index position in numerical data list: {i}")
    print(f"repair (1) Amount Numerical: {num[1]}")

# Position of the activity feature within the categorical feature list.
# list.index raises ValueError if the feature is missing.
concept_name = 'concept:name'
concept_name_id = [feat[0] for feat in repair_all_categories_cat].index(concept_name)
print("ID concept name in cat list: ", concept_name_id)

# Number of activity labels; used later as the activity output size of the model.
concept_name_size = repair_all_categories_cat[concept_name_id][1]
print("Number of concept name labels (output size): ", concept_name_size)

# Id of the EOS token in the activity label mapping (direct key lookup;
# raises KeyError if the mapping has no 'EOS' entry).
eos_value = 'EOS'
eos_id = repair_all_categories_cat[concept_name_id][2][eos_value]
print("ID EOS in concept name tensor: ", eos_id)


[('concept:name', 9, {'ACKNOWLEDGEMENT': 1, 'CREATE_INVOICE': 2, 'DISASSEMBLY': 3, 'EOS': 4, 'QUALITY_CONTROL': 5, 'RECEPTION': 6, 'REPAIR': 7, 'SHIPPING': 8})]
[('case_elapsed_time', 1, {})]
repair (1) Categorical feature: concept:name, Index position in categorical data list: 0
repair (1) Total Amount of Category labels: 9


repair (1) Numerical feature: case_elapsed_time, Index position in categorical data list: 0
repair (1) Amount Numerical: 1
ID concet name in cat list:  0
ID concet name in cat list:  9
ID EOS in concept name tensor:  4


### Input Features for Encoder and Decoder

In [5]:
# Names of the model input features, split into categorical and numerical.
# The first element of each metadata entry is the feature name.
model_feat_cat = [cat_entry[0] for cat_entry in repair_all_categories_cat]
model_feat_num = [num_entry[0] for num_entry in repair_all_categories_num]

# Combined as [categorical names, numerical names].
model_feat = [model_feat_cat, model_feat_num]
print("Input features encoder: ", model_feat)

Input features encoder:  [['concept:name'], ['case_elapsed_time']]


# Model

In [6]:
import joinLSTM.model
importlib.reload(joinLSTM.model)
from joinLSTM.model import FullShared_Join_LSTM

"""
Specific model parameters from paper: 
"""

# Hidden state size of the LSTM
hidden_size = 50

# Number of stacked LSTM layers
num_layers = 1

# Passed to the model as `input_size` (original note: "STANDARD: One numerical output to predict")
input_size = 1

# Joint LSTM used for the comparison in this notebook.
# NOTE(review): the previous comment called this the "Hans Weytjens LSTM model",
# while the notebook header cites Camargo et al. - confirm the attribution.
model = FullShared_Join_LSTM(
    data_set_categories=repair_all_categories,
    model_feat=model_feat,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size_act=concept_name_size,
)

Data set categories:  ([('concept:name', 9, {'ACKNOWLEDGEMENT': 1, 'CREATE_INVOICE': 2, 'DISASSEMBLY': 3, 'EOS': 4, 'QUALITY_CONTROL': 5, 'RECEPTION': 6, 'REPAIR': 7, 'SHIPPING': 8})], [('case_elapsed_time', 1, {})])
Model input features:  [['concept:name'], ['case_elapsed_time']]


Embeddings:  ModuleList(
  (0): Embedding(9, 5)
)
Total embedding feature size:  5
Input feature size:  6
Cells hidden size:  50
Number of LSTM layer:  1




# Training Configuration

In [7]:
import training.train
importlib.reload(training.train)
from training.train import Training

from torch.optim.lr_scheduler import ReduceLROnPlateau

from torch.utils.tensorboard import SummaryWriter

# TensorBoard logging for this run
writer = SummaryWriter(comment="Full_repair_camargo_act")

# Train on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The parameters below follow the Probabilistic Suffix Prediction experimental
# design, to ensure a fair comparison.

# Initial learning rate
learning_rate = 1e-4

# Optimizer and learning-rate scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0)
# NOTE(review): factor=1e-5 is a very aggressive reduction factor for
# ReduceLROnPlateau (a plateau would take the LR from 1e-4 to about 1e-9).
# Kept as is because it mirrors the reference setup - confirm it is intended.
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=1e-5, patience=1, min_lr=1e-10)

# Number of training epochs
num_epochs = 100

# Mini-batch size of the model input
batch_size = 128

# Shuffle the data
shuffle = True

# Optimization settings handed to the trainer
optimize_values = dict(
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=num_epochs,
    mini_batches=batch_size,
    shuffle=shuffle,
)

# One entry per feature group in model_feat (categorical, numerical).
# Not used elsewhere in this cell.
number_tasks = len(model_feat)

trainer = Training(
    model=model,
    device=device,
    data_train=repair_train_dataset,
    data_val=repair_val_dataset,
    optimize_values=optimize_values,
    concept_name_id=concept_name_id,
    eos_id=eos_id,
    writer=writer,
    save_model_n_th_epoch=1,
    saving_path="Repair_camargo_act_1_suffix_length5.pkl",
)

# Run the training loop
trainer.train()

Device:  cuda
Optimizer:  AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.0001
    maximize: False
    weight_decay: 0
)
Scheduler:  <torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x7f2e3232cd70>
Epochs:  100
Mini baches:  128
Shuffle batched dataset:  True


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch [1/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.8931
Validation: Avg Validation Loss: 1.6410
Validation Loss for Scheduler: 1.6410
saving model
Epoch [2/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.5456
Validation: Avg Validation Loss: 1.4883
Validation Loss for Scheduler: 1.4883
saving model
Epoch [3/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.4826
Validation: Avg Validation Loss: 1.4684
Validation Loss for Scheduler: 1.4684
saving model
Epoch [4/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.4716
Validation: Avg Validation Loss: 1.4623
Validation Loss for Scheduler: 1.4623
saving model
Epoch [5/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.4671
Validation: Avg Validation Loss: 1.4585
Validation Loss for Scheduler: 1.4585
saving model
Epoch [6/100], Learning Rate: 0.0001
Training: Avg Attenuated Training Loss: 1.4649
Validation: Avg Validation Loss: 1.4574
V