In [1]:
import sys
import pickle
import os
import torch
from tqdm.notebook import tqdm
from tqdm import tqdm
import random
from typing import Optional, Tuple, List

# Put the six ancestor directories ('..' up to '../../../../../..') at the front
# of the import search path. The deepest ancestor ends up first, which is the
# same final order that inserting them one by one at index 0 (shallowest first)
# produces.
sys.path[:0] = ['/'.join(['..'] * depth) for depth in range(6, 0, -1)]

from joinLSTM.model import FullShared_Join_LSTM

In [2]:
# Load the model checkpoint through the model class's own `load` helper.
file_path_model = '../../training/Helpdesk/Helpdesk_camargo_act_1_suffix_length5.pkl'

model = FullShared_Join_LSTM.load(file_path_model)

# Load the encoded Helpdesk test dataset.
# NOTE: weights_only=False makes torch.load perform full unpickling, which can
# execute arbitrary code stored in the file -- only use it on trusted, locally
# produced files.
file_path_data_set = '../../../../../../encoded_data/compare_camargo/helpdesk_all_5_test.pkl'
helpdesk_test_dataset = torch.load(file_path_data_set, weights_only=False)

Data set categories:  ([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23})], [('case_elapsed_time', 1, {})])
Model input features:  [['Activity', 'Resource'], ['case_elapsed_time']]


Embeddings:  ModuleList(
  (0): Embedding(16, 8)
  (1): Embedding(24, 9)
)
Total embedding feature size:  17
Input feature size:  18
Cells hidden size:  50
Number of LSTM layer:  1




In [3]:
# Global placeholders for multiprocessing workers.
# Every name below starts as None and is assigned by init_worker().

# Model that _evaluate_case() calls to obtain the next-activity prediction
global_model = None
# Number of randomly sampled suffixes generated per prefix
global_samples_per_case = None
# Categorical category metadata as passed to init_worker() (not read by the code visible in this notebook)
global_cat_categories = None
# Global scaler params for case_elapsed_time; unpacked as (mean, std) when de-normalising
global_scaler_params = None
# Pair of dicts (activity name -> class id, resource name -> class id)
global_dict_cat_class_id = None

def init_worker(model: FullShared_Join_LSTM,
                samples_per_case: int,
                cat_categories,
                scaler_params,
                dict_cat_class_ids,
                ):
    """
    Publish the evaluation state as module-level globals.

    Puts `model` into eval mode and then stores every argument in the
    matching `global_*` placeholder so that `_evaluate_case` can read them.

    Args:
        model: Model used for the predictions; `model.eval()` is called on it.
        samples_per_case: Number of random suffixes to sample per prefix.
        cat_categories: Categorical category metadata (stored as-is).
        scaler_params: Scaler parameters for case_elapsed_time (stored as-is).
        dict_cat_class_ids: Class-name -> id dictionaries (stored as-is).
    """
    global global_model, global_samples_per_case, global_cat_categories, global_scaler_params, global_dict_cat_class_id

    # The caller is expected to have moved the model to CPU already;
    # here it is only switched to inference mode.
    model.eval()

    (global_model,
     global_samples_per_case,
     global_cat_categories,
     global_scaler_params,
     global_dict_cat_class_id) = (model,
                                  samples_per_case,
                                  cat_categories,
                                  scaler_params,
                                  dict_cat_class_ids)


In [4]:
@torch.no_grad()
def iterate_case(full_case: Tuple[List[torch.Tensor], List[torch.Tensor]],
                 concept_name_id: int,
                 min_suffix_size: int):
    """
    Generate the growing prefixes of one case as fixed-size, left-padded windows.

    `full_case` is unpacked as (categorical streams, numerical streams, <ignored>).
    The window holds `seq_len - min_suffix_size` slots. Events are pushed in
    one at a time from the right; nothing is yielded until the categorical
    stream at `concept_name_id` has delivered its first non-zero value.

    Yields:
        (prefix_length, (cats_prefix, nums_prefix)) where each prefix tensor has
        shape (1, window_size). The same tensor objects are mutated and yielded
        again on every step, so consumers must clone them if they need a snapshot.
    """
    cat_streams, num_streams, _ = full_case
    window_size = cat_streams[0].size(0) - min_suffix_size

    def _empty_windows(streams):
        # One zero-filled (1, window_size) buffer per stream, keeping its dtype.
        return [torch.zeros((1, window_size), dtype=stream.dtype) for stream in streams]

    def _push(windows, streams, position):
        # Shift every window one slot to the left and write the event at
        # `position` into the right-most slot.
        for window, stream in zip(windows, streams):
            window[0] = torch.roll(window[0], shifts=-1, dims=0)
            window[0, -1] = stream[position]

    cats_prefix: List[torch.Tensor] = _empty_windows(cat_streams)
    nums_prefix: List[torch.Tensor] = _empty_windows(num_streams)

    prefix_length = 0
    for position in range(window_size):
        _push(cats_prefix, cat_streams, position)
        _push(nums_prefix, num_streams, position)

        # Still inside the leading padding: no real event has been seen yet.
        if prefix_length == 0 and cats_prefix[concept_name_id][0, -1] == 0:
            continue

        prefix_length += 1
        yield prefix_length, (cats_prefix, nums_prefix)

In [5]:
def _pick_most_likely(act_probs: torch.Tensor) -> int:
    """Return the index with the highest score in the model output."""
    return act_probs.argmax(dim=-1).item()


def _pick_random(act_probs: torch.Tensor) -> int:
    """Draw one index from the model output, used as multinomial sampling weights."""
    return torch.multinomial(act_probs.squeeze(0), num_samples=1).item()


@torch.no_grad()
def _rollout_suffix(cats_pref: List[torch.Tensor],
                    nums_pref: List[torch.Tensor],
                    n_steps: int,
                    known_steps: int,
                    true_ress: list,
                    true_nums: list,
                    idx2act: dict,
                    choose_index):
    """
    Autoregressively predict an activity suffix for one prefix.

    Works on clones of `cats_pref` / `nums_pref`, so the caller's tensors are
    left untouched. At every step `global_model` is called on the current
    windows and `choose_index` turns its output into an activity id.

    Args:
        cats_pref: Categorical prefix windows; index 0 is used as the activity
            window and index 1 as the resource window.
        nums_pref: Numerical prefix windows; index 0 is the only one updated.
        n_steps: Maximum number of predictions to make.
        known_steps: Number of leading steps for which ground-truth resource /
            numerical values are fed back into the windows.
        true_ress: Ground-truth resource ids for the steps after the prefix.
        true_nums: Ground-truth numerical values for the steps after the prefix.
        idx2act: Mapping activity id -> activity name.
        choose_index: Callable mapping the model output to an int activity id.

    Returns:
        List of {"Activity": name} dicts. Id 0 is reported as 'NaN'; the
        rollout stops (without emitting anything) as soon as 'EOS' is chosen.
    """
    cats = [t.clone() for t in cats_pref]
    nums = [t.clone() for t in nums_pref]

    suffix = []
    for step in range(n_steps):
        index_act = choose_index(global_model((cats, nums)))

        if index_act == 0:
            # Id 0 is the padding slot; it has no entry in idx2act.
            act = 'NaN'
        elif idx2act[index_act] == 'EOS':
            # Stop the suffix creation if EOS is predicted.
            break
        else:
            act = idx2act[index_act]

        suffix.append({"Activity": act})

        # Slide the activity window: drop the oldest slot, append the prediction.
        cats[0] = torch.cat([cats[0][:, 1:], torch.tensor([[index_act]])], dim=1)
        # Resource and numerical windows are advanced with the ground-truth
        # values while those are available.
        # NOTE(review): once `step >= known_steps` these two windows stop
        # shifting while the activity window keeps shifting, so the streams are
        # no longer aligned position-by-position -- confirm this is intended.
        if step < known_steps:
            cats[1] = torch.cat([cats[1][:, 1:], torch.tensor([[true_ress[step]]])], dim=1)
            nums[0] = torch.cat([nums[0][:, 1:], torch.tensor([[true_nums[step]]])], dim=1)

    return suffix


@torch.no_grad()
def _evaluate_case(case_name: str,
                   full_case: Tuple[List[torch.Tensor], List[torch.Tensor], str],
                   concept_name_id: int,
                   min_suffix_size: int,
                   ):
    """
    Evaluate every prefix of one case.

    Reads the module globals set by `init_worker` (`global_model`,
    `global_samples_per_case`, `global_scaler_params`,
    `global_dict_cat_class_id`). For each prefix produced by `iterate_case`
    it builds:

    * the readable prefix (activity, resource, de-normalised case_elapsed_time),
    * the true remaining activities (without EOS),
    * one most-likely suffix (argmax at every step),
    * `global_samples_per_case` randomly sampled suffixes.

    Prefixes whose true remaining suffix is empty are skipped.

    Args:
        case_name: Identifier copied into every result tuple.
        full_case: (categorical tensors, numerical tensors, <ignored>). Index 0
            of the categorical tensors is read as activity and index 1 as
            resource; index 0 of the numerical tensors as case_elapsed_time.
        concept_name_id: Forwarded to `iterate_case`.
        min_suffix_size: Forwarded to `iterate_case`.

    Returns:
        List of tuples
        (case_name, prefix_length, prefix_prep, random_suffixes, target, most_likely).
    """
    # List of tensors for test samples:
    cats_full, nums_full, _ = full_case
    # Denormalization values for numerical variables:
    mean_s, std_s = global_scaler_params

    act2idx, res2idx = global_dict_cat_class_id  # expect tuple of two dicts
    # Invert them:
    idx2act = {ix: name for name, ix in act2idx.items()}
    idx2res = {ix: name for name, ix in res2idx.items()}

    # Position of the first real (non-padding) activity. It does not depend on
    # the prefix, so it is looked up once instead of once per prefix.
    non_zero_ids = (cats_full[0] != 0).nonzero(as_tuple=True)[0]
    if non_zero_ids.numel() == 0:
        # Nothing but padding: there is no prefix to evaluate.
        return []
    first_event = int(non_zero_ids[0])

    results = []
    for prefix_length, (cats_pref, nums_pref) in iterate_case(full_case, concept_name_id, min_suffix_size):

        # Readable prefix; padding slots (activity id 0) are dropped.
        acts = cats_pref[0][0].tolist()
        ress = cats_pref[1][0].tolist()
        times = nums_pref[0][0].tolist()
        prefix_prep = [{"Activity": idx2act[a], "Resource": idx2res[r], "case_elapsed_time": t * std_s + mean_s}
                       for a, r, t in zip(acts, ress, times) if a != 0]

        # Ground truth after the prefix; the very last position is cut off.
        start = first_event + prefix_length
        true_acts = cats_full[0][start:-1].tolist()
        true_ress = cats_full[1][start:-1].tolist()
        true_nums = nums_full[0][start:-1].tolist()

        # Build target as list of dicts:
        target = [{"Activity": idx2act[a]} for a in true_acts if idx2act[a] != "EOS"]
        if not target:
            continue

        # Remaining free slots in the window = maximum number of predictions.
        n_steps = cats_pref[0].size(1) - prefix_length
        known_steps = len(true_acts)

        # MOST LIKELY
        most_likely = _rollout_suffix(cats_pref, nums_pref, n_steps, known_steps,
                                      true_ress, true_nums, idx2act, _pick_most_likely)

        # RANDOM SAMPLING
        random_suffixes = [
            _rollout_suffix(cats_pref, nums_pref, n_steps, known_steps,
                            true_ress, true_nums, idx2act, _pick_random)
            for _ in range(global_samples_per_case)
        ]

        results.append((case_name, prefix_length, prefix_prep, random_suffixes, target, most_likely))

    return results


In [6]:
def evaluate_seq_processing(model: FullShared_Join_LSTM,
                            dataset,
                            device,
                            samples_per_case: int = 1000,
                            random_order: Optional[bool]= False,
                            ):
    """
    Sequential evaluation yielding tuples per case and prefix length.

    All metadata (categories, EOS id, scaler, minimum suffix size) is read from
    the `dataset` argument, so the function works for any dataset object with
    the same attributes rather than only for one notebook-level variable.

    Args:
        model: Model to evaluate. It is moved to CPU and, via `init_worker`,
            switched to eval mode.
        dataset: Iterable of events indexed as (categorical tensors, ..., case name)
            that also exposes `all_categories` and `encoder_decoder`.
        device: Unused; kept so existing calls keep working.
        samples_per_case: Number of random suffixes sampled per prefix.
        random_order: If truthy, the cases are processed in shuffled order.

    Yields:
        The tuples produced by `_evaluate_case`, one per (case, prefix length).
    """
    # Move models to CPU
    model.to('cpu')

    # Categorical attribute descriptors: (name, number of classes, {class: id})
    cat_specs = dataset.all_categories[0]

    # Id of activity in cat list
    concept_name = 'Activity'
    concept_name_id = [i for i, cat in enumerate(cat_specs) if cat[0] == concept_name][0]

    # Dict with key: act class, value: index position
    act_classes_id = cat_specs[0][2]
    # Dict with key: res class, value: index position
    res_classes_id = cat_specs[1][2]

    # Index of the EOS value in the activity dict
    eos_id = cat_specs[concept_name_id][2]['EOS']

    min_suffix_size = dataset.encoder_decoder.min_suffix_size

    # Keep only events whose last `min_suffix_size` activities are all EOS.
    # Events sharing a case name overwrite each other; the last one wins.
    cases = {}
    for event in dataset:
        suffix = event[0][concept_name_id][-min_suffix_size:]
        if torch.all(suffix == eos_id).item():
            cases[event[2]] = event

    case_items = list(cases.items())
    if random_order:
        case_items = random.sample(case_items, len(case_items))

    # Tuple of category (e.g., Activity) with amount classes, dict with class and index
    cat_categories, _ = model.data_set_categories

    # Scaler used in dataset to normalize/ denormalize the numerical attributes:
    scaler = dataset.encoder_decoder.continuous_encoders['case_elapsed_time']
    scaler_params = (scaler.mean_.item(), scaler.scale_.item())

    # Initialize globals for identical logic
    init_worker(model, samples_per_case, cat_categories, scaler_params, (act_classes_id, res_classes_id))

    for case_name, full_case in tqdm(case_items, total=len(case_items)):
        # One result tuple per prefix of this case
        yield from _evaluate_case(case_name=case_name,
                                  full_case=full_case,
                                  concept_name_id=concept_name_id,
                                  min_suffix_size=min_suffix_size)

In [7]:
# Directory that receives the pickled result chunks
output_dir = '../../../../../../../../data/Helpdesk/eval_camargo_sl5/'

def save_chunk(results, i):
    """
    Pickle one chunk of results into `output_dir`.

    The file is named `results_part_<i + 1>.pkl`, with the number zero-padded
    to at least three digits. `output_dir` is created first if it does not
    exist yet, so a fresh checkout does not fail on the first save.

    Args:
        results: Object to pickle (must support `len`).
        i: Zero-based index of the last result contained in the chunk.
    """
    chunk_number = (i + 1)
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f'results_part_{chunk_number:03d}.pkl')
    with open(filename, 'wb') as f:
        pickle.dump(results, f)
    print(f"Saved {len(results)} results to {filename}")

In [8]:
# The suffixes are drawn with torch.multinomial, so seed the random number
# generators to make a Restart & Run All reproduce the same samples.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# NOTE(review): not used by the sequential run below; presumably the worker
# count for a parallel evaluation variant -- confirm before relying on it.
num_processes=16

# Number of results collected before a chunk is written to disk
save_every = 50

results = {}

evaluation = evaluate_seq_processing(model=model,
                                     dataset=helpdesk_test_dataset,
                                     device=torch.device("cpu"),
                                     samples_per_case=1000)

# Each item is (case name, prefix length, prefix, sampled suffixes, target suffix,
# most-likely suffix); the *_cet names are kept as they were.
for i, (case_name, prefix_len, prefix, sampled_cets, target_cet, mean_cet) in enumerate(evaluation):

    # Only guards against duplicates inside the current chunk, because
    # `results` is reset after every save.
    assert (case_name, prefix_len) not in results, f"duplicate result for {(case_name, prefix_len)}"

    results[(case_name, prefix_len)] = (prefix, target_cet, mean_cet, sampled_cets)

    if (i + 1) % save_every == 0:
        save_chunk(results, i)
        results = {}

# Write the final, partially filled chunk (if any)
if results:
    save_chunk(results, i)

  2%|▏         | 13/535 [00:59<56:12,  6.46s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_050.pkl


  5%|▌         | 27/535 [01:43<26:42,  3.15s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_100.pkl


  8%|▊         | 43/535 [02:27<25:38,  3.13s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_150.pkl


 10%|█         | 55/535 [03:12<26:52,  3.36s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_200.pkl


 12%|█▏        | 66/535 [04:06<36:31,  4.67s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_250.pkl


 15%|█▌        | 81/535 [04:51<22:10,  2.93s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_300.pkl


 18%|█▊        | 94/535 [05:40<29:38,  4.03s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_350.pkl


 20%|██        | 107/535 [06:26<24:08,  3.38s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_400.pkl


 23%|██▎       | 121/535 [07:15<27:33,  3.99s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_450.pkl


 25%|██▌       | 135/535 [07:58<19:55,  2.99s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_500.pkl


 28%|██▊       | 148/535 [08:42<20:21,  3.16s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_550.pkl


 30%|███       | 162/535 [09:29<24:32,  3.95s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_600.pkl


 33%|███▎      | 175/535 [10:17<25:13,  4.20s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_650.pkl


 36%|███▌      | 190/535 [11:00<16:09,  2.81s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_700.pkl


 38%|███▊      | 205/535 [11:47<16:13,  2.95s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_750.pkl


 41%|████      | 217/535 [12:37<28:38,  5.40s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_800.pkl


 43%|████▎     | 232/535 [13:20<14:42,  2.91s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_850.pkl


 46%|████▌     | 247/535 [14:05<13:39,  2.84s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_900.pkl


 48%|████▊     | 259/535 [14:52<18:50,  4.10s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_950.pkl


 51%|█████     | 272/535 [15:40<15:39,  3.57s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1000.pkl


 53%|█████▎    | 284/535 [16:26<16:38,  3.98s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1050.pkl


 56%|█████▌    | 298/535 [17:10<13:29,  3.42s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1100.pkl


 58%|█████▊    | 312/535 [17:54<11:15,  3.03s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1150.pkl


 61%|██████▏   | 328/535 [18:45<13:54,  4.03s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1200.pkl


 64%|██████▎   | 341/535 [19:26<08:09,  2.52s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1250.pkl


 67%|██████▋   | 356/535 [20:11<10:05,  3.38s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1300.pkl


 69%|██████▉   | 371/535 [20:57<08:22,  3.06s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1350.pkl


 72%|███████▏  | 387/535 [21:39<06:20,  2.57s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1400.pkl


 75%|███████▌  | 402/535 [22:24<07:05,  3.20s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1450.pkl


 78%|███████▊  | 416/535 [23:13<07:34,  3.82s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1500.pkl


 80%|████████  | 430/535 [24:02<05:54,  3.37s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1550.pkl


 83%|████████▎ | 442/535 [24:51<05:05,  3.29s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1600.pkl


 85%|████████▌ | 455/535 [25:38<03:40,  2.75s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1650.pkl


 88%|████████▊ | 470/535 [26:26<03:49,  3.54s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1700.pkl


 90%|█████████ | 483/535 [27:11<02:23,  2.75s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1750.pkl


 93%|█████████▎| 497/535 [27:59<03:18,  5.23s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1800.pkl


 95%|█████████▌| 509/535 [28:45<01:31,  3.54s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1850.pkl


 98%|█████████▊| 523/535 [29:33<00:36,  3.07s/it]

Saved 50 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1900.pkl


100%|██████████| 535/535 [30:12<00:00,  3.39s/it]

Saved 46 results to ../../../../../../../../data/Helpdesk/eval_camargo_sl5/results_part_1946.pkl



