# Import packages

In [None]:
import copy
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch.nn.functional as F
from torch.utils.data import DataLoader

from skpm import event_logs

from ppm.datasets import ContinuousTraces
from ppm.datasets.event_logs import EventFeatures, EventLog, EventTargets
from ppm.datasets.utils import continuous
from ppm.models import NextEventPredictor
from ppm.engine.utils import load_checkpoint

from model_training.utils import parse_args, get_model_config, prepare_data


Disabling PyTorch because PyTorch >= 2.1 is required but found 2.0.1


# Load dataset 

In [3]:
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

NUMERICAL_FEATURES = [
    "accumulated_time",
    "day_of_month",
    "day_of_week",
    "day_of_year",
    "hour_of_day",
    "min_of_hour",
    "month_of_year",
    "sec_of_min",
    "secs_within_day",
    "week_of_year",
]

In [4]:
args = parse_args()

config = {
    # args to pop before logging
    "project_name": args.project_name,
    "checkpoint_path": args.checkpoint_path,
    # args to log
    "log": args.dataset,
    "device": args.device,
    # architecture
    "backbone": args.backbone,
    "rnn_type": args.rnn_type,
    "embedding_size": args.embedding_size,
    "hidden_size": args.hidden_size,
    "n_layers": args.n_layers,
    # hyperparameters
    "batch_size": args.batch_size,       
    # features and tasks
    "categorical_features": args.categorical_features,
    "continuous_features": (
        NUMERICAL_FEATURES
        if (
            args.continuous_features is not None
            and "all" in args.continuous_features
        )
        else args.continuous_features
    ),
    "categorical_targets": args.categorical_targets,
    "continuous_targets": args.continuous_targets,
    "strategy": args.strategy,
}

In [5]:
log = getattr(event_logs, config["log"])()
train, test = prepare_data(
    log.dataframe, log.unbiased_split_params, NUMERICAL_FEATURES
)  # this is my current code for the fine-tuning experiments
event_features = EventFeatures(
    categorical=config["categorical_features"],
    numerical=config["continuous_features"],
)
event_targets = EventTargets(
    categorical=config["categorical_targets"],
    numerical=config["continuous_targets"],
)

train_log = EventLog(
    dataframe=train,
    case_id="case_id",
    features=event_features,
    targets=event_targets,
    train_split=True,
    name=config["log"],
)

test_log = EventLog(
    dataframe=test,
    case_id="case_id",
    features=event_features,
    targets=event_targets,
    train_split=False,
    name=config["log"],
    vocabs=train_log.get_vocabs(),
)

dataset_device = (
    config["device"]
    if config["backbone"]
    not in ["gpt2", "llama32-1b", "llama2-7b", "qwen25-05b"]
    else "cpu"
)

train_dataset = ContinuousTraces(
    log=train_log,
    refresh_cache=True,
    device=dataset_device,
)
test_dataset = ContinuousTraces(
    log=test_log,
    refresh_cache=True,
    device=dataset_device,
)

train_loader = DataLoader(
    train_dataset,
    batch_size=config["batch_size"],
    shuffle=False,
    collate_fn=continuous,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=config["batch_size"],
    shuffle=False,
    collate_fn=continuous,
)

In [6]:
# Loading a pre-trained model
model_config = get_model_config(test_log, config)

model = NextEventPredictor(**model_config).to(device=config["device"])
ckpt = load_checkpoint(config["checkpoint_path"], map_location=config["device"])
model.load_state_dict(ckpt)


 [*] Loading checkpoint from D:/PycharmProjects/xAI-PPM/persisted_models/suffix/BPI17_lstm_25epochs_num_features_preserved.pth succeed!


  return self.fget.__get__(instance, owner)()


<All keys matched successfully>

In [7]:
# Extracting sequences that predicted to have "O_Accepted" as the next activity of the given trace

o_cancelled_token = 14
o_cancelled_ids = []
o_cancelled_cases = []


for ind, batch in enumerate(test_loader):
    x_cat, x_num, _, _ = batch # type: ignore
    x_cat, x_num = x_cat[:, :-1, :], x_num[:, :-1, :]
    x_cat, x_num = (x_cat.to(dataset_device), x_num.to(dataset_device))

    attention_mask = (x_cat[..., 0] != 0).long()
    out, _ = model(x_cat=x_cat, x_num=x_num, attention_mask=attention_mask)
    pred_last_event = torch.argmax(out['next_activity'][:, -1, :]).cpu().item()
    # print(pred_last_event)
    if pred_last_event == o_cancelled_token:
        o_cancelled_ids.append(ind)     
        case_to_explain = np.concatenate([x_cat.numpy(), x_num.numpy()], axis=-1)
        o_cancelled_cases.append(case_to_explain)     

print(f'The following {len(o_cancelled_cases)} cases that derived "O_Cancelled" prediction for the next activity will be explained')

The following 185 cases that derived "O_Cancelled" prediction for the next activity will be explained


# Generate prototype explanations

In [1]:
from collections import namedtuple
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf

from map.map_explainer import Explainer
from map.utils import ConceptProperties

In [None]:
output_dir = Path('D:/PycharmProjects/xAI-PPM/output')
explainer_name = 'map_explainer'

output_dir_ex = output_dir / explainer_name
output_dir_ex.mkdir(parents=True, exist_ok=True)

n_concepts = 4
epochs = 10


exp = Explainer(input_shape=np.shape(train.X[0]),
                output_directory=output_dir_ex,
                n_concepts=n_concepts,
                latent_dim=n_concepts * 5,
                epochs=epochs,
                batch_size=32)

In [None]:
fit_explainer = not Path(output_dir_ex / "map.h5").exists()
if fit_explainer:
    exp.fit_explainer(
        classifier=model,
        X=train.X)
else:
    exp.explainer.build(input_shape=np.shape(train.X))
    exp.explainer.load_weights(str(output_dir_ex / 'map.h5'))

X_concepts_kmeans, latent_centers = exp.get_concepts_kmeans(train.X)
concept_labels = model(X_concepts_kmeans)
latent = exp.explainer.encoder(test.X)

In [None]:
# completeness & importance
y_pred = model(test.X)

cp = ConceptProperties()
map_instance_concept = cp.get_closest_rec_concept_to_instance(test.X, latent.numpy(), latent_centers)

pd.DataFrame({
    "model": "MAP",
    "accuracy": cp.get_completness(test.y, y_pred),
    "output_dir": output_dir,
    "n_concepts": n_concepts,
    "concept representability": cp.KL_divergence_performance(test.X[:, :, 0], latent),
    "reconstructed concept representability": cp.KL_divergence_performance(test.X[:, :, 0],
                                                                            map_instance_concept[:, :, 0]),
    "latent_centers": [latent_centers]
}, index=[0]) \
    .to_csv(output_dir / "completeness_importance_concept_map.csv")
    