## Imports and Setup
Here I will install necessary libraries and do all imports necessary for the notebook.

In [1]:
import io
import os
import random
import pdb
import wget
import argparse
from multiprocessing import Pool
import json
from tqdm import tqdm
import zipfile
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import utils
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import regularizers
from keras import layers
from matplotlib import pyplot as plt

from processtransformer.models import transformer
from processtransformer.models.transformer import TokenAndPositionEmbedding, TransformerBlock

import pm4py




In [2]:
# tell tf to use gpu
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_memory_growth(gpus[0], True)
  except RuntimeError as e:
    print(e)

In [3]:
import warnings
warnings.filterwarnings("ignore")

# Download Data
Here we are downloading the necessary data to evaluate the benchmarks

In [4]:
data_dir = "./datasets/"
if not os.path.exists(data_dir):
  os.mkdir(data_dir)

## Show a Log

In [5]:
model1_log = pm4py.read_xes('Log_Model1.xes')

parsing log, completed traces :: 100%|██████████| 12000/12000 [00:02<00:00, 4464.80it/s]


In [6]:
model1_log

Unnamed: 0,concept:name,time:timestamp,case:concept:name
0,A,1970-04-26 19:46:40+00:00,0
1,B,1970-04-26 19:46:41+00:00,0
2,C,1970-04-26 19:46:42+00:00,0
3,D,1970-04-26 19:46:43+00:00,0
4,E1,1970-04-26 19:46:44+00:00,0
...,...,...,...
155995,E2,1970-04-28 15:06:35+00:00,11999
155996,F,1970-04-28 15:06:36+00:00,11999
155997,G,1970-04-28 15:06:37+00:00,11999
155998,H,1970-04-28 15:06:38+00:00,11999


# Helper Functions
These functions are created for convenience of operating with logs containing proces variants

In [7]:
def specify_variant(variants_log, trace_num):
  '''
  Takes a log that is a list of unique variants of a process model and returns
  the variant according to the specified trace number
  '''
  return list(variants_log.loc[variants_log['case:concept:name']==str(trace_num), 'concept:name'])

def count_traces(log):
  return log['case:concept:name'].nunique()

def count_matching_logs(variant, log):
  '''
  Returns the number of times the specified variant is modelled within the log
  '''
  # Group by trace and gather events into a list to compare to variant
  grouped = log.groupby('case:concept:name')['concept:name'].apply(list)
  # Sum each occurrence of a variant to get total occurrences
  return grouped.apply(lambda x: 1 if x == variant else 0).sum()

def filter_out_variant(variant, log):
  ''' Returns a log with the specified variant filtered out from it '''
  # Group by trace and gather events into a list to compare to variant
  grouped = log.groupby('case:concept:name')['concept:name'].apply(list)

  # Check if the ordered set of unique events values for each case matches the variant
  filtered_cases = grouped[grouped.apply(lambda x: x != variant)]

  # Get the indices for cases that don't match the variant
  filtered_case_names = filtered_cases.index

  # Filter the original DataFrame based on the cases that don't match the variant
  filtered_log = log[log['case:concept:name'].isin(filtered_case_names)]

  print(f'Removed {count_traces(log) - count_traces(filtered_log)} cases matching variant')

  return filtered_log

def create_zip(directory, zip_filename):
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for root, _, files in os.walk(directory):
            for file in files:
                zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), directory))


## EDA

In [8]:
m1_var = pm4py.read_xes('Variants_Model1.xes')

parsing log, completed traces :: 100%|██████████| 120/120 [00:00<00:00, 4472.97it/s]


In [9]:
m1_var

Unnamed: 0,concept:name,time:timestamp,case:concept:name
0,A,1970-04-26 19:46:41+00:00,0
1,B,1970-04-26 19:46:42+00:00,0
2,C,1970-04-26 19:46:43+00:00,0
3,D,1970-04-26 19:46:44+00:00,0
4,E1,1970-04-26 19:46:45+00:00,0
...,...,...,...
1555,E1,1970-04-26 20:12:36+00:00,119
1556,F,1970-04-26 20:12:37+00:00,119
1557,G,1970-04-26 20:12:38+00:00,119
1558,H,1970-04-26 20:12:39+00:00,119


In [10]:
m1_var1 = specify_variant(m1_var, 0)

In [11]:
filter_out_variant(m1_var1, model1_log)

Removed 90 cases matching variant


Unnamed: 0,concept:name,time:timestamp,case:concept:name
0,A,1970-04-26 19:46:40+00:00,0
1,B,1970-04-26 19:46:41+00:00,0
2,C,1970-04-26 19:46:42+00:00,0
3,D,1970-04-26 19:46:43+00:00,0
4,E1,1970-04-26 19:46:44+00:00,0
...,...,...,...
155995,E2,1970-04-28 15:06:35+00:00,11999
155996,F,1970-04-28 15:06:36+00:00,11999
155997,G,1970-04-28 15:06:37+00:00,11999
155998,H,1970-04-28 15:06:38+00:00,11999


In [12]:
count_matching_logs(m1_var1, model1_log)

90

In [13]:
model1_log.groupby('case:concept:name')['concept:name'].apply(list)

case:concept:name
0       [A, B, C, D, E1, E5, E2, E4, E3, F, G, H, I]
1       [A, B, C, D, E3, E5, E4, E1, E2, F, G, H, I]
10      [A, B, C, D, E4, E5, E3, E2, E1, F, G, H, I]
100     [A, B, C, D, E3, E2, E1, E5, E4, F, G, H, I]
1000    [A, B, C, D, E1, E3, E2, E4, E5, F, G, H, I]
                            ...                     
9995    [A, B, C, D, E1, E2, E3, E4, E5, F, G, H, I]
9996    [A, B, C, D, E3, E5, E4, E2, E1, F, G, H, I]
9997    [A, B, C, D, E3, E4, E2, E5, E1, F, G, H, I]
9998    [A, B, C, D, E2, E5, E3, E1, E4, F, G, H, I]
9999    [A, B, C, D, E2, E1, E5, E3, E4, F, G, H, I]
Name: concept:name, Length: 12000, dtype: object

In [14]:
log = model1_log
var = m1_var1
grouped = log.groupby('case:concept:name')['concept:name'].apply(list)
filtered_cases = grouped[grouped.apply(lambda x: x == var)]
cases = filtered_cases.index.unique().to_list()
neg_cases = log.loc[~log['case:concept:name'].isin(cases), 'case:concept:name'].unique()

In [15]:
len(cases) + len(neg_cases) == len(log['case:concept:name'].unique())

True

# Defining Custom Data Handling
Here we are using the Objects from ProcessTransformer source code as a base and customizing functions within it to provide our own custom functions for loading variants and evaluating the metrics for process understanding.

In [16]:
class LogsDataProcessor:
    def __init__(self, name, log_filepath, variants_filepath, columns, dir_path = "./datasets/processed", pool = 1):
        """Provides support for processing raw logs.
        Args:
            name: str: Dataset name
            log_filepath: str: Path to the generated model log
            variants_filepath: str: Path to the model variants
            columns: list: name of column names
            dir_path:  str: Path to directory for saving the processed dataset
            pool: Number of CPUs (processes) to be used for data processing
        """
        self._name = name
        self._log_filepath = log_filepath
        self._variants_filepath = variants_filepath
        self._org_columns = columns
        self._dir_path = dir_path
        if not os.path.exists(f"{dir_path}/{self._name}/processed"):
            os.makedirs(f"{dir_path}/{self._name}/processed")
        self._dir_path = f"{self._dir_path}/{self._name}/processed"
        self._pool = pool
        self._log = self._process_log()
        self._variants = self._process_variants()

    def _load_xes(self, var):
        if var:
          df = pm4py.read_xes(self._variants_filepath)
          self.variants = {x: specify_variant(df, x) for x in df['case:concept:name'].unique()}
        else:
          df = pm4py.read_xes(self._log_filepath)
        df = df[self._org_columns]
        df.columns = ["case:concept:name",
            "concept:name", "time:timestamp"]
        df["concept:name"] = df["concept:name"].str.lower()
        df["concept:name"] = df["concept:name"].str.replace(" ", "-")
        df["time:timestamp"]= pd.to_datetime(df["time:timestamp"],
            dayfirst=True).map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
        return df

    def _extract_logs_metadata(self, df):
        x_keys = ["[PAD]","[BOS]", "[EOS]"]
        y_keys = ["[EOS]"]
        activities = list(df["concept:name"].unique())
        x_keys.extend(activities)
        y_keys.extend(activities)
        x_val = range(len(x_keys))
        y_val = range(len(y_keys))

        coded_activity = dict({"x_word_dict":dict(zip(x_keys, x_val))})
        code_activity_normal = dict({"y_word_dict": dict(zip(y_keys, y_val))})

        coded_activity.update(code_activity_normal)
        coded_json = json.dumps(coded_activity)
        with open(f"{self._dir_path}/metadata.json", "w") as metadata_file:
            metadata_file.write(coded_json)

    def _next_activity_helper_func(self, df):
        case_id, case_name = "case:concept:name", "concept:name"
        processed_df = pd.DataFrame(columns = ["case_id",
        "prefix", "k", "next_act"])
        idx = 0
        unique_cases = df[case_id].unique()
        for _, case in tqdm(enumerate(unique_cases), total=len(unique_cases)):
            act = df[df[case_id] == case][case_name].to_list()
            act.insert(0, "[BOS]")
            act.append("[EOS]")
            for i in range(len(act) - 1):
                prefix = np.where(i == 0, act[0], " ".join(act[:i+1]))
                next_act = act[i+1]
                processed_df.at[idx, "case_id"]  =  case
                processed_df.at[idx, "prefix"]  =  prefix
                processed_df.at[idx, "k"] =  i
                processed_df.at[idx, "next_act"] = next_act
                idx = idx + 1
        return processed_df

    def _process_next_activity(self, df, train_list, test_list, test_vars):
        df_split = np.array_split(df, self._pool)
        with Pool(processes=self._pool) as pool:
            processed_df = pd.concat(pool.imap_unordered(self._next_activity_helper_func, df_split))
        train_df = processed_df[processed_df["case_id"].isin(train_list)]
        test_df = processed_df[processed_df["case_id"].isin(test_list)]
        # filename indicates which variants are left out for testing
        file_prefix = "_".join(test_vars)
        train_df.to_csv(f"{self._dir_path}/{file_prefix}_train.csv", index = False)
        test_df.to_csv(f"{self._dir_path}/{file_prefix}_test.csv", index = False)

    def _get_train_test_cases(self, log, test_vars):
      '''
      Gets the case ids from log that match the variants specified in vars
      '''
      # init matching cases
      test_list = []
      # get unique traces in the log
      unique_traces = log.groupby('case:concept:name')['concept:name'].apply(list)
      # find all cases matching the variant
      for var_ in test_vars:
        var = self.variants[var_]
        filtered_cases = unique_traces[unique_traces.apply(lambda x: [element.lower() for element in x] == [item.lower() for item in var])]
        case_matches = filtered_cases.index.unique().to_list()
        test_list += case_matches
      # convert to array of unique cases
      test_list = np.array(list(set(test_list)))
      # get train list as complement of cases from test list
      train_list = log.loc[~log['case:concept:name'].isin(test_list), 'case:concept:name'].unique()
      return train_list, test_list

    def process_test_data(self, test_vars = ['0']):
        # get train and test lists
        train_list, test_list = self._get_train_test_cases(self._log, test_vars)
        # process the log for next activity prediction task
        self._process_next_activity(self._log, train_list, test_list, test_vars)

    def _process_log(self):
      log = self._load_xes(var=False)
      # get log metadata
      self._extract_logs_metadata(log)
      return log

    def _process_variants(self):
      vars_df = self._load_xes(var=True)
      return vars_df



In [17]:
class LogsDataLoader:
    def __init__(self, name, dir_path = "./datasets/processed"):
        """Provides support for reading and
            pre-processing examples from processed logs.
        Args:
            name: str: name of the dataset as used during processing raw logs
            dir_path: str: Path to dataset directory
        """
        self._dir_path = f"{dir_path}/{name}/processed"

    def tokenize_data(self, df,
        x_wd, y_wd, max_case_length, shuffle=True):

        x = df["prefix"].values
        y = df["next_act"].values
        if shuffle:
            x, y = utils.shuffle(x, y)

        token_x = list()
        token_y = list()
        for _x, _y in zip(x,y):
            token_x.append([x_wd[s] for s in _x.split()])
            token_y.append(y_wd[_y])

        token_x = tf.keras.preprocessing.sequence.pad_sequences(
            token_x, maxlen=max_case_length)

        token_x = np.array(token_x, dtype=np.float32)
        token_y = np.array(token_y, dtype=np.float32)

        return token_x, token_y

    def get_max_case_length(self, train_x):
        train_token_x = list()
        for _x in train_x:
            train_token_x.append(len(_x.split()))
        return max(train_token_x)

    def load_eval_data(self, eval_id, use_val = False):

        train_df = pd.read_csv(f"{self._dir_path}/{eval_id}_train.csv")
        self.train_len = train_df['case_id'].nunique()
        # assign random 20% of prefixes for validation set
        if use_val:
            train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
        test_df = pd.read_csv(f"{self._dir_path}/{eval_id}_test.csv")
        self.test_len = test_df['case_id'].nunique()
        max_case_length = self.get_max_case_length(train_df["prefix"].values)
        if use_val:
            return (train_df, val_df, test_df, max_case_length)
        else:
            return (train_df, test_df, max_case_length)

    def load_data(self):

      with open(f"{self._dir_path}/metadata.json", "r") as json_file:
              metadata = json.load(json_file)

      x_word_dict = metadata["x_word_dict"]
      y_word_dict = metadata["y_word_dict"]

      vocab_size = len(x_word_dict)
      total_classes = len(y_word_dict)

      return (x_word_dict, y_word_dict,
              vocab_size,
              total_classes)

# Evaluation Loader
Here we define an evaluation loader for each process model. The eval loaders handle generation of experiment data, training of the model, creation of simulation data, and computation of metrics.

In [18]:
# define different transformer architecture than base process transformer
def get_next_activity_model(max_case_length, vocab_size, output_dim,
    embed_dim = 36, num_heads = 4, ff_dim = 64, dropout=0.1, l1=1e-5, l2=1e-4):
    inputs = layers.Input(shape=(max_case_length,))
    x = TokenAndPositionEmbedding(max_case_length, vocab_size, embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(output_dim, kernel_regularizer=regularizers.L1L2(l1=l1, l2=l2), activation="softmax")(x)
    transformer = tf.keras.Model(inputs=inputs, outputs=outputs,
        name = "next_activity_transformer")
    return transformer

# override the processtransformer module's function
transformer.get_next_activity_model = get_next_activity_model

In [19]:
# have one EvalLoader class per model
class EvalLoader:
  # TODO: train and simulate the model for each id
  def __init__(self, model_number):
    self.model_name = 'Model' + str(model_number)
    self.log_filepath = 'Log_' + self.model_name + '.xes'
    self.variants_filepath = 'Variants_' + self.model_name + '.xes'
    self.processor = LogsDataProcessor(self.model_name, self.log_filepath, self.variants_filepath, columns = ["case:concept:name","concept:name", "time:timestamp"], pool=2)
    self.loader = LogsDataLoader(name = self.model_name)
    self.models = {}
    self.sims = {}
    self.metrics = {}

  def create_experiment_data(self, experiment_kind='LOVOCV'):
    if experiment_kind == 'LOVOCV':
      # iterate through all the variants and specify one as a variant each time
      for i, variant in enumerate(self.processor.var_nums):
        # skip it if the file already exists
        if not os.path.exists(f"{self.processor._dir_path}/{variant}_train.csv"):
          print(f'Generating Evaluation Data for Variant {i+1}/{len(self.processor.var_nums)}')
          self.processor.process_test_data(test_vars=[variant])

    else:
      # otherwise randomly leave out 20% of variants in three iterations
      # firstly check how many 20% variants have already been generated, and only go to a maximum of 3
      num_evals = 3 - ((len([f for f in os.listdir(self.processor._dir_path) if len(os.path.basename(f)) >= 20]))// 2)
      for i in range(num_evals):
        # choose 20% of variants
        num_vars = int(len(self.processor.variants) * 0.2)
        test_vars = random.sample(list(self.processor.variants.keys()), num_vars)
        file_prefix = "_".join(test_vars)
        # skip it if the file already exists
        if not os.path.exists(f"{self.processor._dir_path}/{file_prefix}_train.csv"):
          print(f'Generating Evaluation Data for 20% Split {i+1}/3')
          self.processor.process_test_data(test_vars=test_vars)

  def get_metadata(self):
    (x_wd, y_wd, vocab_size, total_classes) = self.loader.load_data()

    return (x_wd, y_wd, vocab_size, total_classes)

  def get_experiment_ids(self, experiment_kind):

    if experiment_kind == 'LOVOCV':
      # get all the LOVOCV experiment files
      train_test_files = [f for f in os.listdir(self.processor._dir_path) if (len(os.path.basename(f)) <= 20 and f.endswith('.csv') and 'sim' not in f)]
    else:
      # get all the 20% experiment files
      train_test_files = [f for f in os.listdir(self.processor._dir_path) if (len(os.path.basename(f)) > 20 and f.endswith('.csv') and 'sim' not in f)]
    # get identifiers for doing each train and test run
    train_test_ids = list(set([os.path.splitext(os.path.basename(f))[0].rsplit('_train', 1)[0].rsplit('_test', 1)[0] for f in train_test_files]))

    return train_test_ids

  def train_model(self, eval_id, 
                  use_val=False, learning_rate=0.001, batch_size=64, 
                  epochs=4, embed_dim = 36, num_heads = 4, 
                  ff_dim = 64, dropout=0.1, l1=1e-5, l2=1e-4):
    # load in model metadata for training
    (x_word_dict, y_word_dict, vocab_size, total_classes) = self.get_metadata()
    # get the train and test dataframe from loader
    if use_val:
      train_df, _, _, max_case_length = self.loader.load_eval_data(eval_id, use_val)
    else:
      train_df, _, max_case_length = self.loader.load_eval_data(eval_id)
    # tokenize for the transformer
    train_token_x, train_token_y = self.loader.tokenize_data(train_df,
    x_word_dict, y_word_dict, max_case_length)
    # init model
    transformer_model = get_next_activity_model(
      max_case_length=max_case_length,
      vocab_size=vocab_size,
      output_dim=total_classes,
      embed_dim=embed_dim,
      num_heads=num_heads,
      ff_dim=ff_dim,
      dropout=dropout,
      l1=l1,
      l2=l2
      )
    # compile model
    transformer_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    # train model on data
    transformer_model.fit(train_token_x, train_token_y,
      epochs=epochs, batch_size=batch_size)
    model_metadata = (x_word_dict, y_word_dict, vocab_size, total_classes, max_case_length)
    self.models[eval_id] = (transformer_model, model_metadata)

    return transformer_model

  def sample(self, probs, temperature):
    # helper function to sample an index from softmax output
    a = np.log(probs) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))

  def generate_simulation_data(self, model_id, sample=False, temperature=1.0, batch_size=128):
    # get the model
    model = self.models[model_id][0]
    # get model metadata
    (x_word_dict, y_word_dict, vocab_size, total_classes, max_case_length) = self.models[model_id][1]
    y_act_list = list(y_word_dict.keys())
    get_x_act_from_y = lambda y_ind: x_word_dict[y_act_list[y_ind]]
    # define path for output file
    output_file = f"{self.processor._dir_path}/{model_id}_sim.csv"
    if sample:
      output_file = f"{self.processor._dir_path}/{model_id}_sim_sample_{temperature}.csv"
    # get the number of traces to generate (should be equal to number of training traces)
    self.num_total_traces = self.loader.train_len + self.loader.test_len

    # create the base trace (just start token)
    start_token = x_word_dict["[BOS]"]
    end_token = x_word_dict["[EOS]"]
    base_trace = np.full((batch_size,1),start_token)
    # init list of generated traces
    generated = np.empty((0, max_case_length))

    # Generate traces in batches
    for batch_num in tqdm(range(0, self.num_total_traces, batch_size)):
      batch_size = min(batch_size, self.num_total_traces - batch_num)
      batch_traces = base_trace[:batch_size].copy()
      retain_traces = np.empty((0, max_case_length))
      for _ in range(max_case_length - 1):  # loop until max_case_length - 1 to accommodate for start_token
        # pad sequence prior to predicting
        batch_x = tf.keras.preprocessing.sequence.pad_sequences(
            batch_traces, maxlen=max_case_length)
        # get probabilities from model output for the entire batch
        next_activity_probs = model.predict(batch_x, verbose=0).astype('float64')
        if sample:
          next_activities = np.array([self.sample(probs, temperature) for probs in next_activity_probs])
        else:
          # get the class with the highest probability for each trace
          next_activities = np.argmax(next_activity_probs, axis=1)
        # get the encodings for the predictions within x
        next_acts_x = np.vectorize(get_x_act_from_y)(next_activities)
        next_acts_x_column = next_acts_x[:, np.newaxis]
        # update the batch traces with predictions
        batch_traces = np.concatenate((batch_traces, next_acts_x_column), axis=1)
        # check if end of trace token was generated for any trace
        end_mask = (next_acts_x == end_token)
        # only retain traces for future prediction if end token has not been generated
        # break the loop if all traces have generated the end token
        if np.all(end_mask):
          # pad to max lengths the same
          batch_traces = tf.keras.preprocessing.sequence.pad_sequences(
            batch_traces, maxlen=max_case_length)
          break
        batch_traces, ended_traces = batch_traces[~end_mask], batch_traces[end_mask]
        if len(ended_traces) > 0:
          # pad to make lengths the same
          ended_traces = tf.keras.preprocessing.sequence.pad_sequences(
            ended_traces, maxlen=max_case_length)
          retain_traces = np.vstack((retain_traces, ended_traces))


      # add the batch of generated traces to the result list
      generated = np.vstack((generated, batch_traces, retain_traces))

    # store generated file in object
    generated = pd.DataFrame(generated)
    self.sims[model_id] = generated
    # save the generated traces to a CSV file
    generated.to_csv(output_file, index=False)
    return generated

  def compute_metrics(self, model_id):
    # get raw dfs
    train_df, test_df, _ = self.loader.load_eval_data(model_id)
    sim_df = self.sims[model_id]
    variant_nums = model_id.split('_')
    variants = [self.processor.variants[x] for x in variant_nums]
    test_variants = np.array([" ".join(x).lower() for x in variants])
    # process all dfs into same form: list of traces represented by a single string each

    # sim traces
    x_word_dict = self.models[model_id][1][0]
    inverted_word_dict = {v: k for k, v in x_word_dict.items()}
    mapped_sim_traces = np.vectorize(inverted_word_dict.get)(sim_df.iloc[:,1:].values)
    sim_traces = np.array([' '.join([r for r in row.astype(str) if r not in ['[PAD]', '[EOS]', '[BOS]']]) for row in mapped_sim_traces])
    sim_variants = np.unique(sim_traces)

    train_traces = train_df.groupby('case_id').apply(lambda x: x.nlargest(1, 'k'))['prefix'].str.replace('[BOS] ','').values
    train_variants = np.unique(train_traces)

    test_traces = test_df.groupby('case_id').apply(lambda x: x.nlargest(1, 'k'))['prefix'].str.replace('[BOS] ','').values
    test_variants = np.unique(test_traces)
    # fitness
    fitness = 0
    for v in train_variants:
      occ_v_sim = np.count_nonzero(sim_traces == v)
      occ_v_train = np.count_nonzero(train_traces == v)
      fitness += min(occ_v_sim, occ_v_train)
    fitness /= len(train_traces)

    # precision
    precision = 0
    for v in sim_variants:
      occ_v_sim = np.count_nonzero(sim_traces == v)
      occ_v_test = np.count_nonzero(test_traces == v)
      occ_v_train = np.count_nonzero(train_traces == v)
      occ_v_train_test = occ_v_train + occ_v_test
      precision += min(occ_v_sim, occ_v_train_test)
    precision /= len(sim_traces)

    # generalisation
    general = 0
    for v in test_variants:
      occ_v_sim = np.count_nonzero(sim_traces == v)
      occ_v_test = np.count_nonzero(test_traces == v)
      general += min(occ_v_sim, occ_v_test)
    general /= len(test_traces)

    self.metrics[model_id] = [fitness, precision, general]

    return fitness, precision, general


  def get_average_metrics(self):
    avg_fitness = np.mean([x[0] for x in self.metrics.values()])
    avg_precision = np.mean([x[1] for x in self.metrics.values()])
    avg_generalisation = np.mean([x[2] for x in self.metrics.values()])
    error_fitness = np.std([x[0] for x in self.metrics.values()])
    error_precision = np.std([x[1] for x in self.metrics.values()])
    error_generalisation = np.std([x[2] for x in self.metrics.values()])
    return (avg_fitness, error_fitness), (avg_precision, error_precision), (avg_generalisation, error_generalisation)

  def evaluate_performance(self, model_id, df):
    # get model
    model = self.models[model_id][0]
    (x_word_dict, y_word_dict, vocab_size, total_classes, max_case_length) = self.models[model_id][1]
    # evaluate over all the prefixes (k) and save the results
    k, accuracies,fscores, precisions, recalls = [],[],[],[],[]
    for i in range(max_case_length):
        data_subset = df[df["k"]==i]
        if len(data_subset) > 0:
            token_x, token_y = self.loader.tokenize_data(data_subset, x_word_dict, y_word_dict, max_case_length)
            y_pred = np.argmax(model.predict(token_x, verbose=0), axis=1)
            accuracy = metrics.accuracy_score(token_y, y_pred)
            precision, recall, fscore, _ = metrics.precision_recall_fscore_support(
                token_y, y_pred, average="weighted")
            k.append(i)
            accuracies.append(accuracy)
            fscores.append(fscore)
            precisions.append(precision)
            recalls.append(recall)

    k.append(i + 1)
    accuracies.append(np.mean(accuracy))
    fscores.append(np.mean(fscores))
    precisions.append(np.mean(precisions))
    recalls.append(np.mean(recalls))

    return k, accuracies,fscores, precisions, recalls


In [20]:
# Function to write results to a text file
def write_results_to_file(model_params, evals, use_val=False):
    
    # Generate the current date and time
    current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # ensure directory exists
    os.makedirs('results', exist_ok=True)
    # write to default path if none provided
    filename = f'results/results_{current_datetime}.txt'
    
    with open(filename, 'w') as file:

        # Write model parameters
        file.write("Model Parameters:\n")
        for key, value in model_params.items():
            file.write(f"{key}: {value}\n")
        file.write("\n")

        # Write average metrics for each model
        for i, eval in enumerate(evals):
            file.write(f'-------Model {i+1}/{len(evals)}-------\n')
            ((avg_fitness, error_fitness),
            (avg_precision, error_precision),
            (avg_generalisation, error_generalisation)) = eval.get_average_metrics()
            file.write(f'Average Fitness: {avg_fitness} +/- {error_fitness}\n')
            file.write(f'Average Precision: {avg_precision} +/- {error_precision}\n')
            file.write(f'Average Generalisation: {avg_generalisation} +/- {error_generalisation}\n')

            # Write train and test performance
            accuracies_train = np.array([])
            fscores_train = np.array([])
            precisions_train = np.array([])
            recalls_train = np.array([])
            accuracies_test = np.array([])
            fscores_test = np.array([])
            precisions_test = np.array([])
            recalls_test = np.array([])

            for j, model_id in enumerate(eval.models):
                file.write(f'Experiment {j+1}/{len(eval.models)}\n')
                # get train and test data
                train_df, test_df, _ = eval.loader.load_eval_data(model_id)
                # evaluate train performance
                k, accs, fs, ps, rs = eval.evaluate_performance(model_id, train_df)
                accuracies_train = np.append(accuracies_train, np.mean(accs))
                fscores_train = np.append(fscores_train, np.mean(fs))
                precisions_train = np.append(precisions_train, np.mean(ps))
                recalls_train = np.append(recalls_train, np.mean(rs))
                # evaluate test performance
                k, accs, fs, ps, rs = eval.evaluate_performance(model_id, test_df)
                accuracies_test = np.append(accuracies_test, np.mean(accs))
                fscores_test = np.append(fscores_test, np.mean(fs))
                precisions_test = np.append(precisions_test, np.mean(ps))
                recalls_test = np.append(recalls_test, np.mean(rs))

            file.write('-------Training Data-------\n')
            file.write('Average Accuracy: {}\n'.format(np.mean(accuracies_train)))
            file.write('Average F-Score: {}\n'.format(np.mean(fscores_train)))
            file.write('Average Precision: {}\n'.format(np.mean(precisions_train)))
            file.write('Average Recall: {}\n'.format(np.mean(recalls_train)))
            file.write('-------Testing Data-------\n')
            file.write('Average Accuracy: {}\n'.format(np.mean(accuracies_test)))
            file.write('Average F-Score: {}\n'.format(np.mean(fscores_test)))
            file.write('Average Precision: {}\n'.format(np.mean(precisions_test)))
            file.write('Average Recall: {}\n'.format(np.mean(recalls_test)))
            file.write("\n")

In [21]:
def hyperparameter_search(model_num, model_param_set):
    # define a function to do a hyperparameter search based on a single model (to save time)
    # first create a results file
    current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    result_file = f'results/results_{model_num}_{current_datetime}.txt'
    with open(result_file, 'w') as file:
        file.write(f"Model {model_num} Hyperparameter Search\n")
        # iterate thru param sets
        for model_params in model_param_set:
            file.write(f"Model Parameters: {model_params}\n")
            eval = EvalLoader(model_number = model_num)
            eval.create_experiment_data('20%')
            experiments_20 = eval.get_experiment_ids('20%')
            accuracies = np.array([])
            fscores = np.array([])
            precisions = np.array([])
            recalls = np.array([])
            for j,experiment in enumerate(experiments_20):
                print(f'Experiment {j+1}/{len(experiments_20)}')
                # train the model according to the split data generated
                eval.train_model(experiment,
                                use_val=True,
                                learning_rate=model_params['learning_rate'],
                                batch_size=model_params['batch_size'],
                                epochs=model_params['epochs'],
                                embed_dim = model_params['embed_dim'],
                                num_heads = model_params['num_heads'],
                                ff_dim = model_params['ff_dim'],
                                dropout=model_params['dropout'],
                                l1=model_params['l1'],
                                l2=model_params['l2'])
                # generate simulation data
                eval.generate_simulation_data(experiment, sample=False)
                # compute metrics
                eval.compute_metrics(experiment)
                # evaluate validation performance
                _, val_df, _, _ = eval.loader.load_eval_data(experiment, use_val=True)
                k, accs, fs, ps, rs = eval.evaluate_performance(experiment, val_df)
                accuracies = np.append(accuracies, np.mean(accs))
                fscores = np.append(fscores, np.mean(fs))
                precisions = np.append(precisions, np.mean(ps))
                recalls = np.append(recalls, np.mean(rs))
            # write average results to file
            file.write(f"Average Accuracy: {np.mean(accuracies)}\n")
            file.write(f"Average F-Score: {np.mean(fscores)}\n")
            file.write(f"Average Precision: {np.mean(precisions)}\n")
            file.write(f"Average Recall: {np.mean(recalls)}\n")

# Experiments
We will use the evaluation object already defined to perform the tests on each unique model. We firstly generate the train and test datasets for each experiment kind and save these. Next, we train a model for each train+test dataset. Finally, each model is used to generate a simulation log of the same size as the original reference log (100x the number of variants). Once each of the simulation logs is generated, we can compute the metrics for each experiemnt.

In [22]:
# define sets of hyperparameters
l1l2s = [1e-5, 1e-4, 1e-3, 1e-2]

model_params_set = []
for l1l2 in l1l2s:
    model_params_set.append(
        {'learning_rate': 0.001, 'batch_size': 256, 'epochs': 16, 'embed_dim': 16, 'num_heads': 2, 'ff_dim': 16, 'dropout': 0.2, 'l1': l1l2, 'l2': l1l2}
    )
# define model number
model_num = 1

# perform hyperparameter search
hyperparameter_search(model_num, model_params_set)

parsing log, completed traces :: 100%|██████████| 12600/12600 [00:05<00:00, 2421.11it/s]
parsing log, completed traces :: 100%|██████████| 126/126 [00:00<00:00, 3181.58it/s]


Experiment 1/3

Epoch 1/16

Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:04<00:00,  1.26s/it]


Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [01:58<00:00,  1.20s/it]


Experiment 3/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:04<00:00,  1.26s/it]
parsing log, completed traces :: 100%|██████████| 12600/12600 [00:04<00:00, 2541.45it/s]
parsing log, completed traces :: 100%|██████████| 126/126 [00:00<00:00, 608.54it/s]


Experiment 1/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:08<00:00,  1.30s/it]


Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:06<00:00,  1.28s/it]


Experiment 3/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:05<00:00,  1.27s/it]
parsing log, completed traces :: 100%|██████████| 12600/12600 [00:05<00:00, 2456.63it/s]
parsing log, completed traces :: 100%|██████████| 126/126 [00:00<00:00, 3000.83it/s]


Experiment 1/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [01:58<00:00,  1.20s/it]


Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:01<00:00,  1.22s/it]


Experiment 3/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:00<00:00,  1.21s/it]
parsing log, completed traces :: 100%|██████████| 12600/12600 [00:05<00:00, 2433.48it/s]
parsing log, completed traces :: 100%|██████████| 126/126 [00:00<00:00, 3054.78it/s]


Experiment 1/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [01:59<00:00,  1.21s/it]


Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [01:59<00:00,  1.21s/it]


Experiment 3/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


100%|██████████| 99/99 [02:02<00:00,  1.23s/it]


In [107]:
# define model parameters for the whole testing cycle
model_params = {
  'epochs' :  16,
  'learning_rate' : 0.001,
  'batch_size' : 256,
  'use_sampling' : True,
  'temperature' : 1.25, 
  'embed_dim' : 16,
  'num_heads' : 2,
  'ff_dim' : 16,
  'dropout' : 0.2,
  'l1' : 1e-3,
  'l2' : 1e-3
}

In [83]:
# Get all the logs and variants and create processed versions
evals = []
# create the evaluation for each model
for i in range(6):
  model_num = i+1
  print(f'Model Number {model_num}')
  # init eval object
  eval = EvalLoader(model_number = model_num)
  # create eval data
  eval.create_experiment_data('20%')
  #eval.create_experiment_data('LOVOCV')
  evals.append(eval)

Model Number 1


parsing log, completed traces :: 100%|██████████| 12000/12000 [00:04<00:00, 2665.02it/s]
parsing log, completed traces :: 100%|██████████| 120/120 [00:00<00:00, 4795.45it/s]


Model Number 2


parsing log, completed traces :: 100%|██████████| 12800/12800 [00:04<00:00, 2717.48it/s]
parsing log, completed traces :: 100%|██████████| 128/128 [00:00<00:00, 2250.51it/s]


Model Number 3


parsing log, completed traces :: 100%|██████████| 12800/12800 [00:04<00:00, 2963.52it/s]
parsing log, completed traces :: 100%|██████████| 128/128 [00:00<00:00, 3673.25it/s]


Model Number 4


parsing log, completed traces :: 100%|██████████| 6400/6400 [00:01<00:00, 3414.78it/s]
parsing log, completed traces :: 100%|██████████| 64/64 [00:00<00:00, 4109.29it/s]


Model Number 5


parsing log, completed traces :: 100%|██████████| 12600/12600 [00:05<00:00, 2416.10it/s]
parsing log, completed traces :: 100%|██████████| 126/126 [00:00<00:00, 2927.17it/s]


Model Number 6


parsing log, completed traces :: 100%|██████████| 2700/2700 [00:01<00:00, 2665.20it/s]
parsing log, completed traces :: 100%|██████████| 27/27 [00:00<00:00, 3113.30it/s]


In [84]:
# train models
for i,eval in enumerate(evals):
  print(f'-------Model {i+1}/6-------')
  experiments_20 = eval.get_experiment_ids('20%')
  #experiments_LOVOCV = eval.get_experiment_ids('LOVOCV')
  for j,experiment in enumerate(experiments_20):
    print(f'Experiment {j+1}/{len(experiments_20)}')
    # train the model according to the split data generated
    eval.train_model(experiment,
                     learning_rate=model_params['learning_rate'],
                     batch_size=model_params['batch_size'],
                     epochs=model_params['epochs'],
                     embed_dim = model_params['embed_dim'],
                     num_heads = model_params['num_heads'],
                     ff_dim = model_params['ff_dim'],
                     dropout=model_params['dropout'],
                     l1=model_params['l1'],
                     l2=model_params['l2'])

-------Model 1/6-------
Experiment 1/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
Experiment 3/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
-------Model 2/6-------
Experiment 1/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
Experiment 2/3
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/1

In [108]:
# produce simulation data
for i, eval in enumerate(evals):
    models = eval.models
    print(f'-------Model {i+1}/{len(evals)}-------')
    for j, model_id in enumerate(models):
        print(f'Experiment {j+1}/{len(models)}')
        # generate sim data if it has not already been generated
        eval.generate_simulation_data(model_id,
                                      sample=model_params['use_sampling'],
                                      temperature=model_params['temperature'])

-------Model 1/6-------
Experiment 1/3


100%|██████████| 94/94 [01:33<00:00,  1.00it/s]


Experiment 2/3


100%|██████████| 94/94 [01:32<00:00,  1.02it/s]


Experiment 3/3


100%|██████████| 94/94 [01:32<00:00,  1.02it/s]


-------Model 2/6-------
Experiment 1/3


100%|██████████| 100/100 [02:32<00:00,  1.52s/it]


Experiment 2/3


100%|██████████| 100/100 [02:35<00:00,  1.55s/it]


Experiment 3/3


100%|██████████| 100/100 [02:34<00:00,  1.55s/it]


-------Model 3/6-------
Experiment 1/3


100%|██████████| 100/100 [02:35<00:00,  1.55s/it]


Experiment 2/3


100%|██████████| 100/100 [02:30<00:00,  1.51s/it]


Experiment 3/3


100%|██████████| 100/100 [02:29<00:00,  1.50s/it]


-------Model 4/6-------
Experiment 1/3


100%|██████████| 50/50 [01:09<00:00,  1.39s/it]


Experiment 2/3


100%|██████████| 50/50 [01:09<00:00,  1.40s/it]


Experiment 3/3


100%|██████████| 50/50 [01:11<00:00,  1.42s/it]


-------Model 5/6-------
Experiment 1/3


100%|██████████| 99/99 [03:10<00:00,  1.93s/it]


Experiment 2/3


100%|██████████| 99/99 [03:10<00:00,  1.93s/it]


Experiment 3/3


100%|██████████| 99/99 [03:12<00:00,  1.94s/it]


-------Model 6/6-------
Experiment 1/3


100%|██████████| 22/22 [01:04<00:00,  2.95s/it]


Experiment 2/3


100%|██████████| 22/22 [01:03<00:00,  2.88s/it]


Experiment 3/3


100%|██████████| 22/22 [01:07<00:00,  3.06s/it]


In [109]:
for i, eval in enumerate(evals):
    models = eval.models
    print(f'-------Model {i+1}/{len(evals)}-------')
    for j, model_id in enumerate(models):
        # generate evaluation metrics for each experiment
        eval.compute_metrics(model_id)
        print(f'Finished Metrics for Experiment {j+1}/{len(models)}')

-------Model 1/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3
-------Model 2/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3
-------Model 3/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3
-------Model 4/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3
-------Model 5/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3
-------Model 6/6-------
Finished Metrics for Experiment 1/3
Finished Metrics for Experiment 2/3
Finished Metrics for Experiment 3/3


In [110]:
# write all results to a file
write_results_to_file(model_params, evals)

In [81]:
# report final metric averages
for i, eval in enumerate(evals):
  print(f'-------Model {i+1}/{len(evals)}-------')
  ((avg_fitness, error_fitness),
  (avg_precision, error_precision),
  (avg_generalisation, error_generalisation)) = eval.get_average_metrics()
  print(f'Average Fitness: {avg_fitness} +/- {error_fitness}')
  print(f'Average Precision: {avg_precision} +/- {error_precision}')
  print(f'Average Generalisation: {avg_generalisation} +/- {error_generalisation}')

-------Model 1/6-------
Average Fitness: 0.7922393581025546 +/- 0.061990213709805765
Average Precision: 0.7588333333333334 +/- 0.05306577853807336
Average Generalisation: 0.6239498690607798 +/- 0.034146749863781925
-------Model 2/6-------
Average Fitness: 0.6606899207507205 +/- 0.0627249362302162
Average Precision: 0.6550520833333334 +/- 0.06219274518988102
Average Generalisation: 0.6324879632424681 +/- 0.07731894701029371
-------Model 3/6-------
Average Fitness: 0.6171592809072384 +/- 0.16070598628392702
Average Precision: 0.6163802083333333 +/- 0.16374684829849695
Average Generalisation: 0.6129910469324645 +/- 0.17763000928651
-------Model 4/6-------
Average Fitness: 0.680391035705925 +/- 0.02774636358726318
Average Precision: 0.6531250000000001 +/- 0.045190748788238366
Average Generalisation: 0.5413951805488703 +/- 0.11592991217907364
-------Model 5/6-------
Average Fitness: 0.4480062892230454 +/- 0.13437738220897
Average Precision: 0.40568783068783065 +/- 0.1313495073980043
Average

In [None]:
# report train and test performance
for i, eval in enumerate(evals):
  print(f'-------Model {i+1}/{len(evals)}-------')
  accuracies_train = np.array([])
  fscores_train = np.array([])
  precisions_train = np.array([])
  recalls_train = np.array([])
  accuracies_test = np.array([])
  fscores_test = np.array([])
  precisions_test = np.array([])
  recalls_test = np.array([])
  for j, model_id in enumerate(eval.models):
    print(f'Experiment {j+1}/{len(eval.models)}')
    # get train and test data
    train_df, test_df, _ = eval.loader.load_eval_data(model_id)
    # evaluate train performance
    k, accs, fs, ps, rs = eval.evaluate_performance(model_id, train_df)
    accuracies_train = np.append(accuracies_train, np.mean(accs))
    fscores_train = np.append(fscores_train, np.mean(fs))
    precisions_train = np.append(precisions_train, np.mean(ps))
    recalls_train = np.append(recalls_train, np.mean(rs))
    # evaluate test performance
    k, accs, fs, ps, rs = eval.evaluate_performance(model_id, test_df)
    accuracies_test = np.append(accuracies_test, np.mean(accs))
    fscores_test = np.append(fscores_test, np.mean(fs))
    precisions_test = np.append(precisions_test, np.mean(ps))
    recalls_test = np.append(recalls_test, np.mean(rs))

  print('-------Training Data-------')
  print('Average Accuracy:', np.mean(accuracies_train))
  print('Average F-Score:', np.mean(fscores_train))
  print('Average Precision:', np.mean(precisions_train))
  print('Average Recall:', np.mean(recalls_train))
  print('-------Testing Data-------')
  print('Average Accuracy:', np.mean(accuracies_test))
  print('Average F-Score:', np.mean(fscores_test))
  print('Average Precision:', np.mean(precisions_test))
  print('Average Recall:', np.mean(recalls_test))


-------Model 1/6-------
Experiment 1/3
Experiment 2/3
Experiment 3/3
-------Training Data-------
Average Accuracy: 0.8313905492006636
Average F-Score: 0.8000173447031034
Average Precision: 0.7970633080819794
Average Recall: 0.8193470170007111
-------Testing Data-------
Average Accuracy: 0.7729096382847779
Average F-Score: 0.7458966061577709
Average Precision: 0.7462168636468278
Average Recall: 0.756688898162262
-------Model 2/6-------
Experiment 1/3
Experiment 2/3
Experiment 3/3
-------Training Data-------
Average Accuracy: 0.8370245588979636
Average F-Score: 0.7731030430870631
Average Precision: 0.7456059313644282
Average Recall: 0.8288757868428617
-------Testing Data-------
Average Accuracy: 0.8176273361200268
Average F-Score: 0.7527114855099294
Average Precision: 0.7278668344197818
Average Recall: 0.8085087029260283
-------Model 3/6-------
Experiment 1/3
Experiment 2/3
Experiment 3/3
-------Training Data-------
Average Accuracy: 0.8369966663328703
Average F-Score: 0.7736909090357399

# Testing

In [71]:
for eval in evals:
    eval.sample = EvalLoader.sample.__get__(eval, EvalLoader)
    eval.compute_metrics = EvalLoader.compute_metrics.__get__(eval, EvalLoader)

In [44]:
eval.compute_metrics(model_id='66_94_90_9_68_14_113_64_38_73_118_1_61_48_110_108_95_60_54_81_52_34_105_101')

In [None]:
sim_log = eval.generate_simulation_data(exp_id)
unique_rows, counts = np.unique(sim_log, axis=0, return_counts=True)
x_word_dict = eval.models[exp_id][1][0]
inverted_dict = {v: k for k, v in x_word_dict.items()}
mapped_sims = np.vectorize(inverted_dict.get)(unique_rows)
sorted_data = sorted(zip(mapped_sims, counts), key=lambda x: x[1], reverse=True)

for row, count in sorted_data[:15]:
    print(f"Row: {row}, Count: {count}")

Simulation data for 56_36_74_73_42_75_62_27_72_48_46_52 already exists
Row: ['[PAD]' '[PAD]' '[BOS]' 'a' 'b' 'c' 'd1' 'e' 'f' 'g' 'h2' 'i' 'j' 'k'
 'l1' 'm' 'n' 'o' '[EOS]'], Count: 6400


In [None]:
fitness, precision, generalisation = eval.compute_metrics(exp_id)
print(f'Fitness: {fitness}')
print(f'Precision: {precision}')
print(f'Generalisation: {generalisation}')

Fitness: 0.04167463200152934
Precision: 0.0340625
Generalisation: 0.0


In [None]:
for row, count in sorted_data[-10:]:
    print(f"Row: {row}, Count: {count}")

Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'd' 'e1' 'e4' 'e2' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'd' 'e5' 'e2' 'e4' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'd' 'e2' 'e5' 'e4' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e1' 'e3' 'e4' 'e2' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e5' 'e5' 'e2' 'e1' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e2' 'b' 'e4' 'e5' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e2' 'e2' 'e4' 'e5' 'f' 'g' 'h' 'i'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e4' 'c' 'e2' 'e5' 'e1' 'e5' 'e3' 'e4'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e4' 'e4' 'e1' 'e5' 'e2' 'f' 'g' 'h'], Count: 1
Row: ['[BOS]' 'a' 'b' 'c' 'd' 'e3' 'e3' 'e5' 'e4' 'e2' 'f' 'g' 'h' 'i'], Count: 1


In [None]:
y_word_dict = eval.models[exp_id][1][1]
y_word_dict

{'[EOS]': 0,
 'a': 1,
 'b': 2,
 'c': 3,
 'd1': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h2': 8,
 'h1': 9,
 'i': 10,
 'j': 11,
 'k': 12,
 'l1': 13,
 'm': 14,
 'n': 15,
 'o': 16,
 'd2': 17,
 'l2': 18}

In [None]:
eval = evals[3]
exp_id = '56_36_74_73_42_75_62_27_72_48_46_52'
train_df, test_df, _ = eval.loader.load_eval_data(exp_id)

In [None]:
# get all traces
train_traces = train_df.groupby('case_id').apply(lambda x: x.nlargest(1, 'k'))['prefix'].str.replace('\[BOS\] ','').values
train_traces

array(['a b c d1 e f g h2 h1 i j k l1 m n o',
       'a b c d1 d2 e f g h2 i j k l1 m n o',
       'a b c d1 e f g h2 i j k l2 m n o', ...,
       'a b c d2 e f g h1 i j k l1 l2 m n o',
       'a b c d1 d2 e f g h1 h2 i j k l2 m n o',
       'a b c d1 e f g h1 i j k l1 m n o'], dtype=object)

In [None]:
sim_df = eval.sims[exp_id]
sim_df
x_word_dict = eval.models[exp_id][1][0]
inverted_word_dict = {v: k for k, v in x_word_dict.items()}
mapped_sim_traces = np.vectorize(inverted_word_dict.get)(sim_df.values)
sim_traces = np.array([' '.join(row.astype(str)) for row in mapped_sim_traces])
# remove padding, BOS, and EOS
#sim_traces = np.array([x.replace('[BOS]','').replace('[EOS]','').replace('[PAD]','').strip() for x in sim_traces])
#sim_traces

In [None]:
variant_nums = exp_id.split('_')
variants = [eval.processor.variants[x] for x in variant_nums]
variants = [" ".join(x).lower() for x in variants]

In [None]:
variants

['a b c d e4 e1 e5 e2 e3 f g h i',
 'a b c d e2 e3 e4 e5 e1 f g h i',
 'a b c d e3 e4 e1 e2 e5 f g h i',
 'a b c d e2 e1 e3 e5 e4 f g h i',
 'a b c d e5 e3 e1 e2 e4 f g h i',
 'a b c d e3 e2 e5 e1 e4 f g h i',
 'a b c d e5 e4 e3 e2 e1 f g h i',
 'a b c d e4 e3 e5 e2 e1 f g h i',
 'a b c d e4 e5 e2 e3 e1 f g h i',
 'a b c d e1 e4 e5 e2 e3 f g h i',
 'a b c d e5 e2 e1 e3 e4 f g h i',
 'a b c d e5 e3 e2 e1 e4 f g h i',
 'a b c d e2 e4 e5 e3 e1 f g h i',
 'a b c d e5 e1 e3 e4 e2 f g h i',
 'a b c d e2 e5 e4 e3 e1 f g h i',
 'a b c d e2 e5 e4 e1 e3 f g h i',
 'a b c d e4 e2 e1 e3 e5 f g h i',
 'a b c d e3 e1 e5 e4 e2 f g h i',
 'a b c d e4 e5 e3 e1 e2 f g h i',
 'a b c d e1 e2 e4 e3 e5 f g h i',
 'a b c d e1 e2 e4 e5 e3 f g h i',
 'a b c d e2 e1 e4 e3 e5 f g h i',
 'a b c d e4 e2 e5 e1 e3 f g h i',
 'a b c d e2 e5 e3 e4 e1 f g h i']

In [None]:
eval.models[exp_id][1][0]

{'[PAD]': 0,
 '[BOS]': 1,
 '[EOS]': 2,
 'a': 3,
 'b': 4,
 'c': 5,
 'd': 6,
 'e1': 7,
 'e5': 8,
 'e2': 9,
 'e4': 10,
 'e3': 11,
 'f': 12,
 'g': 13,
 'h': 14,
 'i': 15}