<a href="https://colab.research.google.com/github/tomyaacov/process_mining/blob/main/build_acceptor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install packages
!pip install aalpy
!pip install pm4py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# experiment parameters
dfa_params = [
    {"num_states": 15, "alphabet": ["A", "B", "C"], "num_accepting_states": 8}
]
word_max_len = 10
cv_folds_num = 2

In [3]:
# util functions
def is_accepted(dfa, word):
  if len(word) > 0:
    return dfa.execute_sequence(dfa.initial_state, word)[-1]
  else:
    return dfa.initial_state.is_accepting

In [4]:
# data generation
import itertools
import random
from aalpy.utils import generate_random_dfa
all_data = []
for param in dfa_params:
  all_data.append([])
  dfa = generate_random_dfa(**param)
  for length in range(word_max_len):
    for comb in itertools.product(param["alphabet"], repeat=length):
      word = tuple(comb)
      all_data[-1].append((word, is_accepted(dfa, word)))
  random.shuffle(all_data[-1])


In [5]:
import pandas as pd
import time
from aalpy.learning_algs.deterministic_passive.RPNI import run_RPNI
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
results = []
table_column_names = ['algorithm', 'time', "accuracy", "precision", "recall", "F-score"]
for data in all_data:
  results.append(pd.DataFrame(columns=table_column_names))
  # RPNI
  predicted = []
  actual = []
  learning_times = []
  for i in range(cv_folds_num):
    test = [x for idx, x in enumerate(data) if idx % cv_folds_num  == i]
    train = [x for idx, x in enumerate(data) if idx % cv_folds_num != i]
    start_time = time.time()
    model = run_RPNI(train, automaton_type='dfa')
    learning_times.append(time.time() - start_time)
    for w, l in test:
      predicted.append(int(is_accepted(model, w)))
      actual.append(int(l))
  precision, recall, fscore, support = precision_recall_fscore_support(actual, predicted, beta=1)
  results[-1].loc[len(results[-1])] = ["RPNI", sum(learning_times), accuracy_score(actual, predicted), precision, recall, fscore]


PTA Construction Time: 0.81
Current automaton size: 15
RPNI Learning Time: 41.41
RPNI Learned 15 state automaton.
PTA Construction Time: 0.24
Current automaton size: 15
RPNI Learning Time: 34.39
RPNI Learned 15 state automaton.


In [6]:
results[0]

Unnamed: 0,algorithm,time,accuracy,precision,recall,F-score
0,RPNI,76.867634,1.0,"[1.0, 1.0]","[1.0, 1.0]","[1.0, 1.0]"


In [7]:
actual[:20]

[0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0]

In [8]:
predicted[:20]

[0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0]