In [6]:
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from mutation import MyMutation
from crossover import TraceCrossover
from encoder import Encoder
from sampling import MySampling
from callback import UpdatePopulationCallback, UpdatePopCallback
import numpy as np
from tools import Tools
from survival import MySurvival
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.D4PyEventLog import D4PyEventLog
import warnings
import random
from Declare4Py.ProcessMiningTasks.ConformanceChecking.MPDeclareResultsBrowser import MPDeclareResultsBrowser
from Declare4Py.ProcessMiningTasks.ConformanceChecking.MPDeclareAnalyzer import MPDeclareAnalyzer
import pandas as pd
import logging
from terminator import MyTermination
from problem import Problem_single_ElementWise, MyProblem_Problem, MyProblem_Problem2, Problem_multi_ElementWise, Problem_single_ElementWise_noConstraints
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.termination.default import DefaultSingleObjectiveTermination
import testSetup


logging.getLogger('matplotlib').setLevel(logging.WARNING)
warnings.filterwarnings("ignore", ".*feasible.*")

# n_events = 5-10-15-20
# n_activities = 5-10-15-20-25-30





declare = DeclareModel().parse_from_file("../declare_models/model1.decl")
model_constraints = declare.get_decl_model_constraints()
activities_name = declare.get_model_activities()

encoder = Encoder(activities_name)

variable_boundaries = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
timestamps = Tools.generate_random_timestamps(len(activities_name))
case_concept_name = ['1'] * len(activities_name)

data = {
    'case:concept:name': case_concept_name,
    'concept:name': activities_name,
    'timestamp': timestamps,
}

dataframe = pd.DataFrame(data)


dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'])


event_log = D4PyEventLog()
event_log.log = dataframe

# TODO make some info
event_log.timestamp_key = "timestamp"
event_log.activity_key = "concept:name"

event_log.to_eventlog()

basic_checker = MPDeclareAnalyzer(log=event_log, declare_model=declare, consider_vacuity=False)
conf_check_res: MPDeclareResultsBrowser = basic_checker.run()

print(conf_check_res.get_metric(trace_id=0, metric="num_activations"))
print("-------------------------------------------")


[None, 1, None, None, None, None, None, None, None, None, 1, None, None, None, 1, 1, None]
-------------------------------------------


In [7]:


# create random pop
n_traces = 10
trace_length = 20

initial_population = [[random.choice(activities_name) for _ in range(trace_length)] for _ in range(n_traces)]
print(initial_population)
initial_encoded_pop = [encoder.encode(trace) for trace in initial_population]
features_range = Tools.calculate_feature_range(initial_encoded_pop, variable_boundaries)
lower_bounds = [x[0] for x in features_range]
upper_bounds = [x[1] for x in features_range]

print(initial_population)
print(initial_encoded_pop)

# instantiate problem objectives
mutation = MyMutation(feature_range=features_range)
crossover = TraceCrossover(variable_boundaries=variable_boundaries)
sampling = MySampling(initial_population=initial_encoded_pop)
pop_size = 1000
survival = MySurvival(n_children_survive=50)

# termination1 = DefaultMultiObjectiveTermination(
#     xtol=1e-8,
#     cvtol=1e-6,
#     ftol=0.0025,
#     period=30,
#     n_max_gen=1000,
#     n_max_evals=100000
# )
#
termination2 = DefaultSingleObjectiveTermination(
    xtol=1e-8,
    cvtol=1e-6,
    ftol=1e-6,
    period=20,
    n_max_gen=1000,
    n_max_evals=100000
)


termination = MyTermination(n_required=500)
# termination = get_termination("n_gen", 10)
# termination1 = termination
# termination2 = termination

[['Leucocytes', 'Release B', 'ER Triage', 'ER Registration', 'IV Liquid', 'Leucocytes', 'ER Triage', 'Release B', 'Leucocytes', 'Release B', 'Admission IC', 'IV Liquid', 'IV Antibiotics', 'IV Antibiotics', 'Release B', 'ER Sepsis Triage', 'ER Sepsis Triage', 'Return ER', 'Release B', 'CRP'], ['Leucocytes', 'CRP', 'Admission NC', 'ER Sepsis Triage', 'CRP', 'CRP', 'IV Antibiotics', 'Release B', 'IV Liquid', 'Release A', 'Admission IC', 'ER Registration', 'ER Registration', 'IV Liquid', 'ER Sepsis Triage', 'Release A', 'ER Sepsis Triage', 'Return ER', 'Release B', 'ER Triage'], ['Release B', 'Release B', 'ER Sepsis Triage', 'Admission IC', 'CRP', 'LacticAcid', 'Admission IC', 'Admission IC', 'Admission IC', 'Return ER', 'ER Registration', 'ER Sepsis Triage', 'Admission IC', 'Admission IC', 'ER Sepsis Triage', 'CRP', 'Release A', 'ER Sepsis Triage', 'ER Sepsis Triage', 'Release B'], ['LacticAcid', 'CRP', 'Admission NC', 'Release A', 'LacticAcid', 'ER Registration', 'CRP', 'Release A', 'Rel

In [8]:


problem = Problem_multi_ElementWise(
    trace_length=trace_length,
    encoder=encoder,
    d4py=declare,
    initial_population=initial_encoded_pop,
    xl=lower_bounds,xu=upper_bounds,
    event_log=event_log,
    dataframe=dataframe
)

callback = UpdatePopulationCallback(problem=problem)

algorithm = NSGA2(
    problem=problem,
    pop_size=pop_size,
    sampling=sampling,
    crossover=crossover,
    mutation=mutation,
    callback=callback,
    eliminate_duplicates=False,
)


result = minimize(problem, algorithm, termination=termination, seed=1, verbose=True)

G = np.array([individual.G for individual in result.pop])
traces = [individual.X.tolist() for individual in result.pop]

# Print the feasible traces
for i, (trace, g) in enumerate(zip(traces, G)):
    if g == 0:
        decoded_trace =  encoder.decode(trace)

        dataframe['concept:name'] = pd.DataFrame(decoded_trace)
        event_log.log = dataframe
        event_log.to_eventlog()

        basic_checker = MPDeclareAnalyzer(log=event_log, declare_model=declare, consider_vacuity=False)
        conf_check_res: MPDeclareResultsBrowser = basic_checker.run()
        print(f"Feasible Trace {i + 1}: {trace}")

        print("Solution Decoded:", encoder.decode(trace))
        print(conf_check_res.get_metric(trace_id=0, metric="num_violations"))
        print("-------------------------------------------")


n_gen  |  n_eval  | n_nds  |     cv_min    |     cv_avg    |      eps      |   indicator  
     1 |       10 |      1 |  0.2941176471 |  0.3705882353 |             - |             -
     2 |     1010 |      1 |  0.0588235294 |  0.3417058824 |             - |             -
     3 |     2010 |      1 |  0.0588235294 |  0.2703529412 |             - |             -
     4 |     3010 |      1 |  0.0588235294 |  0.2488823529 |             - |             -
     5 |     4010 |      1 |  0.0588235294 |  0.2239411765 |             - |             -
     6 |     5010 |      1 |  0.0588235294 |  0.2151764706 |             - |             -
     7 |     6010 |      1 |  0.0588235294 |  0.2036470588 |             - |             -
     8 |     7010 |      1 |  0.000000E+00 |  0.1842941176 |             - |             -
     9 |     8010 |      1 |  0.000000E+00 |  0.1596470588 |  0.000000E+00 |             f
    10 |     9010 |      1 |  0.000000E+00 |  0.1464117647 |  0.000000E+00 |             f

cv_min = smallest constraint violation

cv_avg = average constraint violation

eps = Epsilon Improvement, improvement of the pareto front from iter to iter


init pop = 10
population = 2000
iter = 20


avg of 28.5 sec with only satisfy score
avg of 29 sec with both satisfy score and violation score



In [9]:
problem = Problem_single_ElementWise_noConstraints(
    trace_length=trace_length,
    encoder=encoder,
    d4py=declare,
    initial_population=initial_encoded_pop,
    xl=lower_bounds,xu=upper_bounds,
    event_log=event_log,
    dataframe=dataframe
)

callback = UpdatePopCallback(problem=problem)

algorithm = GA(
    problem=problem,
    pop_size=pop_size,
    sampling=sampling,
    crossover=crossover,
    mutation=mutation,
    callback=callback,
    eliminate_duplicates=False,
)


result = minimize(problem, algorithm, termination=termination2, seed=1, verbose=True)

traces = [individual.X.tolist() for individual in result.pop]


print("Best Solution Encoded:", result.X)
# print("Best Solution Decoded:", [encoder.decode(solution) for solution in result.X])
print("Objective Values:", result.F)


n_gen  |  n_eval  |     f_avg     |     f_min    
     1 |       10 | -1.664000E+01 | -1.700000E+01
     2 |     1010 | -1.708920E+01 | -1.810000E+01
     3 |     2010 | -1.751810E+01 | -1.860000E+01
     4 |     3010 | -1.786790E+01 | -1.870000E+01
     5 |     4010 | -1.815020E+01 | -1.910000E+01
     6 |     5010 | -1.838340E+01 | -1.910000E+01
     7 |     6010 | -1.857500E+01 | -1.930000E+01
     8 |     7010 | -1.874510E+01 | -1.930000E+01
     9 |     8010 | -1.887730E+01 | -1.950000E+01
    10 |     9010 | -1.899310E+01 | -1.950000E+01
    11 |    10010 | -1.908620E+01 | -1.950000E+01
    12 |    11010 | -1.916170E+01 | -1.960000E+01
    13 |    12010 | -1.923050E+01 | -1.970000E+01
    14 |    13010 | -1.929620E+01 | -1.970000E+01
    15 |    14010 | -1.934910E+01 | -1.980000E+01
    16 |    15010 | -1.939450E+01 | -1.980000E+01
    17 |    16010 | -1.944430E+01 | -1.980000E+01
    18 |    17010 | -1.948030E+01 | -1.980000E+01
    19 |    18010 | -1.951540E+01 | -1.980000E+01


In [10]:



problem = Problem_single_ElementWise(
    trace_length=trace_length,
    encoder=encoder,
    d4py=declare,
    initial_population=initial_encoded_pop,
    xl=lower_bounds,xu=upper_bounds,
    event_log=event_log,
    dataframe=dataframe
)

callback = UpdatePopulationCallback(problem=problem, plot=0)

algorithm = GA(
    problem=problem,
    pop_size=pop_size,
    sampling=sampling,
    crossover=crossover,
    mutation=mutation,
    callback=callback,
    termination=termination,
    eliminate_duplicates=False,
)


result = minimize(problem, algorithm, termination=termination2, seed=1, verbose=True)

G = np.array([individual.G for individual in result.pop])
traces = [individual.X.tolist() for individual in result.pop]

# Print the feasible traces
for i, (trace, g) in enumerate(zip(traces, G)):
    if g == 0:
        print(f"Feasible Trace {i + 1}: {trace}")

print("Best Solution Encoded:", result.X)
# print("Best Solution Decoded:", [encoder.decode(solution) for solution in result.X])
print("Objective Values:", result.F)
print("Constraint Values:", result.G)


TypeError: UpdatePopulationCallback.__init__() got an unexpected keyword argument 'plot'

only few millisecond faster than the 2 obj algorithm

In [8]:



problem = MyProblem_Problem(
    trace_length=trace_length,
    encoder=encoder,
    d4py=declare,
    initial_population=initial_encoded_pop,
    xl=lower_bounds,xu=upper_bounds,
    event_log=event_log,
    dataframe=dataframe
)

callback = UpdatePopulationCallback(problem=problem, plot=0)


algorithm = NSGA2(
    problem=problem,
    pop_size=pop_size,
    sampling=sampling,
    crossover=crossover,
    mutation=mutation,
    callback=callback,
    termination=termination,
    eliminate_duplicates=False,
)

result = minimize(problem, algorithm, termination=termination1, seed=1, verbose=True)


print("Best Solution Encoded:", result.X)
print("Best Solution Decoded:", [encoder.decode(solution) for solution in result.X])
print("Objective Values:", result.F)
print("Constraint Values:", result.G)

n_gen  |  n_eval  | n_nds  |     cv_min    |     cv_avg    |      eps      |   indicator  
     1 |       10 |      1 |  0.3333333333 |  0.7666666667 |             - |             -
     2 |     2010 |      1 |  0.000000E+00 |  0.6755833333 |             - |             -
     3 |     4010 |      1 |  0.000000E+00 |  0.5208333333 |  0.4000000000 |         ideal
     4 |     6010 |      1 |  0.000000E+00 |  0.4137500000 |  0.000000E+00 |             f
     5 |     8010 |      2 |  0.000000E+00 |  0.3443333333 |  0.000000E+00 |             f
     6 |    10010 |      1 |  0.000000E+00 |  0.2790000000 |  0.2000000000 |         ideal
     7 |    12010 |      5 |  0.000000E+00 |  0.2414166667 |  0.1000000000 |         ideal
     8 |    14010 |      1 |  0.000000E+00 |  0.1887500000 |  0.4000000000 |         ideal
     9 |    16010 |      1 |  0.000000E+00 |  0.1365000000 |  0.1000000000 |         ideal
    10 |    18010 |      1 |  0.000000E+00 |  0.1208333333 |  0.000000E+00 |             f

Seems a little slower than with ElementalWise problem object

In [9]:



problem = MyProblem_Problem2(
    trace_length=trace_length,
    encoder=encoder,
    d4py=declare,
    initial_population=initial_encoded_pop,
    xl=lower_bounds,xu=upper_bounds,
    event_log=event_log,
    dataframe=dataframe
)

callback = UpdatePopulationCallback(problem=problem, plot=0)

algorithm = GA(
    problem=problem,
    pop_size=pop_size,
    sampling=sampling,
    crossover=crossover,
    mutation=mutation,
    callback=callback,
    eliminate_duplicates=False,
)

result = minimize(problem, algorithm, termination=termination2, seed=1, verbose=True)

print("Best Solution Encoded:", result.X)
# print("Best Solution Decoded:", [encoder.decode(solution) for solution in result.X])
print("Objective Values:", result.F)
print("Constraint Values:", result.G)


n_gen  |  n_eval  |     cv_min    |     cv_avg    |     f_avg     |     f_min    
     1 |       10 |  0.3333333333 |  0.7666666667 |             - |             -
     2 |     2010 |  0.1666666667 |  0.6750833333 |             - |             -
     3 |     4010 |  0.000000E+00 |  0.5258333333 | -1.790000E+01 | -1.790000E+01
     4 |     6010 |  0.000000E+00 |  0.4196666667 | -1.773333E+01 | -1.790000E+01
     5 |     8010 |  0.000000E+00 |  0.3510000000 | -1.774211E+01 | -1.820000E+01
     6 |    10010 |  0.000000E+00 |  0.2821666667 | -1.785918E+01 | -1.850000E+01
     7 |    12010 |  0.000000E+00 |  0.2451666667 | -1.788387E+01 | -1.860000E+01
     8 |    14010 |  0.000000E+00 |  0.1990000000 | -1.793148E+01 | -1.900000E+01
     9 |    16010 |  0.000000E+00 |  0.1422500000 | -1.797440E+01 | -1.900000E+01
    10 |    18010 |  0.000000E+00 |  0.1280833333 | -1.801793E+01 | -1.900000E+01
    11 |    20010 |  0.000000E+00 |  0.1108333333 | -1.807164E+01 | -1.900000E+01
    12 |    2201