In [37]:
from collections import defaultdict

In [38]:
def formatter(string):
    """Split one raw ``case,event`` log line into a ``(case, event)`` tuple.

    Newline characters are removed first; the remainder must contain exactly
    two comma-separated fields, otherwise the unpacking raises ``ValueError``.
    Surrounding spaces in the fields are kept as they are.
    """
    fields = string.replace("\n", "").split(",")
    case, event = fields
    return case, event

def process_logs(filename):
    """Parse a ``case,event`` CSV log into its activity set and per-case traces.

    The first line of the file is treated as a header and discarded. Blank
    (or whitespace-only) lines are skipped, so a trailing empty line at the
    end of the file does not abort the import.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A tuple ``(events, logs)`` where ``events`` is the set of all
        activities present in the log and ``logs`` is a dictionary whose key
        is the caseId and whose value is the list of that case's events, in
        the order they appear in the file.

    Raises:
        ValueError: if a non-blank line does not hold exactly two
            comma-separated fields (raised by ``formatter``).
    """
    logs = defaultdict(list)
    events = set()
    with open(filename, "r") as logfile:
        # Drop the header; the default keeps an empty file from raising
        # StopIteration.
        next(logfile, None)
        # Iterate the file lazily instead of loading every line up front.
        for line in logfile:
            if not line.strip():
                continue
            case, event = formatter(line)
            logs[case].append(event)
            events.add(event)
    return events, logs
        

In [39]:
def zero_transition_matrix(event_set):
    """Build an all-zero square matrix sized by the number of events.

    The events are sorted, and each one is mapped to its position in that
    sorted order.

    Returns:
        A tuple ``(event_indexes, matrix)``: the event -> index dictionary and
        a list of ``len(event_set)`` independent rows, each holding
        ``len(event_set)`` zeros.
    """
    ordered_events = sorted(event_set)
    size = len(ordered_events)
    positions = dict(zip(ordered_events, range(size)))
    grid = []
    for _ in range(size):
        # A fresh list per row, so rows never alias each other.
        grid.append([0] * size)
    return positions, grid

In [42]:
# Load the log once: `events` is the set of activities, `logs` maps each
# caseId to its list of events.
events, logs = process_logs("TDlog.csv")
def generate_succession_matrix(events, logs, window=1):
    """Count how often one event is followed by another, ``window`` steps later.

    Parameters:
        events: collection of events used to size and index the matrix.
        logs: dictionary whose values are the event traces to scan.
        window: distance between the two positions compared in a trace
            (1 means the directly following event).

    Returns:
        A square list of lists where cell ``[p][s]`` is the number of times
        event ``s`` appears ``window`` positions after event ``p``, with rows
        and columns indexed as given by ``zero_transition_matrix``.
    """
    positions, counts = zero_transition_matrix(events)
    for trace in logs.values():
        # Stop early enough that `start + window` stays inside the trace.
        for start in range(len(trace) - window):
            earlier, later = trace[start], trace[start + window]
            counts[positions[earlier]][positions[later]] += 1
    return counts

# Count successions with the default window of 1 over every trace, then show
# the raw matrix.
succession_matrix = generate_succession_matrix(events, logs)
print(succession_matrix)

[[0, 511, 489, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 499, 511, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 261, 228, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 499, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 268, 0, 0, 243, 0], [0, 0, 0, 0, 0, 0, 0, 131, 130, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 124, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 131, 130, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130, 0, 131, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 261, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 0, 268, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 268, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511, 0, 0, 0, 0, 0]]


In [43]:
def generate_dependancy_matrix(events, logs):
    """Compute the dependency measure between every ordered pair of events.

    With ``|a>b|`` the number of times ``b`` directly follows ``a`` (the
    window-1 succession count):

    * off the diagonal: ``(|a>b| - |b>a|) / (|a>b| + |b>a| + 1)``
    * on the diagonal:  ``|a>a| / (|a>a| + 1)``

    Every value is rounded to three decimals.

    Parameters:
        events: collection of events, forwarded to
            ``generate_succession_matrix``.
        logs: dictionary of event traces, forwarded likewise.

    Returns:
        A square list of lists of floats with the same dimension and the same
        row/column ordering as the succession matrix.
    """
    succession_matrix = generate_succession_matrix(events, logs, 1)
    dimension = len(succession_matrix)
    # The result is built directly at the succession matrix's size, so no
    # separate zero matrix or event index is needed here.
    dependancy_matrix = []
    for row in range(dimension):
        measures = []
        for col in range(dimension):
            direct_succession = succession_matrix[row][col]
            if row == col:
                # Self-loop measure: only the event following itself counts.
                value = direct_succession / (direct_succession + 1)
            else:
                inverse_succession = succession_matrix[col][row]
                value = (direct_succession - inverse_succession) / (
                    direct_succession + inverse_succession + 1
                )
            measures.append(round(value, 3))
        dependancy_matrix.append(measures)
    return dependancy_matrix
# Derive the dependency matrix from the loaded log and show it.
dependancy_matrix = generate_dependancy_matrix(events, logs)
print(dependancy_matrix)

[[0.0, 0.998, 0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-0.998, 0.0, 0.0, 0.0, 0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-0.998, 0.0, 0.0, 0.0, 0.0, 0.996, 0.996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, -0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.996, 0.0, 0.0, 0.996, 0.0], [0.0, 0.0, -0.996, 0.0, 0.0, 0.0, 0.0, 0.992, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, -0.996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.99, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, -0.992, 0.0, 0.0, 0.004, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, -0.992, 0.0, -0.004, 0.0, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.992, -0.992, 0.0, 0.0, 0.996, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.99, 0.0, 0.0, 0.0, 0.0, 0.99, 0.0, 0

In [14]:
# Print the event trace of every case, one list per line.
for i in logs.values():
    print(i)

['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'G', 'L']
['A', 'B', 'D', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'C', 'G', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'C', 'G', 'K', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O'

In [44]:
def detect_xor_splits(succession_matrix, dependancy_matrix, **options):
    """Placeholder for XOR-split detection: prints the planned steps only.

    The matrices and ``options`` are accepted but not used yet, and nothing
    is returned.
    """
    planned_steps = (
        "For each event, select the events that match both thresholds",
        "Select the inverse dependancy relations from the matrix, and discard those that match the threshold",
        "Then, look for direct succession between them. If there is, discard them.",
    )
    for step in planned_steps:
        print(step)

# Thresholds are passed as keyword arguments via ** unpacking.
# NOTE(review): the TypeError shown under this cell does not match the current
# signature (two positionals plus **options); it looks like stale output from
# an earlier version - re-run the cell to confirm.
options = {"succession_threshold": 50,"dependancy_threshold": 0.6}    
detect_xor_splits(succession_matrix, dependancy_matrix, **options)

TypeError: detect_xor_splits() takes 2 positional arguments but 3 were given