In [1]:
from collections import defaultdict

In [2]:
def formatter(string):
    """Parse one raw log line into a ``(case, event)`` pair.

    Every newline character is removed and the remainder is split on commas.
    The line must yield exactly two fields; otherwise the unpacking raises
    ``ValueError``.
    """
    fields = string.replace("\n", "").split(",")
    case_id, event_name = fields
    return case_id, event_name

# Returns a set with all the activities (events) present in the log
# and a dictionary holding each case's trace of events,
# where the key of the dict is the case id and the value
# is the list of that case's events in file order.
def process_logs(filename):
    """Read a two-column CSV event log and group its events by case.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line is parsed with ``formatter`` into a
    ``(case, event)`` pair.

    Args:
        filename: Path of the CSV log file to read.

    Returns:
        A tuple ``(events, logs)``: ``events`` is the set of all event names
        seen, and ``logs`` is a ``defaultdict(list)`` mapping each case id to
        the list of its events in file order.

    Raises:
        ValueError: Propagated from ``formatter`` when a non-blank line does
            not split into exactly two comma-separated fields.
    """
    logs = defaultdict(list)
    events = set()
    with open(filename, "r") as logfile:
        # Skip the header row; the default keeps an empty file from raising.
        next(logfile, None)
        # Stream line by line instead of materialising the file with readlines().
        for line in logfile:
            # Blank lines (e.g. a trailing empty line at EOF) carry no record
            # and would otherwise break the two-field unpacking.
            if not line.strip():
                continue
            case, event = formatter(line)
            logs[case].append(event)
            events.add(event)
    return events, logs
        

In [3]:
# Returns a mapping from each event to its index (events in sorted order)
# together with a square matrix of zeros that has one row and one column per event.
def zero_transition_matrix(event_set):
    """Build an index for the events and a matching square matrix of zeros.

    Args:
        event_set: Iterable of hashable, mutually comparable event labels.

    Returns:
        A tuple ``(event_indexes, matrix)``: ``event_indexes`` maps each event
        to its position in ascending sorted order, and ``matrix`` is a list of
        rows (one per element of ``event_set``), each holding that many zeros.
    """
    ordered_events = sorted(event_set)
    size = len(ordered_events)
    event_indexes = dict(zip(ordered_events, range(size)))
    # Build each row separately so that rows never alias one another.
    matrix = [[0 for _ in range(size)] for _ in range(size)]
    return event_indexes, matrix

In [4]:
# Load the event log once; `events` and `logs` are reused by the cells below.
events, logs = process_logs("TDlog.csv")
def generate_succession_matrix(events, logs, window=1):
    """Count how often one event is followed by another at a fixed distance.

    Args:
        events: Collection of event labels; it determines the matrix size and
            the row/column order (via ``zero_transition_matrix``).
        logs: Mapping whose values are indexable sequences of events (traces).
        window: Distance between the two positions compared in a trace;
            ``1`` counts direct successions.

    Returns:
        A square list-of-lists where cell ``[a][b]`` is the number of times
        the event with index ``a`` is followed, ``window`` positions later,
        by the event with index ``b``, summed over all traces.
    """
    position_of, counts = zero_transition_matrix(events)
    for trace in logs.values():
        # Only positions that still have a partner `window` steps ahead.
        for start in range(len(trace) - window):
            source, target = trace[start], trace[start + window]
            counts[position_of[source]][position_of[target]] += 1
    return counts

# Direct-succession counts (default window=1); rows/columns follow the sorted event order.
succession_matrix = generate_succession_matrix(events, logs)
print(succession_matrix)

[[0, 511, 489, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 499, 511, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 261, 228, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 499, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 268, 0, 0, 243, 0], [0, 0, 0, 0, 0, 0, 0, 131, 130, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 124, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 131, 130, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130, 0, 131, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 261, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 0, 268, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 268, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511, 0, 0, 0, 0, 0]]


In [5]:
def generate_dependancy_matrix(events, logs):
    """Derive a dependency measure for every ordered pair of events.

    With ``ab`` the direct-succession count from event ``a`` to event ``b``
    (window 1), each cell is rounded to three decimals and computed as:

    * diagonal (``a == b``): ``aa / (aa + 1)``
    * otherwise: ``(ab - ba) / (ab + ba + 1)``

    Args:
        events: Collection of event labels, passed on to the matrix builders.
        logs: Mapping of case id to trace, passed on to
            ``generate_succession_matrix``.

    Returns:
        A square list-of-lists of rounded floats, indexed like the
        succession matrix.
    """
    counts = generate_succession_matrix(events, logs, 1)
    # Only the zero-filled matrix is needed here; the index mapping is discarded.
    _, dependancy_matrix = zero_transition_matrix(events)
    size = len(counts)
    for source in range(size):
        for target in range(size):
            forward = counts[source][target]
            if source == target:
                measure = forward / (forward + 1)
            else:
                backward = counts[target][source]
                measure = (forward - backward) / (forward + backward + 1)
            dependancy_matrix[source][target] = round(measure, 3)
    return dependancy_matrix
# Dependency measure for every ordered pair of events, derived from the window-1 succession counts.
dependancy_matrix = generate_dependancy_matrix(events, logs)
print(dependancy_matrix)

[[0.0, 0.998, 0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-0.998, 0.0, 0.0, 0.0, 0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-0.998, 0.0, 0.0, 0.0, 0.0, 0.996, 0.996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, -0.998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.996, 0.0, 0.0, 0.996, 0.0], [0.0, 0.0, -0.996, 0.0, 0.0, 0.0, 0.0, 0.992, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, -0.996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.99, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, -0.992, 0.0, 0.0, 0.004, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, -0.992, 0.0, -0.004, 0.0, 0.992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.992, -0.992, 0.0, 0.0, 0.996, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.99, 0.0, 0.0, 0.0, 0.0, 0.99, 0.0, 0

In [6]:
# Print the ordered list of events recorded for each case, one trace per line.
for i in logs.values():
    print(i)

['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'G', 'L']
['A', 'B', 'D', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'C', 'G', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'P', 'M', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'C', 'G', 'K', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'B', 'E', 'M', 'P', 'N', 'O', 'Q', 'L']
['A', 'C', 'F', 'I', 'H', 'J', 'L']
['A', 'C', 'F', 'H', 'I', 'J', 'L']
['A', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'D', 'B', 'E', 'M', 'P', 'N', 'O'

In [7]:
def is_above_threshold(frecuency, dependancy, thresholds):
    """Tell whether an edge strictly exceeds both configured thresholds.

    Args:
        frecuency: Succession count of the edge.
        dependancy: Dependency measure of the edge.
        thresholds: Mapping with the keys ``"frecuency"`` and ``"dependancy"``.

    Returns:
        ``True`` only when ``frecuency`` and ``dependancy`` are both strictly
        greater than their thresholds. The dependency threshold is not read
        when the frequency check already fails.
    """
    if not (frecuency > thresholds["frecuency"]):
        return False
    return dependancy > thresholds["dependancy"]

In [8]:
def edges_above_threshold(edges, thresholds, succession_matrix, dependancy_matrix):
    """Evaluate the threshold test for each edge in ``edges``.

    Args:
        edges: Iterable of ``(start, end)`` index pairs.
        thresholds: Mapping forwarded to ``is_above_threshold``.
        succession_matrix: Matrix supplying each edge's succession count.
        dependancy_matrix: Matrix supplying each edge's dependency measure.

    Returns:
        A dict mapping every ``(start, end)`` tuple to the result of
        ``is_above_threshold`` for that edge; repeated edges collapse into a
        single key.
    """
    return {
        (start, end): is_above_threshold(
            succession_matrix[start][end],
            dependancy_matrix[start][end],
            thresholds,
        )
        for start, end in edges
    }

In [13]:
# Only the event -> index mapping is used below; `zero_matrix` is not referenced in this cell.
event_indexes, zero_matrix = zero_transition_matrix(events)

def find_xor_splits(event_indexes, succession_matrix, dependancy_matrix, thresholds):
    """Find nodes whose outgoing branches are unrelated to one another.

    Step 1 collects, for every node, the successors whose edge passes the
    thresholds, and keeps the nodes with more than one such successor. These
    candidates are printed.

    Step 2 discards a candidate when any edge that leaves one of its
    successors and reaches either the start node or another of its
    successors also passes the thresholds.

    Args:
        event_indexes: Mapping of event to matrix index; only the values
            are used.
        succession_matrix: Succession counts, indexed ``[start][end]``.
        dependancy_matrix: Dependency measures, indexed ``[start][end]``.
        thresholds: Mapping forwarded to ``edges_above_threshold``.

    Returns:
        A dict ``{start_index: [successor_indexes]}`` holding the candidates
        that survive step 2.
    """
    node_ids = list(event_indexes.values())

    # Step 1: successors reached through an edge that passes the thresholds.
    successors_of = defaultdict(list)
    for source in node_ids:
        outgoing = [(source, target) for target in node_ids]
        verdicts = edges_above_threshold(outgoing, thresholds, succession_matrix, dependancy_matrix)
        successors_of[source].extend(
            target for (_, target), passed in verdicts.items() if passed
        )
    candidates = {node: targets for node, targets in successors_of.items() if len(targets) > 1}
    print("Potential XOR splits (format -> 'start: [ends]'): {}".format(candidates))

    # Step 2: drop candidates whose successors link to each other or back to the start.
    confirmed = dict(candidates)
    for source, targets in candidates.items():
        neighbourhood = [source] + targets
        internal_edges = [
            (target, other)
            for target in targets
            for other in neighbourhood
            if target != other
        ]
        verdicts = edges_above_threshold(internal_edges, thresholds, succession_matrix, dependancy_matrix)
        if any(verdicts.values()):
            confirmed.pop(source, None)

    return confirmed

# Set based on observation.
# An edge must strictly exceed both values to count (see is_above_threshold).
thresholds = {"frecuency": 120, "dependancy": 0}  
print("Thresholds setted at:\n\t* Frecuency: {}\n\t* Dependancy: {}".format(thresholds['frecuency'], thresholds['dependancy']))

# Keys and values of the result are matrix indexes, not event names.
xor_nodes = find_xor_splits(event_indexes, succession_matrix, dependancy_matrix, thresholds)

print("discovered XOR splits: {}".format(xor_nodes))

Thresholds setted at:
	* Frecuency: 120
	* Dependancy: 0
Potential XOR splits (format -> 'start: [ends]'): {0: [1, 2], 2: [5, 6], 4: [12, 15], 5: [7, 8], 7: [8, 9], 12: [13, 15]}
discovered XOR splits: {0: [1, 2], 2: [5, 6]}


In [11]:
# Only the event -> index mapping is used below; `zero_matrix` is not referenced in this cell.
event_indexes, zero_matrix = zero_transition_matrix(events)

def find_xor_joins(event_indexes, succession_matrix, dependancy_matrix, thresholds):
    """Find nodes whose incoming branches are unrelated to one another.

    Step 1 collects, for every node, the predecessors whose edge passes the
    thresholds, and keeps the nodes with more than one such predecessor.
    These candidates are printed.

    Step 2 discards a candidate when any edge that arrives at one of its
    predecessors from either the end node or another of its predecessors
    also passes the thresholds.

    Args:
        event_indexes: Mapping of event to matrix index; only the values
            are used.
        succession_matrix: Succession counts, indexed ``[start][end]``.
        dependancy_matrix: Dependency measures, indexed ``[start][end]``.
        thresholds: Mapping forwarded to ``edges_above_threshold``.

    Returns:
        A dict ``{end_index: [predecessor_indexes]}`` holding the candidates
        that survive step 2.
    """
    node_ids = list(event_indexes.values())

    # Step 1: predecessors arriving through an edge that passes the thresholds.
    predecessors_of = defaultdict(list)
    for target in node_ids:
        incoming = [(source, target) for source in node_ids]
        verdicts = edges_above_threshold(incoming, thresholds, succession_matrix, dependancy_matrix)
        predecessors_of[target].extend(
            source for (source, _), passed in verdicts.items() if passed
        )
    candidates = {node: sources for node, sources in predecessors_of.items() if len(sources) > 1}
    print("Potential XOR joins (format -> 'end: [starts]'): {}".format(candidates))

    # Step 2: drop candidates whose predecessors are fed by each other or by the end node.
    confirmed = dict(candidates)
    for target, sources in candidates.items():
        neighbourhood = [target] + sources
        internal_edges = [
            (other, source)
            for source in sources
            for other in neighbourhood
            if source != other
        ]
        verdicts = edges_above_threshold(internal_edges, thresholds, succession_matrix, dependancy_matrix)
        if any(verdicts.values()):
            confirmed.pop(target, None)

    return confirmed

# Set based on observation.
# An edge must strictly exceed both values to count (see is_above_threshold).
thresholds = {"frecuency": 120, "dependancy": 0}  
print("Thresholds setted at:\n\t* Frecuency: {}\n\t* Dependancy: {}".format(thresholds['frecuency'], thresholds['dependancy']))

# NOTE(review): this rebinds `xor_nodes`, replacing the XOR-splits result
# assigned to the same name in the splits cell.
xor_nodes = find_xor_joins(event_indexes, succession_matrix, dependancy_matrix, thresholds)

print("discovered XOR joins: {}".format(xor_nodes))

Thresholds setted at:
	* Frecuency: 120
	* Dependancy: 0
Potential XOR joins (format -> 'end: [starts]'): {8: [5, 7], 9: [7, 8], 11: [6, 9, 16], 13: [12, 15], 15: [4, 12]}
discovered XOR joins: {11: [6, 9, 16]}
