In [24]:
import random
import networkx as nx
import matplotlib.pyplot as plt

import pandas as pd

import sys
sys.path.append("../../")
from src.models.synthetic_logs import *

In [25]:
# Size filter used by get_cliques_from_trace: a clique is only examined when it
# has strictly more than CLIQUE_THRESHOLD nodes (0 lets every non-empty clique through).
CLIQUE_THRESHOLD = 0

## Functions

In [26]:
def get_successor_pairs( this_trace ):
    """Collect the (s_i, s_k) successor pairs of a single trace.

    For every position i except the last, the symbols following s_i are
    scanned up to (but excluding) the next occurrence of s_i itself, or up to
    the end of the trace when s_i does not occur again. Every distinct symbol
    s_k found in that window contributes one (s_i, s_k) pair.

    Parameters
    ----------
    this_trace : sequence of hashable
        Ordered symbols of one trace (list, tuple or string). It is copied,
        never modified.

    Returns
    -------
    list of tuple
        Pairs in order of discovery: by position i, then by first appearance
        of s_k inside the window. A pair is emitted at most once per position
        i, but may appear several times overall when s_i itself repeats in
        the trace.
    """
    symbols = list(this_trace)
    pairs = []
    for idx in range(len(symbols) - 1):
        s_i = symbols[idx]
        following = symbols[idx + 1:]

        # Find the first k+1 such that s_i == s_(k+1); without a repetition
        # the window extends to the end of the trace.
        try:
            k_plus_1 = following.index(s_i)
        except ValueError:
            k_plus_1 = len(following)

        # T is the maximal subtrace after s_i that does not contain s_i.
        T = following[:k_plus_1]

        # UPDATE 2020-01-16: discard cases ABBBBBBB and consider just AB.
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result does not depend on set iteration order (which varies between
        # interpreter runs for strings).
        pairs.extend(dict.fromkeys((s_i, s_k) for s_k in T))
    return pairs

In [27]:
def get_successor_pairs_by_freq( traces, sensitivity=-1 ):
    """Count how many times each successor pair occurs over all traces.

    Each trace is passed to get_successor_pairs and every pair it returns
    adds one to that pair's counter.

    Parameters
    ----------
    traces : iterable
        The traces to analyse, each one in the form get_successor_pairs accepts.
    sensitivity : int, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        Maps each pair to its number of occurrences, keyed in first-seen order.
    """
    counts = {}
    for one_trace in traces:
        for successor_pair in get_successor_pairs(one_trace):
            counts[successor_pair] = counts.get(successor_pair, 0) + 1
    return counts

In [28]:
def cluster_same_freq(pairs_dic, threshold=0):
    """Group the keys of a frequency dictionary by their frequency value.

    Parameters
    ----------
    pairs_dic : dict
        Maps a pair to its frequency (as built by get_successor_pairs_by_freq).
    threshold : int, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        Maps each frequency to the list of pairs that have exactly that
        frequency. Pairs keep the order in which they appear in ``pairs_dic``.
    """
    groups = {}
    for pair, f in pairs_dic.items():
        groups.setdefault(f, []).append(pair)
    return groups

In [29]:
# Renamed from get_serial_paths
def get_cliques_from_trace( data, minfreq=0, minsymbols=0, merge_freqs=False, debug=False):
    """Extract strictly ordered cliques ("paths") from pairs or from traces.

    The behaviour depends on the type of ``data[0]``:

    * tuple -> ``data`` is a sequence of (source, target) pairs. Returns a list
      of paths, each path being a list of nodes.
    * list  -> ``data`` is a collection of traces. Successor pairs are counted
      over all traces, grouped by frequency, and each group is analysed as a
      sequence of pairs, from the highest frequency down. Returns a dict
      mapping frequency -> list of paths (frequencies without paths are omitted).

    Parameters
    ----------
    data : sequence
        Pairs or traces, see above.
    minfreq : int, optional
        Trace mode only: frequency groups below this value are skipped.
    minsymbols : int, optional
        Trace mode only: paths with fewer nodes than this are dropped.
    merge_freqs : bool, optional
        Trace mode only: when true, the pairs of every already-processed
        (higher) frequency group are added to the group being analysed.
    debug : bool, optional
        Print progress messages.

    Returns
    -------
    list, dict or None
        ``[]`` for empty ``data``; a list (pair mode) or dict (trace mode) as
        described above; ``None`` when ``data[0]`` is neither a tuple nor a list.
    """
    if len(data) == 0:
        return []

    first = data[0]
    # isinstance (rather than an exact type comparison) also accepts
    # tuple/list subclasses such as namedtuples.
    if isinstance(first, tuple):
        return _ordered_cliques_from_pairs(data, debug)
    if isinstance(first, list):
        return _ordered_cliques_by_freq(data, minfreq, minsymbols, merge_freqs, debug)

    # Unsupported element type: callers have always received None here.
    return None


def _ordered_cliques_from_pairs(pairs, debug=False):
    """Return the maximal cliques of the pair graph whose nodes form a strict order."""
    G = nx.DiGraph()
    G.add_edges_from(pairs)

    paths = []
    # Cliques are searched on the undirected version of the graph.
    for V in nx.algorithms.clique.find_cliques(nx.to_undirected(G)):
        if debug:
            print("found this clique: %s" % (V,))

        if len(V) <= CLIQUE_THRESHOLD:
            continue

        # Directed graph restricted to the clique, nodes ordered by out-degree.
        nodes = sorted(G.subgraph(V).out_degree(), key=lambda p: p[1], reverse=True)

        # Strict checking of out-degree: inside the clique the degrees must be
        # exactly n-1, n-2, ..., 0, i.e. each node points to every node after it.
        expected = range(len(nodes) - 1, -1, -1)
        if all(outdeg == want for (_, outdeg), want in zip(nodes, expected)):
            paths.append([node for node, _ in nodes])
    return paths


def _ordered_cliques_by_freq(traces, minfreq, minsymbols, merge_freqs, debug):
    """Run the pair analysis once per frequency group, highest frequency first."""
    cluster = cluster_same_freq(get_successor_pairs_by_freq(traces))
    paths = {}

    # Added on 2020-01-16:
    # when working with lower-frequency clusters, combine their pairs with
    # those of the higher-frequency clusters already processed.
    previous_cluster = []
    for f in sorted(cluster.keys(), reverse=True):
        if debug:
            print("Working in freq=%d" % f)
        if f < minfreq:
            continue

        found = [
            X
            for X in _ordered_cliques_from_pairs(cluster[f] + previous_cluster, debug)
            if len(X) >= minsymbols
        ]

        # With merge_freqs disabled only the pairs of the current frequency
        # are considered in each round.
        if merge_freqs:
            previous_cluster += cluster[f]
        if found:
            paths[f] = found
    return paths

## Basic shapes

### Single path

In [30]:
# Two identical runs of the same linear sequence A..E, analysed with debug output on.
single_path_trace = [list(symbols) for symbols in ("ABCDE", "ABCDE")]
get_cliques_from_trace(single_path_trace, minfreq=0, minsymbols=0, debug=True)

Working in freq=2
found this clique: ['A', 'B', 'C', 'E', 'D']


{2: [['A', 'B', 'C', 'D', 'E']]}

## Loop

In [31]:
# A single trace that goes through the two-symbol loop A, B twice.
loop_trace = [list("AB" * 2)]
get_cliques_from_trace(loop_trace)

{2: [['A', 'B']], 1: [['B', 'A']]}

In [32]:
# Same two-symbol loop, now repeated three times within one trace.
loop_trace = [list("AB" * 3)]
get_cliques_from_trace(loop_trace)

{3: [['A', 'B']], 2: [['B', 'A']]}

In [33]:
# Two identical traces, each repeating the A, B loop three times.
loop_trace = [list("AB" * 3) for _ in range(2)]
get_cliques_from_trace(loop_trace)

{6: [['A', 'B']], 4: [['B', 'A']]}

In [34]:
# A single trace that repeats the four-symbol loop A, B, C, D three times.
loop_trace = [list("ABCD" * 3)]
get_cliques_from_trace(loop_trace)

{3: [['A', 'B', 'C', 'D']], 2: [['D', 'C', 'B', 'A']]}

## Tree (if/then/else)

In [35]:
# Common prefix A, B followed by one of three single-symbol branches (m, p or s).
tree_trace = [list("AB" + branch) for branch in ("m", "p", "s")]
get_cliques_from_trace(tree_trace, merge_freqs=False)

{3: [['A', 'B']],
 1: [['B', 's'], ['B', 'p'], ['B', 'm'], ['A', 's'], ['A', 'p'], ['A', 'm']]}

In [36]:
# Common prefix A, B, C followed by one of three three-symbol branches.
tree_trace = [list("ABC" + branch) for branch in ("mno", "pqr", "stu")]
get_cliques_from_trace(tree_trace, merge_freqs=False)

{3: [['A', 'B', 'C']],
 1: [['B', 's', 't', 'u'],
  ['B', 'p', 'q', 'r'],
  ['B', 'm', 'n', 'o'],
  ['C', 's', 't', 'u'],
  ['C', 'p', 'q', 'r'],
  ['C', 'm', 'n', 'o'],
  ['A', 's', 't', 'u'],
  ['A', 'p', 'q', 'r'],
  ['A', 'm', 'n', 'o']]}

In [37]:
# Two-level tree: prefix A, B, then one of three middle branches (mn, pq, st),
# then one of two endings (15, 26) -- every combination appears exactly once.
tree_trace = [
    list("AB" + middle + ending)
    for middle in ("mn", "pq", "st")
    for ending in ("15", "26")
]
get_cliques_from_trace(tree_trace, merge_freqs=True)

{6: [['A', 'B']],
 3: [['A', 'B', '2', '6'], ['A', 'B', '1', '5']],
 2: [['A', 'B', '2', '6'],
  ['A', 'B', 'p', 'q'],
  ['A', 'B', '1', '5'],
  ['A', 'B', 's', 't'],
  ['A', 'B', 'm', 'n']],
 1: [['A', 'B', 'p', 'q', '2', '6'],
  ['A', 'B', 's', 't', '2', '6'],
  ['A', 'B', 'm', 'n', '2', '6'],
  ['A', 'B', 'p', 'q', '1', '5'],
  ['A', 'B', 's', 't', '1', '5'],
  ['A', 'B', 'm', 'n', '1', '5']]}

In [38]:
# Reuses tree_trace from the cell above, this time without merging frequency groups.
get_cliques_from_trace( tree_trace, merge_freqs=False )

{6: [['A', 'B']],
 3: [['B', '2', '6'], ['B', '1', '5'], ['A', '2', '6'], ['A', '1', '5']],
 2: [['B', 'p', 'q'],
  ['B', 's', 't'],
  ['B', 'm', 'n'],
  ['A', 'p', 'q'],
  ['A', 's', 't'],
  ['A', 'm', 'n']],
 1: [['p', '2'],
  ['q', '2'],
  ['s', '2'],
  ['t', '2'],
  ['m', '2'],
  ['n', '2'],
  ['p', '6'],
  ['q', '6'],
  ['s', '6'],
  ['t', '6'],
  ['m', '6'],
  ['n', '6'],
  ['p', '5'],
  ['q', '5'],
  ['s', '5'],
  ['t', '5'],
  ['m', '5'],
  ['n', '5'],
  ['p', '1'],
  ['q', '1'],
  ['s', '1'],
  ['t', '1'],
  ['m', '1'],
  ['n', '1']]}

## Joins

In [39]:
# Three different three-symbol prefixes that all join into the common suffix A, B, C.
join_trace = [list(prefix + "ABC") for prefix in ("mno", "pqr", "stu")]
get_cliques_from_trace(join_trace)

{3: [['A', 'B', 'C']],
 1: [['s', 't', 'u', 'B'],
  ['p', 'q', 'r', 'B'],
  ['m', 'n', 'o', 'B'],
  ['s', 't', 'u', 'C'],
  ['p', 'q', 'r', 'C'],
  ['m', 'n', 'o', 'C'],
  ['s', 't', 'u', 'A'],
  ['p', 'q', 'r', 'A'],
  ['m', 'n', 'o', 'A']]}

In [40]:
# Reuses join_trace from the cell above, now merging higher-frequency pairs into lower groups.
get_cliques_from_trace( join_trace, merge_freqs=True  )

{3: [['A', 'B', 'C']],
 1: [['s', 't', 'u', 'A', 'B', 'C'],
  ['p', 'q', 'r', 'A', 'B', 'C'],
  ['m', 'n', 'o', 'A', 'B', 'C']]}

In [41]:
# Same join_trace with merge_freqs=False spelled out; False is also the parameter's default.
get_cliques_from_trace( join_trace, merge_freqs=False )

{3: [['A', 'B', 'C']],
 1: [['s', 't', 'u', 'B'],
  ['p', 'q', 'r', 'B'],
  ['m', 'n', 'o', 'B'],
  ['s', 't', 'u', 'C'],
  ['p', 'q', 'r', 'C'],
  ['m', 'n', 'o', 'C'],
  ['s', 't', 'u', 'A'],
  ['p', 'q', 'r', 'A'],
  ['m', 'n', 'o', 'A']]}

In [42]:
# Two-level join: one of two openings (15, 26), then one of three middle
# sections (mn, pq, st), all ending in the common suffix A, B.
join_trace = [
    list(opening + middle + "AB")
    for middle in ("mn", "pq", "st")
    for opening in ("15", "26")
]
get_cliques_from_trace(join_trace, merge_freqs=True)

{6: [['A', 'B']],
 3: [['2', '6', 'A', 'B'], ['1', '5', 'A', 'B']],
 2: [['2', '6', 'A', 'B'],
  ['p', 'q', 'A', 'B'],
  ['1', '5', 'A', 'B'],
  ['s', 't', 'A', 'B'],
  ['m', 'n', 'A', 'B']],
 1: [['2', '6', 'p', 'q', 'A', 'B'],
  ['2', '6', 's', 't', 'A', 'B'],
  ['2', '6', 'm', 'n', 'A', 'B'],
  ['1', '5', 'p', 'q', 'A', 'B'],
  ['1', '5', 's', 't', 'A', 'B'],
  ['1', '5', 'm', 'n', 'A', 'B']]}

In [43]:
# Reuses the two-level join_trace from the cell above, without merging frequency groups.
get_cliques_from_trace( join_trace, merge_freqs=False )

{6: [['A', 'B']],
 3: [['2', '6', 'B'], ['1', '5', 'B'], ['2', '6', 'A'], ['1', '5', 'A']],
 2: [['p', 'q', 'B'],
  ['s', 't', 'B'],
  ['m', 'n', 'B'],
  ['p', 'q', 'A'],
  ['s', 't', 'A'],
  ['m', 'n', 'A']],
 1: [['2', 'p'],
  ['2', 'q'],
  ['2', 's'],
  ['2', 't'],
  ['2', 'n'],
  ['2', 'm'],
  ['6', 'p'],
  ['6', 'q'],
  ['6', 's'],
  ['6', 't'],
  ['6', 'n'],
  ['6', 'm'],
  ['5', 'p'],
  ['5', 'q'],
  ['5', 's'],
  ['5', 't'],
  ['5', 'n'],
  ['5', 'm'],
  ['1', 'p'],
  ['1', 'q'],
  ['1', 's'],
  ['1', 't'],
  ['1', 'm'],
  ['1', 'n']]}

## Loop inside

In [44]:
# Linear prefix A, B and suffix F, G, H around an inner "cde" loop that runs
# two, three and one times respectively.
# (A variant that skips the loop, "ABFG", is not included.)
loop_inside = [list("AB" + "cde" * repeats + "FGH") for repeats in (2, 3, 1)]
get_cliques_from_trace(loop_inside, merge_freqs=False)

{6: [['c', 'd', 'e']], 3: [['A', 'B', 'e', 'd', 'c', 'F', 'G', 'H']]}

In [45]:
# Reuses loop_inside from the cell above, now merging higher-frequency pairs into lower groups.
get_cliques_from_trace( loop_inside, merge_freqs=True )

{6: [['c', 'd', 'e']]}