In [101]:
# setup: load the dialogue event log into a DataFrame
import pandas as pd

data_path = "../data/"
traces_path = "5_ode.csv"

# the log location is the plain concatenation of directory and file name
df = pd.read_csv(f"{data_path}{traces_path}")
# preview the first five events
print(df.head(5))

   case ID resource     activity name   resource+activity     Sitter      New  \
0        1        A        greeting()        A|greeting()   A|hello*    Hello   
1        1        U    question(data)    U|question(data)  U|request    Query   
2        1        A    list(keywords)    A|list(keywords)    A|offer  Request   
3        1        U     set(keywords)     U|set(keywords)  U|request    Query   
4        1        A  prompt(keywords)  A|prompt(keywords)    A|offer  Request   

     New.1  
0    Hello  
1    Query  
2  Request  
3    Query  
4  Request  


In [102]:
# collect dialogues: one string per case, one character per utterance
# (the first letter of the utterance's 'New' label)
case = None  # sentinel that cannot equal a real case ID (0 could be one)
dialogues = []
# only the two columns that are actually used are iterated
for case_id, label in zip(df['case ID'], df['New']):
    if case_id != case:
        # a change of case ID opens a new dialogue
        dialogues.append('')
        case = case_id
    # Greetings are left out of the dialogue string. Every other utterance is
    # kept, including the one that opens a case: this is a plain `if` because
    # an `elif` would silently drop the first utterance of each case even
    # when it is not a greeting.
    if label != 'Hello':
        dialogues[-1] += label[0]
print(len(dialogues))
print(dialogues)

26
['QRQRQRQRQAF', 'QRFAQRQRQAF', 'QRQRQAF', 'AQRQRQRQAF', 'RQRQRQAQRQRF', 'RQRQAFRFRQRQAQRQRFRFRQAFAFA', 'QRQAQRQRFRF', 'QRQRQAQAFRFAQRFRF', 'RQAQRQAFRQAQRQA', 'QRQRQRQRQRFRQRFRFRFRQRQRQRF', 'RFRFRQRFRQAFRFRQRFRFRQRQRQRQRQRQRQA', 'QAFRF', 'QRQRQRQRQRFRFRFRFRQRQRFAQRFRF', 'QRQAQAFAFAFRFAFA', 'RQRFRQRFRFA', 'QRQRQAFRF', 'AQAAF', 'QRFRFRFRQRFRFRFRQRFRQAFRQRQAFA', 'QRQAQRFAFA', 'QRQRQAQAF', 'RQAQAF', 'RQAQRQAFRFRFRFAF', 'QRQRQRFRQAF', 'RQRQAFRQAQAFAQRFRQRFAFAFA', 'QAQAQAFRFAF', 'RFAFA']


In [118]:
# 1st transformation
def find_loops(dialogues):
    """Collect the distinct two-symbol loops that occur in the dialogues.

    Every dialogue is scanned with a window of three consecutive symbols.
    Whenever the first and the third symbol of the window are equal
    (e.g. 'QRQ'), the first two symbols ('QR') are recorded as a loop.

    The dialogues are joined with the character 'o' and cut again at every
    'o', so a window never spans two dialogues; an 'o' occurring inside a
    dialogue cuts it in the same way.

    Returns the loops as a list of two-character strings, without
    duplicates, in the order in which they were first seen.
    """
    found = []
    for segment in 'o'.join(dialogues).split('o'):
        # pair every symbol with its two successors: one tuple per window
        for first, second, third in zip(segment, segment[1:], segment[2:]):
            if first != third:
                continue
            candidate = first + second
            if candidate not in found:
                found.append(candidate)
    return found

# mine the two-symbol loops that occur in the raw dialogues and show them
loops = find_loops(dialogues)
print(loops)

['QR', 'RQ', 'QA', 'FR', 'RF', 'AF', 'FA', 'AQ']


In [133]:
# group loops (linear)
import string
chars = string.ascii_lowercase


def group_loops(loops, loops_alphabet):
    """Give every distinct loop a one-letter id, ignoring its direction.

    A loop is normalised by sorting its symbols, so 'QR' and 'RQ' share the
    label 'QR' and therefore the same id. Labels that are already present in
    `loops_alphabet` keep their id; new labels receive the next unused letter.

    Parameters:
        loops: iterable of loop strings, e.g. as returned by find_loops.
        loops_alphabet: dict mapping normalised label -> id. It is extended
            IN PLACE, so ids stay stable when the same dict is passed again
            for the next level.

    Returns:
        The same `loops_alphabet` object that was passed in.

    Raises:
        IndexError: when more distinct loops exist than letters are available.
    """
    # 'o' is the hard-coded dialogue separator of find_loops/transform_log.
    # Handing it out as a loop id would make the next level cut a dialogue in
    # two at that id, so it is removed from the pool of ids.
    labels = [c for c in chars if c != 'o']
    for loop in loops:
        # normalise loop label
        loop_label = ''.join(sorted(loop))
        if loop_label not in loops_alphabet:
            if len(loops_alphabet) >= len(labels):
                raise IndexError(
                    "no loop ids left: %d single-letter ids are available"
                    % len(labels))
            # ids are handed out in order: the n-th new label gets labels[n]
            loops_alphabet[loop_label] = labels[len(loops_alphabet)]
    return loops_alphabet

# start from an empty alphabet and assign an id to every distinct loop
loops_alphabet = group_loops(loops, {})
print(loops_alphabet)

{'QR': 'a', 'AQ': 'b', 'FR': 'c', 'AF': 'd'}


In [135]:
# replace all loops with loop ids
def transform_log(dialogues, loops_alphabet, loops):
    """Rewrite the dialogues, replacing every loop occurrence by its id.

    The dialogues are joined with 'o' and processed as one stream:
      * a window of at most two symbols is kept; when the window equals an
        entry of `loops`, the id of the normalised (sorted) loop is looked up
        in `loops_alphabet` and emitted, unless the output already ends with
        that very id;
      * after an id has been emitted, all directly following symbols that
        belong to that loop are skipped;
      * a two-symbol window that is not a loop releases its first symbol to
        the output unchanged (this is also how the 'o' separators get
        through).

    Returns the encoded stream split at 'o', i.e. a list of strings.
    """
    encoded = ''
    window = ''
    active_loop = None
    for symbol in 'o'.join(dialogues):
        # still inside the loop that was just emitted: swallow the symbol
        if active_loop and symbol in active_loop:
            continue
        active_loop = None
        window += symbol

        if window not in loops:
            if len(window) == 2:
                # no loop here: release the older symbol, keep the newer one
                encoded += window[0]
                window = window[1:]
            continue

        label = ''.join(sorted(window))
        loop_id = str(loops_alphabet[label])
        # do not repeat loop symbols
        if encoded == '' or encoded[-1] != loop_id:
            encoded += loop_id
            active_loop = label
        window = ''

    # whatever is left in the window belongs to the end of the last dialogue
    return (encoded + window).split('o')
        
# first abstraction level: raw dialogues with their loops replaced by ids
dialogues_2 = transform_log(dialogues, loops_alphabet, loops)
print(dialogues_2)

['ad', 'adad', 'ad', 'bad', 'abaF', 'adcabacbd', 'abac', 'abcbc', 'abadabaA', 'acacaF', 'cacbcacaA', 'bc', 'acadac', 'abdcd', 'acacA', 'adc', 'bF', 'acacacbcad', 'abcd', 'abF', 'abF', 'abadcd', 'acbF', 'adabdacad', 'bcd', 'cd']


In [136]:
# 2nd transformation: repeat mine -> label -> rewrite on the level-1 log
loops_2 = find_loops(dialogues_2)
print(loops_2)
# group_loops extends the dict it is given and returns that same object, so
# loops_alphabet_2 and loops_alphabet are one and the same dict from here on
loops_alphabet_2 = group_loops(loops_2, loops_alphabet)
print(loops_alphabet_2)
dialogues_3 = transform_log(dialogues_2, loops_alphabet_2, loops_2)
print(dialogues_3)

['ad', 'da', 'ab', 'bc', 'cb', 'ac', 'ca', 'dc']
{'QR': 'a', 'AQ': 'b', 'FR': 'c', 'AF': 'd', 'ad': 'e', 'ab': 'f', 'bc': 'g', 'ac': 'h', 'cd': 'i'}
['e', 'e', 'e', 'be', 'fF', 'ehbhbd', 'fc', 'fg', 'febaA', 'hF', 'hghA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'hge', 'fcd', 'fF', 'fF', 'fi', 'hbF', 'ebehd', 'gd', 'cd']


In [141]:
# 3rd transformation: repeat mine -> label -> rewrite on the level-2 log
loops_3 = find_loops(dialogues_3)
print(loops_3)
# NOTE: loops_alphabet (not loops_alphabet_2) is passed here; this continues
# the id sequence only because group_loops mutated that dict in place in the
# previous cell, so it already holds the level-2 ids
loops_alphabet_3 = group_loops(loops_3, loops_alphabet)
print(loops_alphabet_3)
dialogues_4 = transform_log(dialogues_3, loops_alphabet_3, loops_3)
print(dialogues_4)

['hb', 'bh', 'hg', 'eb']
{'QR': 'a', 'AQ': 'b', 'FR': 'c', 'AF': 'd', 'ad': 'e', 'ab': 'f', 'bc': 'g', 'ac': 'h', 'cd': 'i', 'bh': 'j', 'gh': 'k', 'be': 'l'}
['e', 'e', 'e', 'be', 'fF', 'ejd', 'fc', 'fg', 'flaA', 'hF', 'kA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'ke', 'fcd', 'fF', 'fF', 'fi', 'jF', 'lhd', 'gd', 'cd']


In [144]:
# 4th transformation: look for loops in the level-3 log
loops_4 = find_loops(dialogues_4)
print(loops_4)
# termination: no loop is found any more (an empty list is printed), so there
# is nothing left to abstract

[]


In [146]:
# hierarchical PM (presumably "process model"): show every level of the log,
# from the raw dialogues to the most abstract encoding, followed by the
# alphabet that maps each normalised loop to its id
print(dialogues)
print(dialogues_2)
print(dialogues_3)
print(dialogues_4)
print(loops_alphabet)

['QRQRQRQRQAF', 'QRFAQRQRQAF', 'QRQRQAF', 'AQRQRQRQAF', 'RQRQRQAQRQRF', 'RQRQAFRFRQRQAQRQRFRFRQAFAFA', 'QRQAQRQRFRF', 'QRQRQAQAFRFAQRFRF', 'RQAQRQAFRQAQRQA', 'QRQRQRQRQRFRQRFRFRFRQRQRQRF', 'RFRFRQRFRQAFRFRQRFRFRQRQRQRQRQRQRQA', 'QAFRF', 'QRQRQRQRQRFRFRFRFRQRQRFAQRFRF', 'QRQAQAFAFAFRFAFA', 'RQRFRQRFRFA', 'QRQRQAFRF', 'AQAAF', 'QRFRFRFRQRFRFRFRQRFRQAFRQRQAFA', 'QRQAQRFAFA', 'QRQRQAQAF', 'RQAQAF', 'RQAQRQAFRFRFRFAF', 'QRQRQRFRQAF', 'RQRQAFRQAQAFAQRFRQRFAFAFA', 'QAQAQAFRFAF', 'RFAFA']
['ad', 'adad', 'ad', 'bad', 'abaF', 'adcabacbd', 'abac', 'abcbc', 'abadabaA', 'acacaF', 'cacbcacaA', 'bc', 'acadac', 'abdcd', 'acacA', 'adc', 'bF', 'acacacbcad', 'abcd', 'abF', 'abF', 'abadcd', 'acbF', 'adabdacad', 'bcd', 'cd']
['e', 'e', 'e', 'be', 'fF', 'ehbhbd', 'fc', 'fg', 'febaA', 'hF', 'hghA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'hge', 'fcd', 'fF', 'fF', 'fi', 'hbF', 'ebehd', 'gd', 'cd']
['e', 'e', 'e', 'be', 'fF', 'ejd', 'fc', 'fg', 'flaA', 'hF', 'kA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'ke', 'fcd', '

In [148]:
# recursive call

import string
chars = string.ascii_lowercase


def mine_simple_loops(dialogues, loops_alphabet=None):
    """Run one abstraction level: find loops, label them, rewrite the log.

    Steps:
      1. Mine loops: slide a three-symbol window over every dialogue; a window
         such as 'QRQ' (first == third symbol) yields the loop 'QR'.
      2. Group loops: a loop is normalised by sorting its symbols ('RQ' ->
         'QR'); every new normalised label gets the next unused one-letter id.
      3. Transform the log: each loop occurrence is replaced by its id and the
         directly following symbols of that loop are skipped.

    The character 'o' is used internally as the separator between dialogues,
    so an 'o' inside a dialogue is treated as a dialogue boundary.

    Parameters:
        dialogues: list of strings, one symbol per character.
        loops_alphabet: dict mapping normalised loop label -> id. When given,
            it is extended IN PLACE (ids stay stable across levels); when
            omitted, a new empty dict is created for this call.

    Returns:
        (transformed_dialogues, loops_alphabet). `transformed_dialogues` is
        None when no loop was found, which signals that mining is finished.

    Raises:
        IndexError: when more distinct loops exist than letters are available.
    """
    # A fresh dict per call: a `{}` default is created once and shared by all
    # calls that omit the argument, leaking ids from one run into the next.
    if loops_alphabet is None:
        loops_alphabet = {}

    # mine loops (linear), e.g. QRQ, i.e. s[0] == s[2] -> s[0:2] is a loop
    loops = []
    s = ''
    for d in 'o'.join(dialogues):
        if d == 'o':
            # reset pattern
            s = ''
            continue
        s += d
        if len(s) == 3:
            # check if loop
            if s[0] == s[2]:
                loop = s[:2]
                if loop not in loops:
                    loops.append(loop)
            # slide over
            s = s[1:]
    if not loops:
        return None, loops_alphabet

    # group loops (linear)
    # 'o' must never become a loop id: it is the dialogue separator, and an
    # id 'o' would cut a dialogue in two in the split('o') below
    labels = [c for c in chars if c != 'o']
    for loop in loops:
        # normalise loop label
        loop_label = ''.join(sorted(loop))
        if loop_label not in loops_alphabet:
            if len(loops_alphabet) >= len(labels):
                raise IndexError(
                    "no loop ids left: %d single-letter ids are available"
                    % len(labels))
            loops_alphabet[loop_label] = labels[len(loops_alphabet)]

    # transform log by replacing all loops with loop ids
    trace = ''
    s = ''
    last_loop = None
    for d in 'o'.join(dialogues):
        # skip the remaining symbols of the loop that was just emitted
        if last_loop and d in last_loop:
            continue
        last_loop = None
        s += d
        if s in loops:
            loop_label = ''.join(sorted(s))
            loop_id = str(loops_alphabet[loop_label])
            # do not repeat loop symbols
            if trace == '' or trace[-1] != loop_id:
                trace += loop_id
                last_loop = loop_label
            s = ''
        elif len(s) == 2:
            # not a loop: emit the older symbol unchanged
            trace += s[0]
            # slide over
            s = s[1:]
    # flush the symbol that is still pending in the window
    trace += s
    return trace.split('o'), loops_alphabet


def build_hpm(dialogues):
    """Build the hierarchy of logs by mining loops level after level.

    Starting with `dialogues`, each level is stored and then handed to
    mine_simple_loops, whose result becomes the next level. The iteration
    ends as soon as the current level is falsy (None or empty).

    Returns a tuple (levels, alphabet): the list of all stored levels, first
    level first, and the loop alphabet shared by all levels.
    """
    levels = []
    alphabet = {}
    current = dialogues
    while current:
        levels.append(current)
        # the same alphabet is threaded through every level
        current, alphabet = mine_simple_loops(current, alphabet)
    return levels, alphabet


# build all levels in one go and print them, most concrete level first,
# followed by the loop alphabet
hpm, loops_alphabet = build_hpm(dialogues)
for pm in hpm:
    print(pm)
print(loops_alphabet)

['QRQRQRQRQAF', 'QRFAQRQRQAF', 'QRQRQAF', 'AQRQRQRQAF', 'RQRQRQAQRQRF', 'RQRQAFRFRQRQAQRQRFRFRQAFAFA', 'QRQAQRQRFRF', 'QRQRQAQAFRFAQRFRF', 'RQAQRQAFRQAQRQA', 'QRQRQRQRQRFRQRFRFRFRQRQRQRF', 'RFRFRQRFRQAFRFRQRFRFRQRQRQRQRQRQRQA', 'QAFRF', 'QRQRQRQRQRFRFRFRFRQRQRFAQRFRF', 'QRQAQAFAFAFRFAFA', 'RQRFRQRFRFA', 'QRQRQAFRF', 'AQAAF', 'QRFRFRFRQRFRFRFRQRFRQAFRQRQAFA', 'QRQAQRFAFA', 'QRQRQAQAF', 'RQAQAF', 'RQAQRQAFRFRFRFAF', 'QRQRQRFRQAF', 'RQRQAFRQAQAFAQRFRQRFAFAFA', 'QAQAQAFRFAF', 'RFAFA']
['ad', 'adad', 'ad', 'bad', 'abaF', 'adcabacbd', 'abac', 'abcbc', 'abadabaA', 'acacaF', 'cacbcacaA', 'bc', 'acadac', 'abdcd', 'acacA', 'adc', 'bF', 'acacacbcad', 'abcd', 'abF', 'abF', 'abadcd', 'acbF', 'adabdacad', 'bcd', 'cd']
['e', 'e', 'e', 'be', 'fF', 'ehbhbd', 'fc', 'fg', 'febaA', 'hF', 'hghA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'hge', 'fcd', 'fF', 'fF', 'fi', 'hbF', 'ebehd', 'gd', 'cd']
['e', 'e', 'e', 'be', 'fF', 'ejd', 'fc', 'fg', 'flaA', 'hF', 'kA', 'g', 'hec', 'fi', 'hA', 'ec', 'bF', 'ke', 'fcd', '

In [None]:
# unwind: decode each trace