In [27]:
import mne
import os
import glob
import numpy as np
import pandas as pd

# def add_stream_to_event_tags(events, sub):
# Get marks and streams
tags = get_tags(events)
marks, streams = get_marks_and_streams_from_log_file(sub)

# Find diff between marks and tags
diffs = diff(marks, tags)
indexes_to_drop_from_marks, indexes_to_drop_from_tags = get_drop_indexes(diffs)
marks = apply_diff(marks, indexes_to_drop_from_marks)
tags = apply_diff(tags, indexes_to_drop_from_tags)

# Check
if tags != marks:
    raise ValueError('Event tags do not match log file tags!')
else:
    print('Successfully matched marks and tags :-)')

# Now apply the diffs the list of stream sides
streams = apply_diff(streams, indexes_to_drop_from_marks)

# Now add streams to event tags
hier_tags = make_tags_hierarchical(streams, tags)

# Now make original events object match the new tags
events = apply_diff(events.tolist(), indexes_to_drop_from_tags)
events = np.array(events)

# Now add hierarchical tags to events object
hier_events = add_hierarchical_tags_to_events(events, hier_tags)

#     return hier_events

def get_drop_indexes(diffs):
    indexes_to_drop_from_tags = []
    indexes_to_drop_from_marks = []

    for i, diff in enumerate(diffs):
        change = diff[0]
        if change == 'addition': # tag is found in tags but not in marks
            indexes_to_drop_from_tags.append(i)
        elif change == 'removal':
            indexes_to_drop_from_marks.append(i)
    
    return indexes_to_drop_from_marks, indexes_to_drop_from_tags

def apply_diff(l, indexes):
    for index in sorted(indexes, reverse=True):
        del l[index]
    return l

def compute_lcs_len(text1, text2):
    """Computes a table of f(i, j) results."""
    n = len(text1)
    m = len(text2)

    # We store the results in a (n + 1) x (m + 1) matrix. The +1s are to
    # allocate space for the empty strings. Cell [i][j] will cache the
    # result of f(i, j).
    lcs = [[None for _ in range(m + 1)]
               for _ in range(n + 1)]

    # We then fill the matrix by going through all rows, using the fact
    # that each call only needs results from the previous (i - 1) or
    # same (i) row, and from the previous (j - 1) or same (j) column.
    for i in range(0, n + 1):
        for j in range(0, m + 1):
          # The remaining code is exactly the same recursion as before, but
          # we do not make recursive calls and instead use the results cached
          # in the matrix.
            if i == 0 or j == 0:
                lcs[i][j] = 0
            elif text1[i - 1] == text2[j - 1]:
                lcs[i][j] = 1 + lcs[i - 1][j - 1]
            else:
                lcs[i][j] = max(lcs[i - 1][j], lcs[i][j - 1])

    return lcs

def diff(text1, text2):
    """Computes the optimal diff of the two given inputs.

    The result is a list where all elements are Removals, Additions or
    Unchanged elements.
    """
    lcs = compute_lcs_len(text1, text2)
    results = []

    text1 = list(text1)
    text2 = list(text2)
    
    i = len(text1)
    j = len(text2)

  # We iterate until we reach the end of both texts.
    while i != 0 or j != 0:
        # If we reached the end of one of text1 (i == 0) or text2 (j == 0),
        # then we just need to print the remaining additions and removals.
        if i == 0:
            results.append(('addition', text2[j - 1]))
            j -= 1
        elif j == 0:
            results.append(('removal', text1[i - 1]))
            i -= 1
        # Otherwise there's still parts of text1 and text2 left. If the
        # currently considered parts are equal, then we found an unchanged
        # part which belongs to the longest common subsequence.
        elif text1[i - 1] == text2[j - 1]:
            results.append(('unchanged', text1[i - 1]))
            i -= 1
            j -= 1
        # In any other case, we go in the direction of the longest common
        # subsequence.
        elif lcs[i - 1][j] <= lcs[i][j - 1]:
            results.append(('addition', text2[j - 1]))
            j -= 1
        else:
            results.append(('removal', text1[i - 1]))
            i -= 1

    # Reverse results because we iterated over the texts from the end but
    # want the results to be in forward order.
    return list(reversed(results))

def get_tags(events):
    tags = []
    for i in range(1, len(events)):
        tags.append(events[i][2])
    return tags
    
def get_marks_and_streams_from_log_file(sub):
    # Get events from log files
    log_dir = '../data/logs'
    logs = pd.DataFrame()

    for fpath in list(glob.glob(f'{log_dir}/sub-{sub}_*.log')):
        print(fpath)

        # checking if it is a file
        if os.path.isfile(fpath):
            log = pd.read_csv(fpath)
            logs = pd.concat([logs, log])

    logs = logs.sort_values(by = ['block_num', 'seq_num', 'tone_num'])
    logs = logs.reset_index()
    marks = list(logs.mark)
    streams = logs.stream

    return marks, streams

def make_tags_hierarchical(streams, tags):

    # Change stream string value from 'r' and 'l' into 1 and 2
    stream_tag = streams.replace(['r', 'l'], [1, 2])
    stream_tag = list(stream_tag)

    # Concat stream tags with event tags
    hier_tags = []
    hier_tags.insert(0, 99999)
    for stream, tag in zip(stream_tag, tags):
        hier_tag = int(str(stream) + str(tag))
        hier_tags.append(hier_tag)

    return hier_tags

def add_hierarchical_tags_to_events(events, hier_tags):
    hier_events = []
    for i in range(0, len(events)):
        hier_event = list(events[i])
        hier_event[2] = hier_tags[i]
        hier_events.append(hier_event)
    return hier_events

sub = 24
eeg_fpath = '../data/raw/sub-24.vhdr'
raw = mne.io.read_raw(eeg_fpath)
events, event_ids = mne.events_from_annotations(raw)
# add_stream_to_event_tags(events, sub)

../data/logs/sub-24_blk-2.log
../data/logs/sub-24_blk-4.log
../data/logs/sub-24_blk-0.log
../data/logs/sub-24_blk-1.log
../data/logs/sub-24_blk-3.log
Successfully matched marks and tags :-)
Extracting parameters from ../data/raw/sub-24.vhdr...
Setting channel info structure...
Used Annotations descriptions: ['New Segment/', 'Stimulus/S 11', 'Stimulus/S 12', 'Stimulus/S 13', 'Stimulus/S 21', 'Stimulus/S 22', 'Stimulus/S 23', 'Stimulus/S 31', 'Stimulus/S 32', 'Stimulus/S 33']
../data/logs/sub-24_blk-2.log
../data/logs/sub-24_blk-4.log
../data/logs/sub-24_blk-0.log
../data/logs/sub-24_blk-1.log
../data/logs/sub-24_blk-3.log
Successfully matched marks and tags :-)


IndexError: list index out of range

In [26]:
import numpy as np
events_list = events.tolist()
np.array(events_list)
events

array([[       0,        0,    99999],
       [  420922,        0,       12],
       [  426687,        0,       12],
       ...,
       [17217970,        0,       13],
       [17223730,        0,       23],
       [17229495,        0,       23]])

In [4]:
sub = 9
eeg_fpath = '../data/raw/sub-9.vhdr'
raw = mne.io.read_raw(eeg_fpath)
events, event_ids = mne.events_from_annotations(raw)

# Get marks and streams
tags = get_tags(events)
marks, streams = get_marks_and_streams_from_log_file(sub)

# Find diff between marks and tags
diffs = diff(marks, tags)
indexes_to_drop_from_marks, indexes_to_drop_from_tags = get_drop_indexes(diffs)
marks = apply_diff(marks, indexes_to_drop_from_marks)
tags = apply_diff(tags, indexes_to_drop_from_tags)

# Check
if tags != marks:
    raise ValueError('Event tags do not match log file tags!')
else:
    print('Successfully matched marks and tags :-)')
    
# Now apply the diffs the list of stream sides
streams = apply_diff(streams, indexes_to_drop_from_marks)

# Now add streams to event tags
hier_tags = make_tags_hierarchical(streams, tags)

# Now add hierarchical tags to events object
hier_events = add_hierarchical_tags_to_events(events, hier_tags)

Extracting parameters from ../data/raw/sub-9.vhdr...
Setting channel info structure...
Used Annotations descriptions: ['New Segment/', 'Stimulus/S 11', 'Stimulus/S 12', 'Stimulus/S 13', 'Stimulus/S 21', 'Stimulus/S 22', 'Stimulus/S 23', 'Stimulus/S 31', 'Stimulus/S 32', 'Stimulus/S 33']
../data/logs/sub-9_blk-0.log
../data/logs/sub-9_blk-3.log
../data/logs/sub-9_blk-2.log
../data/logs/sub-9_blk-4.log
../data/logs/sub-9_blk-1.log
Successfully matched marks and tags :-)


In [3]:
import mne
import os
import glob
import pandas as pd

def get_drop_indexes(diffs):
    indexes_to_drop_from_tags = []
    indexes_to_drop_from_marks = []

    for i, diff in enumerate(diffs):
        change = diff[0]
        if change == 'addition': # tag is found in tags but not in marks
            indexes_to_drop_from_tags.append(i)
        elif change == 'removal':
            indexes_to_drop_from_marks.append(i)
    
    return(indexes_to_drop_from_marks, indexes_to_drop_from_tags)

def apply_diff(l, indexes):
    for index in sorted(indexes, reverse=True):
        del l[index]
    return(l)

def compute_lcs_len(text1, text2):
    """Computes a table of f(i, j) results."""
    n = len(text1)
    m = len(text2)

    # We store the results in a (n + 1) x (m + 1) matrix. The +1s are to
    # allocate space for the empty strings. Cell [i][j] will cache the
    # result of f(i, j).
    lcs = [[None for _ in range(m + 1)]
               for _ in range(n + 1)]

    # We then fill the matrix by going through all rows, using the fact
    # that each call only needs results from the previous (i - 1) or
    # same (i) row, and from the previous (j - 1) or same (j) column.
    for i in range(0, n + 1):
        for j in range(0, m + 1):
          # The remaining code is exactly the same recursion as before, but
          # we do not make recursive calls and instead use the results cached
          # in the matrix.
            if i == 0 or j == 0:
                lcs[i][j] = 0
            elif text1[i - 1] == text2[j - 1]:
                lcs[i][j] = 1 + lcs[i - 1][j - 1]
            else:
                lcs[i][j] = max(lcs[i - 1][j], lcs[i][j - 1])

    return lcs

def diff(text1, text2):
    """Computes the optimal diff of the two given inputs.

    The result is a list where all elements are Removals, Additions or
    Unchanged elements.
    """
    lcs = compute_lcs_len(text1, text2)
    results = []

    text1 = list(text1)
    text2 = list(text2)
    
    i = len(text1)
    j = len(text2)

  # We iterate until we reach the end of both texts.
    while i != 0 or j != 0:
        # If we reached the end of one of text1 (i == 0) or text2 (j == 0),
        # then we just need to print the remaining additions and removals.
        if i == 0:
            results.append(('addition', text2[j - 1]))
            j -= 1
        elif j == 0:
            results.append(('removal', text1[i - 1]))
            i -= 1
        # Otherwise there's still parts of text1 and text2 left. If the
        # currently considered parts are equal, then we found an unchanged
        # part which belongs to the longest common subsequence.
        elif text1[i - 1] == text2[j - 1]:
            results.append(('unchanged', text1[i - 1]))
            i -= 1
            j -= 1
        # In any other case, we go in the direction of the longest common
        # subsequence.
        elif lcs[i - 1][j] <= lcs[i][j - 1]:
            results.append(('addition', text2[j - 1]))
            j -= 1
        else:
            results.append(('removal', text1[i - 1]))
            i -= 1

    # Reverse results because we iterated over the texts from the end but
    # want the results to be in forward order.
    return list(reversed(results))

def get_tags(events):
    tags = []
    for i in range(1, len(events)):
        tags.append(events[i][2])
    return tags
    
def get_marks_and_streams_from_log_file(sub):
    # Get events from log files
    log_dir = '../data/logs'
    logs = pd.DataFrame()

    for fpath in list(glob.glob(f'{log_dir}/sub-{sub}_*.log')):
        print(fpath)

        # checking if it is a file
        if os.path.isfile(fpath):
            log = pd.read_csv(fpath)
            logs = pd.concat([logs, log])

    logs = logs.sort_values(by = ['block_num', 'seq_num', 'tone_num'])
    logs = logs.reset_index()
    marks = list(logs.mark)
    streams = logs.stream

    return(marks, streams)

def make_tags_hierarchical(streams, tags):

    # Change stream string value from 'r' and 'l' into 1 and 2
    stream_tag = streams.replace(['r', 'l'], [1, 2])
    stream_tag = list(stream_tag)

    # Concat stream tags with event tags
    hier_tags = []
    hier_tags.insert(0, 99999)
    for stream, tag in zip(stream_tag, tags):
        hier_tag = int(str(stream) + str(tag))
        hier_tags.append(hier_tag)

    return(hier_tags)

def add_hierarchical_tags_to_events(events, hier_tags):
    hier_events = []
    for i in range(0, len(events)):
        hier_event = list(events[i])
        hier_event[2] = hier_tags[i]
        hier_events.append(hier_event)
    return(hier_events)

In [42]:
#testing from terminal 
# from util.io.add_stream_to_event_tags import *
import mne
import os
import glob
import pandas as pd
        
def add_stream_to_event_tags(events, sub):

    # Extract event tags
    tags = []
    for i in range(1, len(events)):
        tags.append(events[i][2])
        
    # Get marks from log files
    marks, streams = get_marks_and_streams_from_log_file(sub)

    # Find the indexes that match event and log tags up
    window = 20
    tags_i, marks_i = get_index_of_match(tags, marks, window)

    # Trim marks to match event tags
    start_i = marks_i
    end_i = marks_i + len(tags)
    marks = marks[start_i:end_i]
    streams = streams[start_i:end_i]

    # Add stream to tag
    hier_tags = make_tags_hierarchical(streams, tags)

    # Add hierarchical tags to events object
    hier_events = add_hierarchical_tags_to_events(events, hier_tags)

    return(hier_events)

def get_tags(events):
    tags = []
    for i in range(1, len(events)):
        tags.append(events[i][2])
    return tags
    
def get_marks_and_streams_from_log_file(sub):
    # Get events from log files
    log_dir = '../data/logs'
    logs = pd.DataFrame()

    for fpath in list(glob.glob(f'{log_dir}/sub-{sub}_*.log')):
        print(fpath)

        # checking if it is a file
        if os.path.isfile(fpath):
            log = pd.read_csv(fpath)
            logs = pd.concat([logs, log])

    logs = logs.sort_values(by = ['block_num', 'seq_num', 'tone_num'])
    logs = logs.reset_index()
    marks = list(logs.mark)
    streams = list(logs.stream)

    return(marks, streams)

def check(tags, marks, tags_i, marks_i, score):
    if tags[tags_i] == marks[marks_i]:
        score += 1
        tags_i += 1
        marks_i += 1
    else:
        tags_i = tags_i - score
        marks_i += 1
        score = 0
    return(tags_i, marks_i, score)

def get_index_of_match(tags, marks, window):
    for tags_i in range(len(tags) - window):
        for marks_i in range(len(marks) - window):
            tags_set = tuple(tags[tags_i:tags_i+window])
            marks_set = tuple(marks[marks_i:marks_i+window])
            if tags_set == marks_set:
                print(f'Match found! tags_i: {tags_i}; marks_i: {marks_i}')
                print(f'tags_set: {tags_set}')
                print(f'marks_set: {marks_set}')
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError('No match found!')
    if tuple(tags[tags_i:len(tags)]) != tuple(marks[marks_i:marks_i + len(tags)]): # length of marks should always >= length of tags
        raise ValueError('Event tags do not match log file tags!')
    return(tags_i, marks_i)

def make_tags_hierarchical(streams, tags):

    # Change stream string value from 'r' and 'l' into 1 and 2
    stream_tag = streams.replace(['r', 'l'], [1, 2])
    stream_tag = list(stream_tag)

    # Concat stream tags with event tags
    hier_tags = []
    hier_tags.insert(0, 99999)
    for stream, tag in zip(stream_tag, tags):
        hier_tag = int(str(stream) + str(tag))
        hier_tags.append(hier_tag)

    return(hier_tags)

def add_hierarchical_tags_to_events(events, hier_tags):
    hier_events = []
    for i in range(0, len(events)):
        hier_event = list(events[i])
        hier_event[2] = hier_tags[i]
        hier_events.append(hier_event)
    return(hier_events)

# sub = 9
# eeg_fpath = '../data/raw/sub-9.vhdr'
# raw = mne.io.read_raw(eeg_fpath)
# events, event_ids = mne.events_from_annotations(raw)
# events_new = add_stream_to_event_tags(events, sub)

In [2]:
tags = []
for i in range(1, len(events)):
    tags.append(events[i][2])

# Get marks from log files
marks, streams = get_marks_and_streams_from_log_file(sub)

# Find the indexes that match event and log tags up
window = 20
tags_i, marks_i = get_index_of_match(tags, marks, window)

# Trim marks to match event tags
start_i = marks_i
end_i = marks_i + len(tags)
marks = marks[start_i:end_i]
streams = streams[start_i:end_i]

# Add stream to tag
hier_tags = make_tags_hierarchical(streams, tags)

# Add hierarchical tags to events object
hier_events = add_hierarchical_tags_to_events(events, hier_tags)

../data/logs/sub-24_blk-2.log
../data/logs/sub-24_blk-4.log
../data/logs/sub-24_blk-0.log
../data/logs/sub-24_blk-1.log
../data/logs/sub-24_blk-3.log
Match found! tags_i: 0; marks_i: 0
tags_set: (12, 12, 12, 11, 11, 13, 23, 22, 21, 13, 31, 23, 21, 22, 31, 22, 11, 32, 12, 12)
marks_set: (12, 12, 12, 11, 11, 13, 23, 22, 21, 13, 31, 23, 21, 22, 31, 22, 11, 32, 12, 12)


ValueError: Event tags do not match log file tags!

In [4]:
print(marks)

0       12
1       12
2       12
3       11
4       11
        ..
1807    33
1808    13
1809    13
1810    23
1811    23
Name: mark, Length: 1812, dtype: int64


In [8]:
print(tags[-20:])
print(marks[-20:])

[21, 33, 32, 12, 23, 31, 22, 22, 22, 22, 23, 31, 33, 21, 32, 33, 13, 13, 23, 23]
1792    21
1793    33
1794    32
1795    12
1796    23
1797    31
1798    22
1799    22
1800    22
1801    22
1802    23
1803    31
1804    33
1805    21
1806    32
1807    33
1808    13
1809    13
1810    23
1811    23
Name: mark, dtype: int64


In [15]:
len(tags)

1835

In [14]:
# print(list(marks))
len(marks)

1812

In [26]:
def compute_lcs_len(text1, text2):
    """Computes a table of f(i, j) results."""
    n = len(text1)
    m = len(text2)

    # We store the results in a (n + 1) x (m + 1) matrix. The +1s are to
    # allocate space for the empty strings. Cell [i][j] will cache the
    # result of f(i, j).
    lcs = [[None for _ in range(m + 1)]
               for _ in range(n + 1)]

    # We then fill the matrix by going through all rows, using the fact
    # that each call only needs results from the previous (i - 1) or
    # same (i) row, and from the previous (j - 1) or same (j) column.
    for i in range(0, n + 1):
        for j in range(0, m + 1):
          # The remaining code is exactly the same recursion as before, but
          # we do not make recursive calls and instead use the results cached
          # in the matrix.
            if i == 0 or j == 0:
                lcs[i][j] = 0
            elif text1[i - 1] == text2[j - 1]:
                lcs[i][j] = 1 + lcs[i - 1][j - 1]
            else:
                lcs[i][j] = max(lcs[i - 1][j], lcs[i][j - 1])

    return lcs

def diff(text1, text2):
    """Computes the optimal diff of the two given inputs.

    The result is a list where all elements are Removals, Additions or
    Unchanged elements.
    """
    lcs = compute_lcs_len(text1, text2)
    results = []

    i = len(text1)
    j = len(text2)

  # We iterate until we reach the end of both texts.
    while i != 0 or j != 0:
        # If we reached the end of one of text1 (i == 0) or text2 (j == 0),
        # then we just need to print the remaining additions and removals.
        if i == 0:
            results.append(('addition', text2[j - 1]))
            j -= 1
        elif j == 0:
            results.append(('removal', text1[i - 1]))
            i -= 1
        # Otherwise there's still parts of text1 and text2 left. If the
        # currently considered parts are equal, then we found an unchanged
        # part which belongs to the longest common subsequence.
        elif text1[i - 1] == text2[j - 1]:
            results.append(('unchanged', text1[i - 1]))
            i -= 1
            j -= 1
        # In any other case, we go in the direction of the longest common
        # subsequence.
        elif lcs[i - 1][j] <= lcs[i][j - 1]:
            results.append(('addition', text2[j - 1]))
            j -= 1
        else:
            results.append(('removal', text1[i - 1]))
            i -= 1

    # Reverse results because we iterated over the texts from the end but
    # want the results to be in forward order.
    return list(reversed(results))

diffs = diff(list(marks), list(tags))

# If there is an addition (a marker that is in tags but not marks), drop those tags

In [39]:
indexes_to_drop = []
for i, tup in enumerate(diffs):
    if tup[0] == 'unchanged':
        continue
    elif tup[0] == 'addition':
        indexes_to_drop.append(i)

In [40]:
indexes_to_drop

[119,
 120,
 121,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142]

In [1]:
# from util.io.preprocessing import *
import mne
import os
import pandas as pd
import glob
import time

In [5]:
# Get events from raw data
sub = 9
eeg_fpath = '../data/raw/sub-9.vhdr'
raw = mne.io.read_raw(eeg_fpath)
events, event_ids = mne.events_from_annotations(raw)

# Extract tags from events
tags = []
for i in range(1, len(events)):
    tags.append(events[i][2])

Extracting parameters from ../data/raw/sub-9.vhdr...
Setting channel info structure...
Used Annotations descriptions: ['New Segment/', 'Stimulus/S 11', 'Stimulus/S 12', 'Stimulus/S 13', 'Stimulus/S 21', 'Stimulus/S 22', 'Stimulus/S 23', 'Stimulus/S 31', 'Stimulus/S 32', 'Stimulus/S 33']


In [9]:
# Get events from log files
log_dir = '../data/logs'
logs = pd.DataFrame()

for fpath in list(glob.glob(f'{log_dir}/sub-{sub}_*.log')):

    # checking if it is a file
    print(fpath)
    if os.path.isfile(fpath):
        log = pd.read_csv(fpath)
        logs = pd.concat([logs, log])

logs = logs.sort_values(by = ['block_num', 'seq_num', 'tone_num'])
logs = logs.reset_index()
marks = logs.mark
streams = logs.stream

../data/logs/sub-9_blk-4.log
../data/logs/sub-9_blk-2.log
../data/logs/sub-9_blk-3.log
../data/logs/sub-9_blk-1.log
../data/logs/sub-9_blk-0.log


In [6]:
# Iterate over tags in exponential time
t0 = time.time()

window = 20
for tags_i in range(len(tags) - window):
    for marks_i in range(len(marks) - window):
        tags_set = tuple(tags[tags_i:tags_i+window])
        marks_set = tuple(marks[marks_i:marks_i+window])
        if tags_set == marks_set:
            print(f'Match found! tags_i: {tags_i}; marks_i: {marks_i}')
            print(f'tags_set: {tags_set}')
            print(f'marks_set: {marks_set}')
            found = True
            break
    if found:
        break
        
t1 = time.time()

print(f"TIME: {t1-t0}")

Match found! tags_i: 0; marks_i: 50
tags_set: (31, 13, 12, 12, 33, 23, 32, 23, 33, 33, 33, 21, 31, 12, 33, 23, 13, 12, 13, 12)
marks_set: (31, 13, 12, 12, 33, 23, 32, 23, 33, 33, 33, 21, 31, 12, 33, 23, 13, 12, 13, 12)
TIME: 0.0058002471923828125


In [238]:
# Iterate over tags in linear time
t0 = time.time()

window = 20

def check(tags, marks, tags_i, marks_i, score):
    if tags[tags_i] == marks[marks_i]:
        score += 1
        tags_i += 1
        marks_i += 1
    else:
        tags_i = tags_i - score
        marks_i += 1
        score = 0
    return(tags_i, marks_i, score)

score = 0
marks_i = 0
for tags_i in range(len(tags) - window):
    while score < window:
        tags_i, marks_i, score = check(tags, marks, tags_i, marks_i, score)
    if score == window:
        print(f'Match found! tags_i: {tags_i}; marks_i: {marks_i}')
        print(f'tags_set: {tags_set}')
        print(f'marks_set: {marks_set}')
        break
        
t1 = time.time()

print(f"TIME: {t1-t0}")

Match found! tags_i: 20; marks_i: 70
tags_set: (31, 13, 12, 12, 33, 23, 32, 23, 33, 33, 33, 21, 31, 12, 33, 23, 13, 12, 13, 12)
marks_set: (31, 13, 12, 12, 33, 23, 32, 23, 33, 33, 33, 21, 31, 12, 33, 23, 13, 12, 13, 12)
TIME: 0.003219127655029297


In [None]:
# Trim marks to match event tags
start_i = marks_i
end_i = marks_i + len(tags)
marks = marks[start_i:end_i]
streams = streams[start_i:end_i]

# Aggressive check
if not found:
    raise ValueError('No match found!')
if tuple(tags[tags_i:len(tags)]) != tuple(marks): # length of marks will always >= length of tags
    raise ValueError('Event tags do not match log file tags!')

In [257]:
# Make event tags hierarchical

# Change stream string value from 'r' and 'l' into 1 and 2
stream_tag = streams.replace(['r', 'l'], [1, 2])
stream_tag = list(stream_tag)

# Concat stream tags with event tags
hier_tags = []
hier_tags.insert(0, 99999)
for stream, tag in zip(stream_tag, tags):
    hier_tag = int(str(stream) + str(tag))
    hier_tags.append(hier_tag)

In [264]:
# Return new events object
hier_events = []
for i in range(0, len(events)):
    hier_event = list(events[i])
    hier_event[2] = hier_tags[i]
    hier_events.append(hier_event)