# Finite Automata pattern searching algorithm

In [2]:
# Build the delta (state-transition) function used by the finite-automaton pattern-searching algorithm

def get_delta(pattern):
    """Build the transition table (delta function) of the matching automaton.

    State ``q`` means "the last ``q`` letters read are equal to the first
    ``q`` letters of ``pattern``"; state ``len(pattern)`` is the accepting one.

    Args:
        pattern: the string to search for.

    Returns:
        A list with ``len(pattern) + 1`` dictionaries, one per state.
        ``result[q][letter]`` is the state reached from state ``q`` after
        reading ``letter``. Only letters that occur in ``pattern`` are present
        as keys, so an empty pattern yields ``[{}]``.
    """
    pattern_length = len(pattern)

    # for each state we keep a dictionary telling how each letter changes the state
    result = [{} for _ in range(pattern_length + 1)]

    # the alphabet of the automaton: every distinct letter of the pattern
    letters = set(pattern)

    # now for each state...
    for q in range(pattern_length + 1):

        # ... and each letter of the alphabet...
        for letter in letters:

            # ... this is what we have effectively read once `letter` follows state q
            # (built once here instead of on every step of the search below)
            candidate = pattern[:q] + letter

            # the new state can grow by at most one and can never exceed the
            # pattern length, so longer prefixes do not need to be tried
            k = min(pattern_length, q + 1)

            # shrink k until the prefix of length k is a suffix of what we read;
            # k == 0 (the empty prefix) always matches, so the loop terminates
            while k > 0 and not candidate.endswith(pattern[:k]):
                k -= 1

            # the new state for this letter is the one corresponding to that prefix
            result[q][letter] = k

    return result

In [3]:
# finite automata algorithm function that takes a pattern and text 
# it returns indices at which this pattern starts in the text

def finite_automata(pattern, text):
    """Find every occurrence of ``pattern`` in ``text`` with a finite automaton.

    Args:
        pattern: the string to search for.
        text: the sequence of letters to search in.

    Returns:
        A list of the indices in ``text`` at which an occurrence of
        ``pattern`` starts, in increasing order (overlapping occurrences
        included).
    """
    # transition table built from the pattern; its last state is the accepting one
    transitions = get_delta(pattern)
    accepting = len(pattern)

    matches = []
    state = 0

    for position, symbol in enumerate(text):
        row = transitions[state]

        # the table only has entries for letters of the pattern; any other
        # letter sends the automaton back to the start state
        if symbol not in row:
            state = 0
            continue

        state = row[symbol]

        # reaching the accepting state means the pattern ends at `position`,
        # so it started `accepting - 1` letters earlier
        if state == accepting:
            matches.append(position + 1 - accepting)

    return matches

In [4]:
# Example run: "abc" starts at indices 4 and 9 of the text below
finite_automata("abc", "abacabcadabc")

[4, 9]