# Knuth-Morris-Pratt (KMP) pattern searching algorithm

In [2]:
# function for generating prefix instructions for kmp algorithm

def prefix(pattern):
    """Build the KMP prefix table for ``pattern``.

    Entry ``j`` of the returned list is the length of the longest proper
    prefix of ``pattern[:j + 1]`` that is also a suffix of it. On a mismatch
    the matcher uses these values to fall back to the best shorter prefix.
    An empty pattern gives an empty list.
    """
    size = len(pattern)

    # every position starts with "no prefix matched"
    table = [0] * size

    # length of the prefix that is currently matched
    matched = 0

    # the first letter always maps to 0, so we start from the second one
    for pos in range(1, size):
        symbol = pattern[pos]

        # fall back through shorter and shorter prefixes until one can be
        # extended by the current letter, or until nothing is left
        while matched and pattern[matched] != symbol:
            matched = table[matched - 1]

        # the current letter extends the prefix we ended up with
        if pattern[matched] == symbol:
            matched += 1

        # remember the best prefix length for this position
        table[pos] = matched

    return table

In [3]:
# kmp function that takes a pattern and text 
# it returns indexes at which this pattern starts in the text

def kmp(pattern, T):
    """Find every occurrence of ``pattern`` in ``T``.

    Args:
        pattern: the sequence to look for (e.g. a string).
        T: the text that is searched.

    Returns:
        A list of 0-based indexes of ``T`` at which ``pattern`` starts, in
        increasing order. Overlapping occurrences are all reported. An empty
        pattern yields an empty list.
    """
    m = len(pattern)

    # an empty pattern has no letter to compare against; without this guard
    # pattern[0] below raises IndexError for any non-empty text
    if m == 0:
        return []

    # transition instructions: where to fall back to after a mismatch
    pi = prefix(pattern)

    # state = number of pattern letters matched so far
    q = 0

    # start indexes of the occurrences found
    result = []

    # for each letter of the text...
    for i, letter in enumerate(T):

        # ... fall back to shorter and shorter prefixes until one of them
        # can be extended by the new letter (or we are back at state 0)
        while q > 0 and letter != pattern[q]:
            q = pi[q - 1]

        # the new letter extends the current prefix, so we advance the state;
        # otherwise we stay at state 0
        if pattern[q] == letter:
            q += 1

        # the whole pattern is matched and it ends at index i
        if q == m:
            result.append(i + 1 - q)
            # keep the longest border so overlapping occurrences are found
            q = pi[q - 1]

    return result

In [4]:
# Example: search for "abc"; it starts at 0-based indexes 4 and 14 of the text.
kmp("abc","abacabcabdabadabc")

[4, 14]