In [10]:
import re
import string
import time
from timeit import timeit

# pattern matching

## algorithms

### naive


In [11]:
def naive_patter_matching(text:string, pattern: string):
    result = []
    for s in range(len(text)-len(pattern)+1):    
        if pattern==text[s:s+len(pattern)]:
            result.append(s)
    return result

 
### finite automaton

In [12]:
def transition_table(pattern:string):
    result = []
    alphabet = set()
    for a in pattern:
        alphabet.add(a)
    for q in range(0, len(pattern) + 1):
        result.append({})
        for a in alphabet:
            k = min(len(pattern) + 1, q + 2)
            found = False
            while not found:
                k = k - 1
                suffix = pattern[:q] + a
                prefix = pattern[:k]
                if prefix == '':
                    found = True
                for i in range(q+1):
                    if suffix[i:] == prefix:
                        found = True
                        break
            result[q][a] = k
    return result

def finite_automaton_pattern_matching(text:string, pattern:string): 
    q = 0
    delta = transition_table(pattern)
    result = []
    for s in range(0, len(text)):
        if text[s] in delta[q]:
            q = delta[q][text[s]]
            if q == len(delta) - 1:
                result.append(s + 1 - q)
        else:
            q=0
    return result

### Knutha-Morrisa-Pratta

In [13]:
def prefix_function(pattern:string):
    pi = [0]
    k = 0
    for q in range(1, len(pattern)):
        while k > 0 and pattern[k] != pattern[q]:
            k = pi[k-1]
        if pattern[k] == pattern[q]:
            k = k + 1
        pi.append(k)
    return pi

def kmp_string_matching_pattern_matching(text, pattern):
    pi = prefix_function(pattern)
    q = 0
    result = []
    for i in range(0, len(text)):
        while q > 0 and pattern[q] != text[i]:
            q = pi[q-1]
        if pattern[q] == text[i]:
            q = q + 1
        if q == len(pattern):
            result.append(i + 1 - q)
            q = pi[q-1]
    return result

## execution time tests

In [23]:
with open('1997_714.txt', 'r') as file:
    data = file.read()
    start = time.time()
    naive_patter_matching(data, "Art")
    end = time.time()
    print(f"naive time elapsed: {end-start}")
    start = time.time()
    finite_automaton_pattern_matching(data, "Art")
    end = time.time()
    print(f"finite automaton time elapsed: {end-start}")
    start = time.time()
    kmp_string_matching_pattern_matching(data, "Art")
    end = time.time()
    print(f"Knutha-Morrisa-Pratta time elapsed: {end-start}")
          

naive time elapsed: 0.12645864486694336
finite automaton time elapsed: 0.0299224853515625
Knutha-Morrisa-Pratta time elapsed: 0.06503605842590332


## naive worst case

In [30]:
pattern = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
text = "aaaaaaaaaa"
start = time.time()
naive_patter_matching(text, pattern)
end = time.time()
print(f"naive time elapsed: {end-start}")
start = time.time()
finite_automaton_pattern_matching(text, pattern)
end = time.time()
print(f"finite automaton time elapsed: {end-start}")
start = time.time()
kmp_string_matching_pattern_matching(text, pattern)
end = time.time()
print(f"Knutha-Morrisa-Pratta time elapsed: {end-start}")
          

naive time elapsed: 0.0001239776611328125
finite automaton time elapsed: 0.0036072731018066406
Knutha-Morrisa-Pratta time elapsed: 0.00015306472778320312


## finite automaton worse than KMP

In [31]:
pattern = "qwertyuiopasdfghjklzxcvbnm"
start = time.time()
transition_table(pattern)
end = time.time()
print(f"transition_table time elapsed: {end-start}")

start = time.time()
prefix_function(pattern)
end = time.time()
print(f"prefix_function time elapsed: {end-start}")

transition_table time elapsed: 0.03694772720336914
prefix_function time elapsed: 0.00012183189392089844
