In [1]:
import re
import string
import time
from timeit import timeit

# pattern matching

## algorithms

### naive


In [2]:
def naive_patter_matching(text: str, pattern: str) -> list:
    """Find every occurrence of ``pattern`` in ``text`` by brute force.

    Every candidate shift is tested by comparing the pattern against the
    slice of ``text`` of the same length that starts at that shift.

    Args:
        text: String to search in.
        pattern: String to search for.

    Returns:
        Ascending list of 0-based start indices at which ``pattern`` occurs;
        overlapping occurrences are all reported. An empty pattern matches
        at every shift ``0..len(text)``. A pattern longer than ``text``
        yields an empty list.
    """
    m = len(pattern)
    # range() is empty when the pattern is longer than the text, so no
    # explicit length check is needed.
    return [s for s in range(len(text) - m + 1) if text[s:s + m] == pattern]

 
### finite automaton

In [17]:
def transition_table(pattern: str) -> list:
    """Build the transition table of the string-matching automaton.

    State ``q`` stands for "the last ``q`` characters read equal
    ``pattern[:q]``". ``table[q][a]`` is the length of the longest prefix
    of ``pattern`` that is also a suffix of ``pattern[:q] + a``.

    Only characters that occur in ``pattern`` get an entry; callers must
    treat a missing character as a transition to state 0.

    This is the direct construction: for every (state, character) pair it
    tries candidate prefix lengths from the largest down, doing a string
    comparison for each, so it gets slow for long patterns over many
    distinct characters.

    Args:
        pattern: The pattern the automaton should recognise.

    Returns:
        A list of ``len(pattern) + 1`` dicts, one per state, mapping a
        character to the next state.
    """
    m = len(pattern)
    alphabet = set(pattern)
    table = []
    for q in range(m + 1):
        matched = pattern[:q]
        row = {}
        for a in alphabet:
            candidate = matched + a
            # The next state can be at most one longer than the current one
            # and never longer than the pattern itself.
            k = min(m, q + 1)
            # Shrink k until pattern[:k] is a suffix of what has been read;
            # k == 0 (the empty prefix) always matches, so the loop ends.
            while not candidate.endswith(pattern[:k]):
                k -= 1
            row[a] = k
        table.append(row)
    return table

def finite_automaton_pattern_matching(text: str, pattern: str) -> list:
    """Find every occurrence of ``pattern`` in ``text`` with a finite automaton.

    The automaton is built once with ``transition_table`` and then fed the
    text one character at a time; reaching the final state (``len(pattern)``)
    means an occurrence ends at the current character.

    Args:
        text: String to search in.
        pattern: String to search for.

    Returns:
        Ascending list of 0-based start indices at which ``pattern`` occurs;
        overlapping occurrences are all reported. An empty pattern matches
        at every shift ``0..len(text)``, the same answer the naive matcher
        in this notebook gives.
    """
    m = len(pattern)
    if m == 0:
        # The automaton for "" has no transitions at all, so it would never
        # report anything; handle the degenerate case explicitly.
        return list(range(len(text) + 1))

    delta = transition_table(pattern)
    q = 0
    result = []
    for s, ch in enumerate(text):
        # Characters that do not occur in the pattern have no table entry;
        # they can never extend a match, so they reset the automaton.
        q = delta[q].get(ch, 0)
        if q == m:
            result.append(s + 1 - m)
    return result

### Knuth-Morris-Pratt

In [18]:
def prefix_function(pattern: str) -> list:
    """Compute the Knuth-Morris-Pratt prefix function of ``pattern``.

    Args:
        pattern: The pattern to preprocess.

    Returns:
        A list ``pi`` with exactly ``len(pattern)`` entries, where ``pi[q]``
        is the length of the longest proper prefix of ``pattern[:q + 1]``
        that is also a suffix of it. An empty pattern yields ``[]``.
    """
    # Pre-sizing the table keeps len(pi) == len(pattern) for every input,
    # including the empty pattern.
    pi = [0] * len(pattern)
    k = 0  # length of the border of the prefix processed so far
    for q in range(1, len(pattern)):
        # Fall back through shorter and shorter borders until one can be
        # extended by pattern[q] (or none is left).
        while k > 0 and pattern[k] != pattern[q]:
            k = pi[k - 1]
        if pattern[k] == pattern[q]:
            k += 1
        pi[q] = k
    return pi

def kmp_string_matching_pattern_matching(text: str, pattern: str) -> list:
    """Find every occurrence of ``pattern`` in ``text`` with Knuth-Morris-Pratt.

    The pattern is preprocessed with ``prefix_function``; on a mismatch the
    number of matched characters falls back along that table instead of
    restarting from scratch.

    Args:
        text: String to search in.
        pattern: String to search for.

    Returns:
        Ascending list of 0-based start indices at which ``pattern`` occurs;
        overlapping occurrences are all reported. An empty pattern matches
        at every shift ``0..len(text)``, the same answer the naive matcher
        in this notebook gives.
    """
    m = len(pattern)
    if m == 0:
        # Without this guard the loop below would index pattern[0] on an
        # empty string and raise IndexError.
        return list(range(len(text) + 1))

    pi = prefix_function(pattern)
    q = 0  # number of pattern characters currently matched
    result = []
    for i, ch in enumerate(text):
        while q > 0 and pattern[q] != ch:
            q = pi[q - 1]
        if pattern[q] == ch:
            q += 1
        if q == m:
            result.append(i + 1 - m)
            # Continue from the longest border so overlapping occurrences
            # are found as well.
            q = pi[q - 1]
    return result

## execution time tests

In [19]:
# Read the corpus once and close the file before any timing starts, so the
# handle is not kept open for the duration of the benchmark.
# NOTE(review): open() uses the platform default encoding here - confirm it
# matches the encoding of the file.
with open('1997_714.txt', 'r') as file:
    data = file.read()

# Time every matcher on the same input. time.perf_counter() is the clock
# intended for measuring short durations; time.time() is wall-clock time and
# can jump when the system clock is adjusted.
for label, matcher in (
    ("naive", naive_patter_matching),
    ("finite automaton", finite_automaton_pattern_matching),
    ("Knutha-Morrisa-Pratta", kmp_string_matching_pattern_matching),
):
    start = time.perf_counter()
    matcher(data, "Art")
    end = time.perf_counter()
    print(f"{label} time elapsed: {end-start}")
          

naive time elapsed: 0.056523799896240234
finite automaton time elapsed: 0.017686128616333008
Knutha-Morrisa-Pratta time elapsed: 0.030426502227783203


## naive worst case

In [40]:
# A long single-character pattern against a long natural-language text: the
# naive matcher builds a pattern-length slice of the text at every shift.
pattern = "aaaaaaaaaaaaaaaaaaaaaaaaa"*100
text = "What the fuck did you just fucking say about me, you little bitch? I'll have you know I graduated top of my class in the Navy Seals, and I've been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I'm the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You're fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that's just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little 'clever' comment was about to bring down upon you, maybe you would have held your fucking tongue. But you couldn't, you didn't, and now you're paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You're fucking dead, kiddo."*100

# Time every matcher on the same input. time.perf_counter() is the clock
# intended for measuring short durations; time.time() is wall-clock time and
# can jump when the system clock is adjusted.
for label, matcher in (
    ("naive", naive_patter_matching),
    ("finite automaton", finite_automaton_pattern_matching),
    ("Knutha-Morrisa-Pratta", kmp_string_matching_pattern_matching),
):
    start = time.perf_counter()
    matcher(text, pattern)
    end = time.perf_counter()
    print(f"{label} time elapsed: {end-start}")
          

naive time elapsed: 0.05313563346862793
finite automaton time elapsed: 0.01462554931640625
Knutha-Morrisa-Pratta time elapsed: 0.022936344146728516


## finite automaton worse than KMP

In [30]:
pattern = "qwertyuiopasdfghjklzxcvbnm"*10

# Compare only the preprocessing step of the two matchers on the same
# pattern. time.perf_counter() is the clock intended for measuring short
# durations; time.time() is wall-clock time and can jump when the system
# clock is adjusted.
for label, preprocess in (
    ("transition_table", transition_table),
    ("prefix_function", prefix_function),
):
    start = time.perf_counter()
    preprocess(pattern)
    end = time.perf_counter()
    print(f"{label} time elapsed: {end-start}")

transition_table time elapsed: 0.24985671043395996
prefix_function time elapsed: 9.059906005859375e-05
