## Implementacje algorytmów
Naiwny (podpunkt 1.1):

In [135]:
def naive(text, pattern):
    """Find every occurrence of pattern in text by brute force.

    Each possible alignment of the pattern against the text is checked
    character by character.

    Returns:
        A list of start indices, in increasing order, at which pattern
        occurs in text. An empty pattern matches at every index from 0 to
        len(text) inclusive.
    """
    width = len(pattern)
    last_start = len(text) - width

    # An alignment is a match when no pattern position disagrees with the
    # text; any() stops at the first mismatch.
    return [
        start
        for start in range(last_start + 1)
        if not any(pattern[j] != text[start + j] for j in range(width))
    ]


Automat skończony (podpunkt 1.2):

In [136]:
def transition_table(pattern):
    """Build the transition table of the string-matching automaton.

    State q means "the last q characters read equal pattern[:q]".

    Returns:
        A list with len(pattern) + 1 dictionaries. Entry [q][a] is the
        next state after reading character a in state q: the length of
        the longest prefix of pattern that is a suffix of pattern[:q] + a.
        Only characters that occur in pattern have entries.
    """
    symbols = set(pattern)
    size = len(pattern)
    table = []

    for state in range(size + 1):
        already_read = pattern[:state]
        row = {}

        for symbol in symbols:
            candidate = already_read + symbol

            # Try the longest feasible prefix first and shrink until it is
            # a suffix of what was read. k > 0 is tested first because
            # candidate[-0:] would be the whole string, not an empty suffix.
            k = min(size, state + 1)
            while k > 0 and pattern[:k] != candidate[-k:]:
                k -= 1

            row[symbol] = k

        table.append(row)

    return table

def automat(text, pattern):
    """Find every occurrence of pattern in text with a finite automaton.

    The automaton is built by transition_table(pattern) and then fed the
    text one character at a time.

    Returns:
        A list of start indices, in increasing order, at which pattern
        occurs in text. An empty pattern matches at every index from 0 to
        len(text) inclusive, the same result naive() gives.
    """
    length = len(pattern)

    # With an empty pattern the accepting state is the start state, which
    # the loop below would never report; answer directly instead.
    if length == 0:
        return list(range(len(text) + 1))

    table = transition_table(pattern)
    result = []
    q = 0

    for i, t in enumerate(text):
        # Characters absent from the pattern have no table entry and
        # always send the automaton back to the start state.
        q = table[q].get(t, 0)

        if q == length:
            # MATCH: the pattern ends at index i.
            result.append(i - length + 1)

    return result


Algorytm Knutha-Morrisa-Pratta (podpunkt 1.3):

In [137]:
def prefix_function(pattern):
    """Compute the KMP prefix function of pattern.

    Returns:
        A list pi where pi[q] is the length of the longest proper prefix
        of pattern[:q + 1] that is also a suffix of it. The list always
        starts with 0, so an empty pattern yields [0].
    """
    table = [0]
    border = 0

    for symbol in pattern[1:]:
        # Fall back to shorter borders until one can be extended by symbol
        # or none is left.
        while border > 0 and pattern[border] != symbol:
            border = table[border - 1]

        if pattern[border] == symbol:
            border += 1

        table.append(border)

    return table

def kmp(text, pattern):
    """Find every occurrence of pattern in text with Knuth-Morris-Pratt.

    Uses prefix_function(pattern) to decide how far to fall back after a
    mismatch, so the text is scanned once without moving backwards.

    Returns:
        A list of start indices, in increasing order, at which pattern
        occurs in text (overlapping occurrences included). An empty
        pattern matches at every index from 0 to len(text) inclusive, the
        same result naive() gives.
    """
    length = len(pattern)

    # pattern[q] below would raise IndexError for an empty pattern, so
    # that case is answered up front.
    if length == 0:
        return list(range(len(text) + 1))

    pi = prefix_function(pattern)
    result = []
    q = 0  # number of pattern characters currently matched

    for i, t in enumerate(text):
        while q > 0 and pattern[q] != t:
            # MISMATCH: fall back to the next shorter border.
            q = pi[q - 1]

        if pattern[q] == t:
            q += 1

        if q == length:
            # MATCH: the pattern ends at index i.
            result.append(i - length + 1)
            # Continue from the longest border so overlaps are found and
            # pattern[q] stays in range on the next iteration.
            q = pi[q - 1]

    return result


## Otwieranie plików

In [138]:
# Read both input files fully into memory. The context managers close the
# file handles as soon as the contents have been read.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches how the files are stored.
with open("1997_714.txt", "r") as act_file:
    text = act_file.read()
with open("passages-head.tsv") as wiki_file:
    wiki = wiki_file.read()

## Pomiary

In [139]:
import time

Testy (podpunkt 2):

In [140]:
def measure(function):
    """Call function() once and return how long the call took.

    Args:
        function: A callable taking no arguments; its return value is
            discarded.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, so very short runs are not rounded to zero and system
    # clock adjustments cannot skew the interval.
    start = time.perf_counter()
    function()
    return time.perf_counter() - start

def measure_all(text, pattern):
    """Time all three searches on (text, pattern) and print the results.

    Five lines are printed: the total time of naive, automat and kmp, and,
    after the latter two, the time of their preprocessing step
    (transition_table and prefix_function). Each preprocessing time comes
    from a separate run of that step, not from inside the full search.
    """
    timed_steps = (
        ("Naiwny zajmuje:\t\t", lambda: naive(text, pattern)),
        ("Automat zajmuje:\t", lambda: automat(text, pattern)),
        ("  W tym preprocessing:\t", lambda: transition_table(pattern)),
        ("KMP zajmuje:\t\t", lambda: kmp(text, pattern)),
        ("  W tym preprocessing:\t", lambda: prefix_function(pattern)),
    )

    for label, step in timed_steps:
        print(label, measure(step), "s.")

Znajdowanie wzorca "art" w ustawie (podpunkt 3):

In [141]:
# Print the positions of "art" found by each algorithm, one labelled list
# per algorithm with a blank line between them.
for label, search in (("Naiwny:", naive), ("Automat:", automat)):
    print(label)
    print(search(text, "art"))
    print()
print("KMP:")
print(kmp(text, "art"))

Naiwny:
[1156, 1505, 4692, 4734, 4879, 5082, 5148, 5949, 6039, 7266, 7511, 7781, 8044, 8299, 9104, 9959, 10022, 10224, 11122, 11207, 11618, 13194, 15284, 15358, 16092, 16261, 16406, 16547, 16616, 16840, 16856, 23637, 24061, 24152, 24586, 24683, 24780, 24931, 25530, 25689, 27001, 27288, 27479, 27542, 27592, 27857, 28373, 28558, 28766, 30964, 31021, 31096, 31362, 31811, 32609, 32968, 33053, 33268, 33595, 34651, 34737, 35511, 36155, 37143, 37543, 38451, 38595, 39056, 39210, 39436, 39568, 39980, 41152, 41829, 42028, 42198, 42371, 42504, 42718, 42896, 42941, 43447, 43555, 43787, 44590, 44653, 44953, 45010, 45293, 45401, 47319, 47422, 48785, 48820, 48906, 49052, 49259, 49316, 49488, 49559, 49915, 49979, 50102, 50160, 50702, 51050, 51179, 51966, 52071, 52272, 52552, 53008, 53032, 53211, 53788, 53931, 54078, 54137, 54770, 55075, 55279, 55465, 55807, 55991, 56827, 56911, 57164, 57549, 57800, 57932, 57989, 58280, 58378, 58874, 58966, 59395, 59523, 59949, 60296, 60549, 60794, 61262, 61770, 62463,

Pomiar czasu dla wzorca "art" w ustawie (podpunkt 4):

In [142]:
# Time the three searches and both preprocessing steps for the pattern
# "art" in the contents of 1997_714.txt.
measure_all(text, "art")

Naiwny zajmuje:		 0.10247540473937988 s.
Automat zajmuje:	 0.02831578254699707 s.
  W tym preprocessing:	 2.86102294921875e-05 s.
KMP zajmuje:		 0.04338407516479492 s.
  W tym preprocessing:	 7.152557373046875e-06 s.


Pomiar czasu dla wzorca "Ukraina" we fragmencie Wikipedii (podpunkt 5):

In [None]:
# Time the three searches and both preprocessing steps for the pattern
# "Ukraina" in the contents of passages-head.tsv.
measure_all(wiki, "Ukraina")

## Propozycje
Propozycja zarówno dla podpunktu 6, jak i 7.

In [None]:
# Text: 10000 times "a" followed by one "b".
# Pattern: 100 times "a" followed by one "b".
# At almost every alignment the first 100 pattern characters agree with
# the text and only the final "b" differs, so the naive search does close
# to len(pattern) comparisons per position.
t = "a" * 10000 + "b"
p = "a" * 100 + "b"

measure_all(t, p)