In [None]:
import random
import string
import time


def generate_strings(sizes, alphabet_sizes):
    """Build one random test string per (size, alphabet size) combination.

    Sizes are iterated in the outer loop and alphabet sizes in the inner
    loop. For each pair, a fresh alphabet of distinct characters is sampled
    from the ASCII letters and digits, and a string of the requested length
    is drawn from that alphabet with replacement.

    Args:
        sizes: iterable of string lengths to generate.
        alphabet_sizes: iterable of alphabet sizes; each is passed to
            ``random.sample`` over the 62-character letters+digits pool.

    Returns:
        A list of ``(size, alpha_size, random_string)`` tuples in iteration
        order.
    """
    pool = string.ascii_letters + string.digits

    def make_entry(length, symbol_count):
        # Alphabet first, then the text: keeps the order of random draws.
        symbols = random.sample(pool, symbol_count)
        text = ''.join(random.choices(symbols, k=length))
        return (length, symbol_count, text)

    return [
        make_entry(length, symbol_count)
        for length in sizes
        for symbol_count in alphabet_sizes
    ]


def generate_patterns(pattern_lengths, alphabet_size, test_string):
    """Create one random pattern for each requested length.

    Every pattern is assembled from characters drawn independently (with
    replacement) from ``test_string``, so a pattern uses only characters
    that appear in ``test_string`` but is not necessarily a substring of it.

    Args:
        pattern_lengths: iterable of pattern lengths.
        alphabet_size: accepted for interface compatibility; not used here.
        test_string: sequence of characters to draw from.

    Returns:
        A list of pattern strings, one per entry of ``pattern_lengths``,
        in the same order.
    """
    return [
        ''.join(random.choices(test_string, k=length))
        for length in pattern_lengths
    ]

# Benchmark configuration.
RANDOM_SEED = 42  # fixed so a fresh "Restart & Run All" regenerates the same inputs
alphabet_sizes = [4, 15, 20]
string_sizes = [100000, 500000, 1000000]
pattern_lengths = [4, 16, 32]

# Seed the module-level generator before the first random.sample / random.choices
# call so the generated texts and patterns are reproducible.
random.seed(RANDOM_SEED)

strings = generate_strings(string_sizes, alphabet_sizes)
# NOTE(review): the patterns are drawn from the characters of this fixed literal,
# not from the generated texts, so they may never occur in a given text.
patterns = generate_patterns(pattern_lengths, 20, "bananabanaba")

In [2]:
# Naive string matching algorithm
def naive_pattern_matching(text, pattern):
    """Find all occurrences of ``pattern`` in ``text`` by brute force.

    Each candidate start index is checked by comparing the slice of
    ``text`` of the pattern's length against ``pattern``.

    Returns:
        The list of matching start indices in increasing order. It is empty
        when the pattern is longer than the text; an empty pattern matches
        at every index from 0 through ``len(text)``.
    """
    width = len(pattern)
    last_start = len(text) - width
    return [
        start
        for start in range(last_start + 1)
        if text[start:start + width] == pattern
    ]


In [3]:
# KMP pattern matching algorithm
def build_kmp_table(pattern):
    """Compute the KMP prefix (failure) table for ``pattern``.

    Entry ``i`` is the length of the longest proper prefix of
    ``pattern[:i + 1]`` that is also a suffix of it.

    Returns:
        A list of ``len(pattern)`` integers (empty for an empty pattern).
    """
    table = [0] * len(pattern)
    border = 0  # length of the border carried over from the previous index
    for idx in range(1, len(pattern)):
        current = pattern[idx]
        # Fall back through shorter borders until one can be extended.
        while border and current != pattern[border]:
            border = table[border - 1]
        if current == pattern[border]:
            border += 1
        # When nothing matched, border is 0 here, which is the entry's value.
        table[idx] = border
    return table

def kmp_pattern_matching(text, pattern):
    """Find all occurrences of ``pattern`` in ``text`` with Knuth-Morris-Pratt.

    The text is scanned once; on a mismatch the prefix table from
    ``build_kmp_table`` says how much of the current partial match can be
    kept, so no text character is re-read.

    Args:
        text: the string to search in.
        pattern: the string to search for.

    Returns:
        The list of match start indices in increasing order (overlapping
        matches included). An empty pattern matches at every index from 0
        through ``len(text)``, the same result the naive matcher gives; a
        pattern longer than the text yields ``[]``.
    """
    m = len(pattern)
    n = len(text)
    # Without this guard, pattern[j] below raises IndexError for m == 0.
    if m == 0:
        return list(range(n + 1))
    # No match is possible, so skip building the table.
    if m > n:
        return []

    kmp_table = build_kmp_table(pattern)
    j = 0  # number of pattern characters currently matched
    positions = []
    for i in range(n):
        # Shrink the partial match until text[i] can extend it (or it is empty).
        while j > 0 and text[i] != pattern[j]:
            j = kmp_table[j - 1]
        if text[i] == pattern[j]:
            j += 1
            if j == m:
                positions.append(i - m + 1)
                # Keep the longest border so overlapping matches are found.
                j = kmp_table[j - 1]
    return positions


In [None]:

def rabin_karp_pattern_matching(text, pattern):
    """Find all occurrences of ``pattern`` in ``text`` with Rabin-Karp.

    A rolling polynomial hash (base 256, modulus 101) of each text window is
    compared with the pattern's hash; on a hash hit the window is compared
    with the pattern character by character to rule out collisions.

    Args:
        text: the string to search in.
        pattern: the string to search for.

    Returns:
        The list of match start indices in increasing order. An empty
        pattern matches at every index from 0 through ``len(text)``, the
        same result the naive matcher gives; a pattern longer than the text
        yields ``[]``.
    """
    d = 256  # hash base
    q = 101  # hash modulus
    m = len(pattern)
    n = len(text)

    # An empty pattern has no window to hash; report every position directly.
    if m == 0:
        return list(range(n + 1))
    # Hashing the first window would index past the end of the text.
    if m > n:
        return []

    # Weight of the window's leading character: d**(m-1) mod q.
    h = pow(d, m - 1, q)

    pattern_hash = 0
    text_hash = 0
    # zip stops after the m pattern characters, i.e. the first text window.
    for pattern_char, text_char in zip(pattern, text):
        pattern_hash = (d * pattern_hash + ord(pattern_char)) % q
        text_hash = (d * text_hash + ord(text_char)) % q

    positions = []
    last_start = n - m
    # Slide the window over the text one position at a time.
    for i in range(last_start + 1):
        if pattern_hash == text_hash and text[i:i + m] == pattern:
            positions.append(i)
        if i < last_start:
            # Drop text[i], append text[i + m]. With a positive modulus,
            # Python's % is never negative, so no sign fix-up is needed.
            text_hash = (d * (text_hash - ord(text[i]) * h) + ord(text[i + m])) % q
    return positions


In [None]:
# Function to test different algorithms
def test_algorithms(strings, patterns):
    results = []
    
    for string_info in strings:
        size, alpha_size, test_string = string_info
        for pattern in patterns:
            start_time = time.time()
            naive_result = naive_pattern_matching(test_string, pattern)
            naive_time = (time.time() - start_time) * 1000  # in milliseconds

            start_time = time.time()
            kmp_result = kmp_pattern_matching(test_string, pattern)
            kmp_time = (time.time() - start_time) * 1000  # in milliseconds

            start_time = time.time()
            rk_result = rabin_karp_pattern_matching(test_string, pattern)
            rk_time = (time.time() - start_time) * 1000  # in milliseconds

            results.append((size, alpha_size, len(pattern), naive_time, kmp_time, rk_time))
    
    return results


results = test_algorithms(strings, patterns)

# One report line per (string, pattern) combination; timings are in milliseconds.
for result in results:
    size, alpha_size, pattern_len, naive_ms, kmp_ms, rk_ms = result
    print(f"String Size: {size}, Alphabet Size: {alpha_size}, Pattern Length: {pattern_len}, "
          f"Naive Time: {naive_ms:.4f}ms, KMP Time: {kmp_ms:.4f}ms, Rabin-Karp Time: {rk_ms:.4f}ms")


String Size: 100000, Alphabet Size: 4, Pattern Length: 4, Naive Time: 1.0004ms, KMP Time: 7.2184ms, Rabin-Karp Time: 17.5207ms
String Size: 100000, Alphabet Size: 4, Pattern Length: 16, Naive Time: 4.5159ms, KMP Time: 3.9999ms, Rabin-Karp Time: 14.2725ms
String Size: 100000, Alphabet Size: 4, Pattern Length: 32, Naive Time: 4.9167ms, KMP Time: 3.5081ms, Rabin-Karp Time: 13.6645ms
String Size: 100000, Alphabet Size: 15, Pattern Length: 4, Naive Time: 5.5938ms, KMP Time: 3.0901ms, Rabin-Karp Time: 13.6061ms
String Size: 100000, Alphabet Size: 15, Pattern Length: 16, Naive Time: 5.8091ms, KMP Time: 2.2697ms, Rabin-Karp Time: 15.0876ms
String Size: 100000, Alphabet Size: 15, Pattern Length: 32, Naive Time: 5.3864ms, KMP Time: 3.2089ms, Rabin-Karp Time: 14.6520ms
String Size: 100000, Alphabet Size: 20, Pattern Length: 4, Naive Time: 4.9996ms, KMP Time: 4.2880ms, Rabin-Karp Time: 12.8932ms
String Size: 100000, Alphabet Size: 20, Pattern Length: 16, Naive Time: 5.7034ms, KMP Time: 3.9761ms, R