In [1]:
def compute_repetition_factor(pattern):
    """Return the repetition factor of every prefix of ``pattern``.

    The repetition factor of a string ``s`` is the largest ``r`` such that
    ``s`` equals some string ``u`` repeated ``r`` times; it is 1 when ``s``
    is not a repetition of a shorter string.

    Args:
        pattern: The pattern to analyse, as an indexable sequence
            (typically a ``str``).

    Returns:
        A list of ``len(pattern)`` integers whose entry ``i`` is the
        repetition factor of ``pattern[:i + 1]``. An empty pattern yields
        an empty list.
    """
    m = len(pattern)

    # Prefix function: pi[i] is the length of the longest proper prefix of
    # pattern[:i + 1] that is also a suffix of pattern[:i + 1].
    pi = [0] * m
    k = 0
    for i in range(1, m):
        # Fall back to shorter borders until one can be extended by pattern[i].
        while k > 0 and pattern[k] != pattern[i]:
            k = pi[k - 1]
        if pattern[k] == pattern[i]:
            k += 1
        pi[i] = k

    rep_factors = []
    for length, border in enumerate(pi, start=1):
        # border < length always holds, so the smallest period is at least 1.
        period = length - border
        # The prefix is a whole number of copies of its first `period`
        # characters exactly when the period divides its length.
        if length % period == 0:
            rep_factors.append(length // period)
        else:
            rep_factors.append(1)
    return rep_factors


# Question 2

In [2]:
# Show, for every prefix of P, its repetition factor and the repeated unit.
P = "abababa"
rep_factors = compute_repetition_factor(P)
for length, factor in enumerate(rep_factors, start=1):
    # A prefix of `length` characters with factor `factor` consists of
    # copies of its first length // factor characters.
    unit = P[:length // factor]
    print(f"ρ(P{length}) = {factor}: {unit}")


ρ(P1) = 1: a
ρ(P2) = 1: ab
ρ(P3) = 1: aba
ρ(P4) = 2: ab
ρ(P5) = 1: ababa
ρ(P6) = 3: ab
ρ(P7) = 1: abababa


In [3]:
import random
import time

def generate_random_pattern(m):
    """Return a string of ``m`` random lowercase ASCII letters."""
    first_code, last_code = 97, 122  # code points of 'a' and 'z'
    letters = []
    for _ in range(m):
        letters.append(chr(random.randint(first_code, last_code)))
    return ''.join(letters)

def run_experiment(num_trials, pattern_length):
    """Return the largest prefix repetition factor of each random pattern.

    Generates ``num_trials`` random patterns of ``pattern_length`` characters
    and, for each one, records the maximum of its per-prefix repetition
    factors. The result list has one entry per trial, in trial order.
    """
    return [
        max(compute_repetition_factor(generate_random_pattern(pattern_length)))
        for _ in range(num_trials)
    ]

# Experiment parameters.
num_trials = 100
pattern_length = 1000

# Measure elapsed time with perf_counter(): it is monotonic and
# high-resolution, whereas time.time() follows the wall clock, which can be
# adjusted while the experiment is running.
start_time = time.perf_counter()
max_repetition_factors = run_experiment(num_trials, pattern_length)
end_time = time.perf_counter()

# Largest repetition factor observed across all trials.
max_rf = max(max_repetition_factors)

print(f"Maximum repetitor values for {num_trials} random patterns of length {pattern_length}:")
print(max_repetition_factors)
print("Maximum repetitor value:", max_rf)
print(f"Average time per pattern: {(end_time - start_time) / num_trials} seconds")


Maximum repetitor values for 100 random patterns of length 1000:
[1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Maximum repetitor value: 2
Average time per pattern: 0.007232446670532227 seconds


Conclusion: After running the experiment for 100 trials of random patterns of 1000 characters each, I found that the maximum repetition factor ρ*(P) is usually very small, ranging from 1 to 3 or 4; in the run shown above it never exceeded 2. This suggests that most random strings of length 1000 have very little repetition.

# Question 3


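The time complexity of the compute_repetition_factor function, which calculates the repetition factor of every prefix of a given pattern, is O(m), where m is the length of the pattern. The function consists of two parts. The first computes the prefix function π; it runs in O(m) amortized time because k is incremented at most once per iteration of the outer loop, so the inner while loop can decrease it at most m times in total. The second is a single pass that converts each π(i) into a repetition factor in constant time, which is also O(m). Therefore, the total time complexity of the function is O(m). However, the section of code that prints the repetition factors and repeated substrings has a time complexity of O(m^2) in the worst case: it is a single loop, but each iteration slices and prints a substring of up to i+1 characters. As a result, the overall time complexity of the code, including the printing section, is O(m^2).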

# Question 4

If we select a binary string P of length m uniformly at random, then the expected maximum repetition factor ρ*(P) is bounded by a constant. This follows from a union bound. Fix k ≥ 2. If ρ*(P) ≥ k, then for some period length p ≥ 1 the prefix of length kp consists of k copies of its first p characters; for a fixed p this happens with probability 2^(-(k-1)p), because the last (k-1)p characters are determined by the first p. Taking the union over all p gives Pr[ρ*(P) ≥ k] ≤ Σp 2^(-(k-1)p) = 1/(2^(k-1) - 1). Summing over all possible k values, E[ρ*(P)] = Σk Pr[ρ*(P) ≥ k] ≤ 1 + Σ(k≥2) 1/(2^(k-1) - 1) < 3, which is a constant regardless of m. Thus, the expected value of ρ*(P) is also a constant.