In [17]:
from typing import List, Tuple
import random

In [18]:
# Fixed seed passed to `random.seed` so that shuffles and random picks are reproducible.
RANDOM_SEED = 73685

In [19]:
# Letter inventory for the generated words: every word is built as
# consonant + vowel + consonant from these two lists.
CONSONANTS = ['f', 'k', 'm', 'n', 'p', 's', 't']
VOWELS = ['a', 'e', 'i', 'o', 'u']

In [20]:
# How many times the complete word list is generated, ordered and appended.
NUMBER_OF_REPETITIONS = 4
# Number of regular words per group when the flat word list is split up.
WORDS_PER_GROUP = 4
# Number of extra words added to every group after the regular ones.
REDUNDANT_WORDS_PER_GROUP = 1 # Each fifth word should be chosen such that it can be ignored later

In [21]:
def generate_unique_words() -> List[str]:
    """Build every consonant-vowel-consonant combination of the configured letters.

    The result is ordered by first consonant, then vowel, then final
    consonant, following the order of ``CONSONANTS`` and ``VOWELS``.

    Returns:
        A new list containing
        ``len(CONSONANTS) * len(VOWELS) * len(CONSONANTS)`` strings.
    """
    return [
        f'{onset}{nucleus}{coda}'
        for onset in CONSONANTS
        for nucleus in VOWELS
        for coda in CONSONANTS
    ]

In [22]:
def sort_randomly(words: List[str]) -> List[str]:
    """Shuffle ``words`` in place with a fixed seed and return the same list.

    The global ``random`` generator is re-seeded with ``RANDOM_SEED`` on
    every call, so lists with identical content and order always come back
    in the identical shuffled order.

    NOTE(review): because of the re-seed, a caller that shuffles the same
    input repeatedly (as ``generate_all_words`` does once per repetition)
    receives the same permutation each time -- confirm this is intended.

    Args:
        words: The list to shuffle; it is modified in place.

    Returns:
        The very same list object, now shuffled.
    """
    # Resets the state of the shared module-level RNG, not just a local one.
    random.seed(RANDOM_SEED)
    random.shuffle(words)
    return words

In [23]:
def generate_all_words(repetitions: int, words_fn, sort_fn) -> List[str]:
    """Concatenate ``repetitions`` freshly generated and ordered word lists.

    Args:
        repetitions: How many times a word list is generated and appended.
        words_fn: Zero-argument callable returning a list of words.
        sort_fn: Callable that receives that list and returns the words in
            the order in which they are appended.

    Returns:
        One flat list holding the words of all repetitions back to back.
    """
    return [
        word
        for _ in range(repetitions)
        for word in sort_fn(words_fn())
    ]

In [24]:
# Build the flat word list: NUMBER_OF_REPETITIONS shuffled copies of the unique words.
all_words = generate_all_words(NUMBER_OF_REPETITIONS, generate_unique_words, sort_randomly)
all_words

['sof',
 'nuk',
 'fon',
 'mim',
 'pom',
 'kom',
 'mep',
 'kip',
 'fot',
 'paf',
 'kaf',
 'tok',
 'fis',
 'pam',
 'kak',
 'nef',
 'kek',
 'sos',
 'maf',
 'pan',
 'fup',
 'set',
 'nit',
 'fit',
 'kik',
 'fak',
 'kep',
 'tas',
 'mas',
 'mop',
 'mun',
 'sot',
 'kut',
 'nus',
 'tup',
 'nas',
 'sif',
 'saf',
 'nos',
 'fip',
 'pup',
 'sap',
 'pus',
 'nop',
 'sop',
 'tum',
 'mat',
 'kem',
 'pif',
 'mek',
 'tik',
 'mof',
 'pat',
 'sik',
 'muf',
 'kus',
 'tuk',
 'fus',
 'kes',
 'pep',
 'sum',
 'mik',
 'tet',
 'met',
 'nen',
 'sek',
 'tot',
 'sep',
 'faf',
 'fas',
 'nek',
 'tam',
 'sim',
 'nes',
 'sup',
 'tus',
 'puk',
 'sun',
 'fap',
 'kun',
 'kok',
 'ken',
 'ket',
 'fef',
 'fop',
 'som',
 'tuf',
 'muk',
 'kat',
 'nak',
 'sis',
 'tim',
 'pip',
 'tom',
 'pok',
 'nam',
 'mip',
 'nan',
 'pun',
 'tos',
 'mef',
 'mes',
 'sin',
 'men',
 'sak',
 'sen',
 'nuf',
 'tun',
 'kot',
 'pet',
 'tut',
 'tak',
 'mit',
 'non',
 'mut',
 'map',
 'tip',
 'pit',
 'mem',
 'fin',
 'pos',
 'kim',
 'pik',
 'sok',
 'kis',


In [25]:
# Sanity check: 7 consonants * 5 vowels * 7 consonants = 245 words, times 4 repetitions.
len(all_words)

980

In [26]:
def split_into_groups(words: List[str], words_per_group: int) -> List[List[str]]:
    """Cut ``words`` into consecutive slices of ``words_per_group`` items.

    Args:
        words: The flat list to split.
        words_per_group: Size of each slice; the last slice is shorter when
            ``len(words)`` is not a multiple of this value.

    Returns:
        A list of new lists that together hold all words in original order.
    """
    group_starts = range(0, len(words), words_per_group)
    return [words[start:start + words_per_group] for start in group_starts]

In [27]:
# Split the flat word list into consecutive groups of WORDS_PER_GROUP words.
groups_of_words = split_into_groups(all_words, WORDS_PER_GROUP)
groups_of_words

[['sof', 'nuk', 'fon', 'mim'],
 ['pom', 'kom', 'mep', 'kip'],
 ['fot', 'paf', 'kaf', 'tok'],
 ['fis', 'pam', 'kak', 'nef'],
 ['kek', 'sos', 'maf', 'pan'],
 ['fup', 'set', 'nit', 'fit'],
 ['kik', 'fak', 'kep', 'tas'],
 ['mas', 'mop', 'mun', 'sot'],
 ['kut', 'nus', 'tup', 'nas'],
 ['sif', 'saf', 'nos', 'fip'],
 ['pup', 'sap', 'pus', 'nop'],
 ['sop', 'tum', 'mat', 'kem'],
 ['pif', 'mek', 'tik', 'mof'],
 ['pat', 'sik', 'muf', 'kus'],
 ['tuk', 'fus', 'kes', 'pep'],
 ['sum', 'mik', 'tet', 'met'],
 ['nen', 'sek', 'tot', 'sep'],
 ['faf', 'fas', 'nek', 'tam'],
 ['sim', 'nes', 'sup', 'tus'],
 ['puk', 'sun', 'fap', 'kun'],
 ['kok', 'ken', 'ket', 'fef'],
 ['fop', 'som', 'tuf', 'muk'],
 ['kat', 'nak', 'sis', 'tim'],
 ['pip', 'tom', 'pok', 'nam'],
 ['mip', 'nan', 'pun', 'tos'],
 ['mef', 'mes', 'sin', 'men'],
 ['sak', 'sen', 'nuf', 'tun'],
 ['kot', 'pet', 'tut', 'tak'],
 ['mit', 'non', 'mut', 'map'],
 ['tip', 'pit', 'mem', 'fin'],
 ['pos', 'kim', 'pik', 'sok'],
 ['kis', 'naf', 'sit', 'nat'],
 ['fep',

In [28]:
def _pick_redundant_word(pool: List[str], taken: List[str]) -> str:
    """Draw a random word from ``pool`` that does not occur in ``taken``."""
    # Guard the rejection loop below: if every pool word is already taken it
    # would never terminate. (An empty pool still fails in random.choice.)
    if pool and all(word in taken for word in pool):
        raise ValueError('No candidate word available that is not already in the group')
    candidate = random.choice(pool)
    while candidate in taken:
        candidate = random.choice(pool)
    return candidate


def add_redundant_word_to_groups(groups: List[List[str]], words_fn, words_per_group: int, redundant_words_per_group: int) -> List[List[str]]:
    """Add randomly chosen extra ("redundant") words to every group, in place.

    Every group except the last receives ``redundant_words_per_group`` words
    at the positions following its first ``words_per_group`` entries. A slot
    that is already occupied (e.g. when the function is run a second time on
    the same groups) is overwritten instead of growing the group further.
    The last group is padded with random words until it holds
    ``words_per_group + redundant_words_per_group`` entries.

    Each drawn word is guaranteed not to occur among the first
    ``words_per_group`` words of the group it is added to. The global
    ``random`` generator is re-seeded with ``RANDOM_SEED`` first, so the
    result is reproducible.

    Args:
        groups: Groups of words; the inner lists are modified in place.
        words_fn: Zero-argument callable returning the candidate words.
        words_per_group: Number of regular words per group.
        redundant_words_per_group: Number of extra words per group.

    Returns:
        The same ``groups`` object that was passed in.

    Raises:
        ValueError: If every candidate word already occurs in a group, so
            that no admissible word can be drawn.
        IndexError: If ``words_fn`` returns no words at all (raised by
            ``random.choice``).
    """
    if not groups:
        # Nothing to extend; previously this crashed on groups[-1].
        return groups

    redundant_words = words_fn()
    random.seed(RANDOM_SEED)

    last = len(groups) - 1
    for group in groups[:last]:
        for j in range(redundant_words_per_group):
            # Compare against the words of *this* group. The former check
            # tested the string against a list of groups and never matched.
            candidate = _pick_redundant_word(redundant_words, group[:words_per_group])
            slot = words_per_group + j
            if len(group) > slot:
                group[slot] = candidate
            else:
                group.append(candidate)

    # The last group may be shorter than the others: fill it up completely.
    final_group = groups[last]
    missing = words_per_group + redundant_words_per_group - len(final_group)
    for _ in range(missing):
        final_group.append(_pick_redundant_word(redundant_words, final_group[:words_per_group]))
    return groups

In [29]:
# Note: this extends the inner lists of `groups_of_words` in place; the returned
# (and displayed) object is the same list that was passed in.
add_redundant_word_to_groups(groups_of_words, generate_unique_words, WORDS_PER_GROUP, REDUNDANT_WORDS_PER_GROUP)


[['sof', 'nuk', 'fon', 'mim', 'pas'],
 ['pom', 'kom', 'mep', 'kip', 'kon'],
 ['fot', 'paf', 'kaf', 'tok', 'fat'],
 ['fis', 'pam', 'kak', 'nef', 'tit'],
 ['kek', 'sos', 'maf', 'pan', 'fom'],
 ['fup', 'set', 'nit', 'fit', 'nep'],
 ['kik', 'fak', 'kep', 'tas', 'mot'],
 ['mas', 'mop', 'mun', 'sot', 'kan'],
 ['kut', 'nus', 'tup', 'nas', 'kam'],
 ['sif', 'saf', 'nos', 'fip', 'tif'],
 ['pup', 'sap', 'pus', 'nop', 'min'],
 ['sop', 'tum', 'mat', 'kem', 'kif'],
 ['pif', 'mek', 'tik', 'mof', 'nok'],
 ['pat', 'sik', 'muf', 'kus', 'kop'],
 ['tuk', 'fus', 'kes', 'pep', 'kef'],
 ['sum', 'mik', 'tet', 'met', 'tem'],
 ['nen', 'sek', 'tot', 'sep', 'nin'],
 ['faf', 'fas', 'nek', 'tam', 'sut'],
 ['sim', 'nes', 'sup', 'tus', 'fum'],
 ['puk', 'sun', 'fap', 'kun', 'kap'],
 ['kok', 'ken', 'ket', 'fef', 'nup'],
 ['fop', 'som', 'tuf', 'muk', 'mif'],
 ['kat', 'nak', 'sis', 'tim', 'mup'],
 ['pip', 'tom', 'pok', 'nam', 'fuk'],
 ['mip', 'nan', 'pun', 'tos', 'nif'],
 ['mef', 'mes', 'sin', 'men', 'pum'],
 ['sak', 'se

In [48]:
def calculate_levenshtein_distance(word_a: str, word_b: str) -> Tuple[int, List[int]]:
    """Count the positions at which two equally long words differ.

    Note: despite its name this is a position-by-position comparison (the
    Hamming distance); insertions and deletions are not considered, which is
    why words of different length are rejected.

    Args:
        word_a: First word.
        word_b: Second word; must be as long as ``word_a``.

    Returns:
        A tuple ``(count, positions)`` where ``positions`` lists, in
        ascending order, the indices at which the words differ and
        ``count`` is the length of that list.

    Raises:
        ValueError: If the two words differ in length.
    """
    if len(word_a) != len(word_b):
        raise ValueError('Words must be of the same length')

    mismatches = [
        index
        for index, (char_a, char_b) in enumerate(zip(word_a, word_b))
        if char_a != char_b
    ]
    return len(mismatches), mismatches

In [52]:
def find_minimal_pairs(words: List[str]) -> List[Tuple[Tuple[str, str], int]]:
    """Collect all pairs of words that differ in exactly one position.

    Every unordered pair is examined once, with the word that comes earlier
    in ``words`` placed first in the pair.

    Args:
        words: Words to compare; all must have the same length, otherwise
            ``calculate_levenshtein_distance`` raises ``ValueError``.

    Returns:
        A list of ``((first, second), position)`` entries, where
        ``position`` is the index of the single differing character.
    """
    found: List[Tuple[Tuple[str, str], int]] = []
    for index, first in enumerate(words):
        for second in words[index + 1:]:
            distance, positions = calculate_levenshtein_distance(first, second)
            if distance == 1:
                found.append(((first, second), positions[0]))
    return found

In [53]:
# Find all pairs among the unique words that differ in exactly one letter position.
minimal_pairs = find_minimal_pairs(generate_unique_words())
len(minimal_pairs)

1960

In [54]:
# Each entry is ((word, word), index of the differing letter).
minimal_pairs


[(('faf', 'fak'), 2),
 (('faf', 'fam'), 2),
 (('faf', 'fan'), 2),
 (('faf', 'fap'), 2),
 (('faf', 'fas'), 2),
 (('faf', 'fat'), 2),
 (('faf', 'fef'), 1),
 (('faf', 'fif'), 1),
 (('faf', 'fof'), 1),
 (('faf', 'fuf'), 1),
 (('faf', 'kaf'), 0),
 (('faf', 'maf'), 0),
 (('faf', 'naf'), 0),
 (('faf', 'paf'), 0),
 (('faf', 'saf'), 0),
 (('faf', 'taf'), 0),
 (('fak', 'fam'), 2),
 (('fak', 'fan'), 2),
 (('fak', 'fap'), 2),
 (('fak', 'fas'), 2),
 (('fak', 'fat'), 2),
 (('fak', 'fek'), 1),
 (('fak', 'fik'), 1),
 (('fak', 'fok'), 1),
 (('fak', 'fuk'), 1),
 (('fak', 'kak'), 0),
 (('fak', 'mak'), 0),
 (('fak', 'nak'), 0),
 (('fak', 'pak'), 0),
 (('fak', 'sak'), 0),
 (('fak', 'tak'), 0),
 (('fam', 'fan'), 2),
 (('fam', 'fap'), 2),
 (('fam', 'fas'), 2),
 (('fam', 'fat'), 2),
 (('fam', 'fem'), 1),
 (('fam', 'fim'), 1),
 (('fam', 'fom'), 1),
 (('fam', 'fum'), 1),
 (('fam', 'kam'), 0),
 (('fam', 'mam'), 0),
 (('fam', 'nam'), 0),
 (('fam', 'pam'), 0),
 (('fam', 'sam'), 0),
 (('fam', 'tam'), 0),
 (('fan', 