In [1]:
import numpy as np
import pandas as pd
import random
from itertools import product
import re

In [2]:
# phoneme inventories used to build CVCV words
# (original note: each consonant has one unique feature)
C = list('bksr')   # consonants
V = list('a^ui')   # vowels

In [3]:
# Try seeds in order until the greedy construction below yields a word list
# that satisfies every constraint checked at the end of the loop body.
# On success `seed` and `selection_list` hold the winning seed and its words.
N_WORDS = 12          # target size of the word list
MAX_PER_POSITION = 4  # max occurrences of one phoneme per syllable position

# Every CVCV string with no repeated consonant and no repeated vowel.
# The pool does not depend on the seed, so build it once and copy it per seed.
all_candidates = [x for x in map(''.join, product(C, V, C, V))
                  # no phoneme repetition allowed within a given word
                  if x[0] != x[2] and x[1] != x[3]]

for seed in range(1000):
    random.seed(seed)

    candidate_list = list(all_candidates)
    selection_list = []

    while len(selection_list) < N_WORDS:
        size_before = len(selection_list)

        # check how many cohort and rhyme items each word has
        cohort_num = [sum(1 for y in selection_list if x[:2] == y[:2] and x != y) for x in selection_list]
        rhyme_num = [sum(1 for y in selection_list if x[1:] == y[1:] and x != y) for x in selection_list]

        # randomly select a new word to start with when cohort and rhyme items are evenly distributed
        # (random.sample(..., 1)[0] is kept as-is so the sequence of random draws is unchanged)
        if len(selection_list) == 0:
            selection_list.append(random.sample(candidate_list, 1)[0])
        # logical `and` is required here: `&` binds tighter than `==`, which
        # would turn this into the chained comparison a == (b & c) == d
        elif min(cohort_num) == max(cohort_num) and min(rhyme_num) == max(rhyme_num):
            # eliminate words with the same first vowels as the selected words
            used_vowels = {y[1] for y in selection_list}
            candidate_list = [x for x in candidate_list if x[1] not in used_vowels]
            if not candidate_list:
                # nothing left to draw from; give up on this seed
                break
            selection_list.append(random.sample(candidate_list, 1)[0])

        # selection_list grows while it is being iterated, so words appended
        # below are themselves visited later in this same pass
        for i in selection_list:
            # check the number of cohort and rhyme items of a given word
            i_cohort = [x for x in selection_list if x[:2] == i[:2] and x != i]
            i_rhyme = [x for x in selection_list if x[1:] == i[1:] and x != i]

            # reset per word so the fourth-word step never reuses a cohort or
            # rhyme that was found for a different word
            cohort = None
            rhyme = None

            # only search for a cohort item when a given word doesn't already have one
            if len(i_cohort) == 0:
                cohort_list = [x for x in candidate_list
                               if x.startswith(i[:2]) and x not in selection_list]

                if len(cohort_list) > 0:
                    cohort = random.sample(cohort_list, 1)[0]
                    selection_list.append(cohort)
                    candidate_list = [x for x in candidate_list if x[:2] != cohort[:2]]

            # only search for a rhyme item when a given word doesn't already have one
            if len(i_rhyme) == 0:
                rhyme_list = [x for x in candidate_list
                              if x.endswith(i[1:]) and x not in selection_list]

                if len(rhyme_list) > 0:
                    rhyme = random.sample(rhyme_list, 1)[0]
                    selection_list.append(rhyme)
                    candidate_list = [x for x in candidate_list if x[1:] != rhyme[1:]]

            # create a fourth word based on the cohort and rhyme items to complete the subset of words
            # every 4 words should be a complete set where each word has one cohort and one rhyme
            # (only when this word had neither and both were just found)
            if cohort is not None and rhyme is not None:
                selection_list.append(rhyme[:2] + cohort[2:])

        if len(selection_list) == size_before:
            # a full pass added nothing, so further passes cannot either;
            # stop instead of looping forever and let the checks reject the seed
            break

    # calculate phoneme frequency by position
    first_half = ''.join(y[:2] for y in selection_list)
    second_half = ''.join(y[2:] for y in selection_list)
    phon_freq = {x: [first_half.count(x), second_half.count(x)] for x in C + V}
    phon_freq_0 = {k: v[0] for k, v in phon_freq.items()}
    phon_freq_1 = {k: v[1] for k, v in phon_freq.items()}

    # the construction must have produced a complete list
    if (len(selection_list) == N_WORDS and
        # each phoneme is limited to no more than 4 times at a given position across words
        max(phon_freq_0.values()) <= MAX_PER_POSITION and
        max(phon_freq_1.values()) <= MAX_PER_POSITION and
        # no phoneme repetition allowed within a given word
        all(x[0] != x[2] for x in selection_list) and
        all(x[1] != x[3] for x in selection_list) and
        # no transposed syllables are allowed in a given word list
        all(x[2:] + x[:2] not in selection_list for x in selection_list)):

        print("seed: {}".format(seed))
        break
else:
    # no seed passed the checks; selection_list holds the last failed attempt
    print("no valid word list found for any seed in range(1000)")

seed: 255


In [4]:
# For every selected word, print the other words that share its first
# syllable (cohort) and the other words that share its last three phonemes (rhyme).
for target in selection_list:
    others = [word for word in selection_list if word != target]
    cohort_items = [word for word in others if word[:2] == target[:2]]
    rhyme_items = [word for word in others if word[1:] == target[1:]]
    print("target: {}, cohort: {}, rhyme: {}".format([target], cohort_items, rhyme_items))

target: ['b^si'], cohort: ['b^ra'], rhyme: ['k^si']
target: ['b^ra'], cohort: ['b^si'], rhyme: ['k^ra']
target: ['k^si'], cohort: ['k^ra'], rhyme: ['b^si']
target: ['k^ra'], cohort: ['k^si'], rhyme: ['b^ra']
target: ['kibu'], cohort: ['kisa'], rhyme: ['ribu']
target: ['kisa'], cohort: ['kibu'], rhyme: ['risa']
target: ['ribu'], cohort: ['risa'], rhyme: ['kibu']
target: ['risa'], cohort: ['ribu'], rhyme: ['kisa']
target: ['suk^'], cohort: ['suki'], rhyme: ['ruk^']
target: ['suki'], cohort: ['suk^'], rhyme: ['ruki']
target: ['ruk^'], cohort: ['ruki'], rhyme: ['suk^']
target: ['ruki'], cohort: ['ruk^'], rhyme: ['suki']


In [5]:
# Phoneme counts by position: [count in first syllables, count in second syllables].
onset_syllables = ''.join(word[:2] for word in selection_list)
final_syllables = ''.join(word[2:] for word in selection_list)
{phoneme: [onset_syllables.count(phoneme), final_syllables.count(phoneme)]
 for phoneme in C + V}

{'b': [2, 2],
 'k': [4, 4],
 's': [2, 4],
 'r': [4, 2],
 'a': [0, 4],
 '^': [4, 2],
 'u': [4, 2],
 'i': [4, 4]}

In [6]:
# Syllable (consonant + vowel) counts by position:
# [count among first syllables, count among second syllables].
joined_first = ''.join(word[:2] for word in selection_list)
joined_second = ''.join(word[2:] for word in selection_list)
{syllable: [joined_first.count(syllable), joined_second.count(syllable)]
 for syllable in map(''.join, product(C, V))}

{'ba': [0, 0],
 'b^': [2, 0],
 'bu': [0, 2],
 'bi': [0, 0],
 'ka': [0, 0],
 'k^': [2, 2],
 'ku': [0, 0],
 'ki': [2, 2],
 'sa': [0, 2],
 's^': [0, 0],
 'su': [2, 0],
 'si': [0, 2],
 'ra': [0, 2],
 'r^': [0, 0],
 'ru': [2, 0],
 'ri': [2, 0]}

In [7]:
# show the final word list, in the order the words were appended during the search
print(selection_list)

['b^si', 'b^ra', 'k^si', 'k^ra', 'kibu', 'kisa', 'ribu', 'risa', 'suk^', 'suki', 'ruk^', 'ruki']
