In [1]:
# Any of these characters in the headword disqualifies the entry
# (punctuation and digits).
specialchars = '!@#$%^&*()-+"\',./:1234567890'

# Build a list of [headword, transcription] pairs from the tab-separated
# dictionary file. The context manager closes the file as soon as the
# loop finishes, and each line is split only once.
pronunciations = []
with open('cmudict-0.7b-ipa.txt', 'r', encoding='utf-8') as fh:
    for line in fh:
        # Lines starting with ';;;' are skipped.
        if line.startswith(';;;'):
            continue
        fields = line.split('\t')
        # A line without a tab has no transcription field; skip it rather
        # than fail with IndexError.
        if len(fields) < 2:
            continue
        headword, raw_ipa = fields[0], fields[1]
        if any(c in specialchars for c in headword):
            continue
        # Entries whose transcription field contains a comma are dropped, as
        # are long ones. The length is measured on the raw field, i.e. before
        # the replacements below and before the trailing newline is stripped.
        if ',' in raw_ipa or len(raw_ipa) >= 7:
            continue
        pronunciations.append(
            [headword, raw_ipa.replace('r', 'ɹ').replace('ɝ', 'əɹ').rstrip('\n')]
        )

In [2]:
# Preview up to the first ten entries. Slicing (rather than indexing with
# range(10)) keeps this cell from raising IndexError on a shorter list.
for preview_entry in pronunciations[:10]:
    print(preview_entry)

['AAH', 'ˈɑː']
['AAKER', 'ˈɑːkəɹ']
['AARGH', 'ˈɑːɹg']
['AARON', 'ˈɛɹən']
['AASE', 'ˈɑːs']
['AB', 'ˈæb']
['ABACK', 'əˈbæk']
['ABAIR', 'əˈbɛɹ']
['ABASH', 'əˈbæʃ']
['ABBA', 'ˈæbə']


In [3]:
# Number of [headword, transcription] entries held in `pronunciations`.
len(pronunciations)

11490

In [4]:
# Concatenate every transcription (space-separated) and print the set of
# distinct characters that occur, to see which symbols need an entry in the
# substitution table.
allipa = " ".join(entry[1] for entry in pronunciations)
ipa_inventory = set(allipa)
print(ipa_inventory)

{'e', 'ɑ', 'ʃ', 'j', 'ɹ', 'ɔ', 'm', 'v', 'ʒ', 'w', 'ˈ', 'h', 'æ', 'ʊ', 'a', 'o', 't', 'ð', 'g', 'n', 'ː', 'b', 'd', 'ʌ', 'θ', 'ɛ', 'i', 'ŋ', 'l', ' ', 'f', 'ɪ', 'ə', 'p', 's', 'u', 'k', 'z'}


In [5]:
# Substitution table used to build wrong answer choices: each symbol that can
# appear in a transcription maps to the list of strings that may replace it.
# The single-item list ['NONE'] marks a symbol that is never substituted.
error_dict = {
    # --- stops, fricatives and their look-alikes ---
    'p': ['b'],
    'b': ['p'],
    'f': ['v'],
    'v': ['f'],
    'ð': ['θ', 'th'],
    'θ': ['ð', 'th'],
    't': ['d'],
    'd': ['t'],
    's': ['z', 'ss'],
    'z': ['s', 'zz'],
    'ʃ': ['ʒ', 'sh'],
    'ʒ': ['ʃ', 'zh'],
    'k': ['g'],
    'g': ['k'],
    'h': ['x'],
    # --- nasals ---
    'n': ['ŋ', 'm'],
    'm': ['n', 'ŋ'],
    'ŋ': ['n', 'm'],
    # --- glides and liquids ---
    'j': ['y'],
    'w': ['u'],
    'ɹ': ['w', 'r'],
    'l': ['w'],
    # --- stress marks, length mark and space: left untouched ---
    'ˌ': ['NONE'],
    ' ': ['NONE'],
    'ː': ['NONE'],
    'ˈ': ['NONE'],
    # --- vowels ---
    'ɛ': ['e', 'æ', 'ɪ'],
    'æ': ['e', 'ɛ', 'a'],
    'u': ['ʊ', 'ɔ', 'oo'],
    'ʌ': ['uh', 'ɔ', 'o'],
    'ʊ': ['u', 'oo'],
    'ə': ['uh', 'ʌ'],
    'i': ['ɪ', 'ee', 'ie'],
    'ɑ': ['ah', 'æ'],
    'a': ['ah', 'æ'],
    'e': ['ɛ', 'ɪ', 'æ'],
    'o': ['oh', 'ɔ'],
    'ɔ': ['o'],
    'ɪ': ['i', 'ie', 'ee'],
}





In [6]:
import json
import random

In [7]:
def generate_answers(pron, substitutions=None):
    """Return the correct transcription mixed with single-symbol distractors.

    For every position in ``pron`` whose symbol has substitutions, one
    replacement is picked at random and spliced in at that position, giving
    one distractor per substitutable position. The correct transcription is
    added, duplicates are removed, and the result is shuffled.

    Parameters
    ----------
    pron : str
        The correct transcription.
    substitutions : dict, optional
        Maps a symbol to a list of replacement strings. The list ``['NONE']``
        marks a symbol that must not be replaced. Defaults to the
        module-level ``error_dict``.

    Returns
    -------
    list of str
        Unique answer choices in random order; always contains ``pron``.
    """
    if substitutions is None:
        substitutions = error_dict

    # Starting the set with the correct answer guarantees it is present once.
    candidates = {pron}
    for position, symbol in enumerate(pron):
        # .get() lets symbols missing from the table pass through unchanged
        # instead of raising KeyError.
        options = substitutions.get(symbol)
        # An empty list would make random.choice raise, so treat it like the
        # ['NONE'] sentinel: no distractor for this position.
        if not options or options == ['NONE']:
            continue
        candidates.add(pron[:position] + random.choice(options) + pron[position + 1:])

    answer_choices = list(candidates)
    random.shuffle(answer_choices)
    return answer_choices

In [8]:
# Spot-check on a single transcription; the result differs between runs
# because substitutions and ordering are chosen at random.
generate_answers('əˈbʌz')

['ʌˈbʌz', 'əˈbʌz', 'əˈbɔz', 'əˈpʌz', 'əˈbʌzz']

In [9]:
# For every dictionary entry, collect [headword, correct transcription,
# shuffled answer choices].
all_answers = []
for entry in pronunciations:
    headword, correct_ipa = entry[0], entry[1]
    all_answers.append([headword, correct_ipa, generate_answers(correct_ipa)])

In [10]:
# Preview up to the first ten generated questions. Slicing (rather than
# indexing with range(10)) avoids IndexError when the list is shorter.
for answer_row in all_answers[:10]:
    print(answer_row)

['AAH', 'ˈɑː', ['ˈɑː', 'ˈahː']]
['AAKER', 'ˈɑːkəɹ', ['ˈahːkəɹ', 'ˈɑːkəw', 'ˈɑːkəɹ', 'ˈɑːkuhɹ', 'ˈɑːgəɹ']]
['AARGH', 'ˈɑːɹg', ['ˈahːɹg', 'ˈɑːɹg', 'ˈɑːrg', 'ˈɑːɹk']]
['AARON', 'ˈɛɹən', ['ˈɪɹən', 'ˈɛɹən', 'ˈɛɹəŋ', 'ˈɛwən', 'ˈɛɹʌn']]
['AASE', 'ˈɑːs', ['ˈæːs', 'ˈɑːs', 'ˈɑːz']]
['AB', 'ˈæb', ['ˈeb', 'ˈæp', 'ˈæb']]
['ABACK', 'əˈbæk', ['əˈbɛk', 'əˈbæk', 'əˈbæg', 'uhˈbæk', 'əˈpæk']]
['ABAIR', 'əˈbɛɹ', ['əˈbɛw', 'əˈbeɹ', 'əˈpɛɹ', 'əˈbɛɹ', 'uhˈbɛɹ']]
['ABASH', 'əˈbæʃ', ['əˈbæsh', 'əˈbæʃ', 'əˈbaʃ', 'əˈpæʃ', 'uhˈbæʃ']]
['ABBA', 'ˈæbə', ['ˈæbuh', 'ˈæbə', 'ˈæpə', 'ˈɛbə']]


In [11]:
def is_last(alist, choice):
    """Return True when ``choice`` equals the final element of ``alist``.

    The comparison is by value (``==``), not by position, so an earlier
    element that compares equal to the final one is also reported as last.
    An empty sequence has no last element and yields False rather than
    raising IndexError.
    """
    if len(alist) == 0:
        return False
    return bool(choice == alist[-1])

In [12]:
# Turn each [headword, correct transcription, choices] row into the quiz
# structure: a question string plus a list of {"text", "correct"} answers,
# where "correct" is true only for the choice equal to the real transcription.
quiz_items = [
    {
        "question": "How is the word '" + row[0] + "' phonemically transcribed in IPA?",
        "answers": [
            {"text": option, "correct": option == row[1]}
            for option in row[2]
        ],
    }
    for row in all_answers
]

# Serialise with the json module rather than concatenating strings, so that
# quotes or backslashes in the data are escaped and separators are placed by
# position, not by comparing values against the last element.
# ensure_ascii=False keeps the IPA symbols as literal UTF-8 characters.
# The layout stays one question object per line inside a top-level array.
with open('answer_choices.json', 'w', encoding='utf-8') as js_file:
    js_file.write('[\n')
    js_file.write(',\n'.join(json.dumps(item, ensure_ascii=False) for item in quiz_items))
    js_file.write('\n]')