In [44]:
def create_pron_dict(f):

    """ Create and fill a pronunciation dictionary.

    Keys are phonological transcriptions (e.g. "B IY1 T") and values are
    lists of the words sharing that transcription (e.g. ["BEAT", "BEET"]),
    in the order they appear in the input.

    f: the full text of the pronunciation file, one entry per line, where the
       word and its transcription are separated by two spaces
       (e.g. "BEAT  B IY1 T").

    Lines without the two-space separator (blank lines, including the one a
    trailing newline would otherwise produce) and lines starting with ";;;"
    (the comment marker used by CMUdict-style files) are skipped rather than
    raising IndexError or being stored as bogus entries.

    Returns the dict described above. """

    print("Creating pronunciation dictionary ...")

    pron_dict = {}

    # splitlines() yields no empty final entry for a trailing newline and
    # also copes with Windows line endings
    for entry in f.splitlines():
        if entry.startswith(";;;"):
            continue

        fields = entry.split("  ")

        # not a "WORD  PHONEMES" line: nothing to record
        if len(fields) < 2:
            continue

        word, phonemes = fields[0], fields[1]

        # setdefault creates the homophone list the first time a transcription is seen
        pron_dict.setdefault(phonemes, []).append(word)

    print("Created pronunciation dictionary.")

    return pron_dict

In [45]:
def create_x_dict(pron_dict):
    """ Build the 'X'-dictionary from a pronunciation dictionary.

    Each key is a transcription in which exactly one phoneme has been replaced
    by the placeholder X (e.g. "X IY1 T"). Each value is a dict mapping a
    phoneme that can fill the placeholder to the list of words it produces,
    e.g. {"B": ["BEAT", "BEET"], "P": ["PEAT", "PETE"]}.

    Patterns that only a single phoneme can fill are removed (via
    trim_uniques) before the dictionary is returned. """

    print("Creating 'X'-dictionary ...")

    x_dict = {}

    for transcription, words in pron_dict.items():
        phonemes = transcription.split()

        # blank out each position in turn and file the words under the phoneme that was removed
        for position, phoneme in enumerate(phonemes):
            masked = list(phonemes)
            masked[position] = "X"
            x_pattern = " ".join(masked)

            # setdefault creates the inner dict the first time this pattern is seen
            x_dict.setdefault(x_pattern, {})[phoneme] = words

    # a pattern with a single filler cannot yield a minimal pair
    trim_uniques(x_dict)

    print("Created 'X'-dictionary.")

    return x_dict

In [46]:
def trim_uniques(d):

    """ Delete, in place, every key of d whose value has exactly one entry.

    For the 'X'-dictionary this removes patterns that only one phoneme can
    fill (a single word or set of homophones), since they yield no minimal
    pairs. Returns None; d itself is modified. """

    print("Removing 'X'-values that generate no minimal pairs ...")

    # collect the keys first: deleting while iterating over d itself would raise
    singletons = [pattern for pattern, fillers in d.items() if len(fillers) == 1]

    for pattern in singletons:
        del d[pattern]

    return None

In [48]:
def print_minimal_pairs(phoneme_1, phoneme_2):
    """ Print every contrast between phoneme_1 and phoneme_2 found in x_dict.

    Reads the module-level x_dict. For each pattern that both phonemes can
    fill, prints the two word lists side by side, then prints a summary line
    with the number of such patterns. Returns None. """

    n_contrasts = 0

    for fillers in x_dict.values():
        # only patterns that both phonemes can fill form a contrast
        if phoneme_1 not in fillers or phoneme_2 not in fillers:
            continue

        n_contrasts += 1

        print(fillers[phoneme_1], "contrasts with", fillers[phoneme_2])

    print("The phonemes", phoneme_1, "and", phoneme_2, "yield", n_contrasts, "minimal pairs.")

    return None

In [58]:
# Read the whole pronunciation file; the with-block closes the handle as soon
# as the text is in memory. `f` still ends up bound to the file's text.
with open("pronunciation_dict.txt") as dict_file:
    f = dict_file.read()

pron_dict = create_pron_dict(f)

# print_minimal_pairs reads x_dict as a global, so it must be built before
# that function is called.
x_dict = create_x_dict(pron_dict)

Creating pronunciation dictionary ...
Created pronunciation dictionary.
Creating 'X'-dictionary ...
Removing 'X'-values that generate no minimal pairs ...
Created 'X'-dictionary.


In [59]:
# Input desired phonemes here!
# print_minimal_pairs reads the global x_dict, so run the cell that builds it first.

# Use phoneme symbols from ARPABET (https://en.wikipedia.org/wiki/ARPABET)
# NOTE(review): transcriptions in this notebook look like "B IY1 T", so vowels
# presumably need their stress digit (e.g. "IY1", not "IY") -- confirm against the dictionary file.

print_minimal_pairs("SH","ZH")

['ASCHER', 'ASHER', 'ASHUR'] contrasts with ['AZURE']
['ALEUTIAN'] contrasts with ['ALLUSION']
['ALEUTIANS'] contrasts with ['ALLUSIONS']
['SCHAACK', 'SCHOCH', 'SCHOCK', 'SHAAK', 'SHOCK'] contrasts with ['JACQUES']
['HSIA', 'SHA', 'SHAH'] contrasts with ['ZSA']
['SCHAKE', 'SHAIK', 'SHAIKH', 'SHAKE'] contrasts with ['JACQUE']
['SIANG(1)'] contrasts with ['ZHANG']
['HSIAO', 'SCHOW', 'SHAO', 'SHOUGH'] contrasts with ['XIAO', 'ZHAO', 'ZSCHAU']
['CHAIRES', "SHARE'S", 'SHARES', "SHARES'", "SHERR'S"] contrasts with ['JARES']
['SHAW'] contrasts with ['XIO']
["SHAW'S"] contrasts with ['SIAS']
['SCHEEL', 'SCHEELE', 'SCHIEL', 'SCHIELE', "SHE'LL", 'SHIEL'] contrasts with ['GILLES']
['CHENETTE'] contrasts with ['JENNETTE']
['SCHERTZ', 'SHIRTS', 'SHURTZ'] contrasts with ['GEURTS']
['HSU', 'SCHOO', 'SCHOU', 'SCHUE', 'SCHUH', 'SHEW', 'SHIU', 'SHOE', 'SHOO', 'SHU', 'SHUE'] contrasts with ['ZHOU', 'ZHU']
['SHOCKS'] contrasts with ["JACQUES'"]
['KASHMIR'] contrasts with ['CASHMERE']
['CHANEL'] contrasts 