In [None]:
import ast
import csv
import random

In [None]:
# The edit distance function
def edit_distance(w1, w2):
    """Return a vowel-weighted edit distance between two symbol sequences.

    Args:
        w1: Source sequence. Either a string (each character is a symbol) or
            a sequence of strings (each string is a symbol).
        w2: Target sequence, same form as ``w1``.

    Returns:
        int: The minimum total cost of turning ``w1`` into ``w2`` using
        deletions, additions and substitutions.

    A symbol counts as a vowel when its first character is one of the
    uppercase letters A, E, I, O, U. Per-operation costs:

    * delete a symbol of ``w1``: 1, or 2 if it is a vowel
    * add a symbol of ``w2``: 1, or 2 if it is a vowel
    * substitute: 0 if the symbols are equal, 4 if exactly one of them is a
      vowel, otherwise 2
    """
    vowels = ('A', 'E', 'I', 'O', 'U')
    # These may be useful for later work:
    #voiced = ['B', 'D', 'G', 'J', 'L', 'M', 'N', 'R', 'V', 'W', 'Y', 'Z']
    #unvoiced = ['C', 'F', 'H', 'K', 'P', 'S', 'T']

    def is_vowel(symbol):
        # An empty symbol is treated as a non-vowel instead of raising
        # IndexError on symbol[0].
        return bool(symbol) and symbol[0] in vowels

    rows = len(w1) + 1
    cols = len(w2) + 1

    # cost[i][j] = cheapest way to turn the first i symbols of w1 into the
    # first j symbols of w2.
    cost = [[0] * cols for _ in range(rows)]
    # NOTE(review): the first row/column charge a flat 1 per symbol, so the
    # vowel weighting below only applies to interior cells. Kept as-is;
    # confirm whether leading vowels should also cost 2.
    for i in range(rows):
        cost[i][0] = i
    for j in range(cols):
        cost[0][j] = j

    # Vowel status depends only on the symbol, so compute it once per symbol
    # rather than once per table cell.
    w1_is_vowel = [is_vowel(symbol) for symbol in w1]
    w2_is_vowel = [is_vowel(symbol) for symbol in w2]

    for i in range(1, rows):
        for j in range(1, cols):
            # Costs are derived from scratch for every cell. Carrying them
            # over from the previous cell would let a vowel penalty (or a
            # zero-cost match) leak into unrelated symbol pairs.
            del_cost = 2 if w1_is_vowel[i-1] else 1
            add_cost = 2 if w2_is_vowel[j-1] else 1
            if w1[i-1] == w2[j-1]:
                # same sound: substituting is free
                sub_cost = 0
            elif w1_is_vowel[i-1] != w2_is_vowel[j-1]:
                # vowel <-> consonant is the most expensive swap
                sub_cost = 4
            else:
                # consonant <-> consonant, or vowel <-> vowel
                sub_cost = 2

            cost[i][j] = min(
                cost[i-1][j] + del_cost,
                cost[i][j-1] + add_cost,
                cost[i-1][j-1] + sub_cost,
            )

    return cost[-1][-1]

In [None]:
# Make sure it works: display the distance between two short strings.
# Both inputs are lowercase while the vowel list inside edit_distance is
# uppercase, so no symbol here is treated as a vowel.
edit_distance('cat', 'scant')

In [None]:
# Get our list of words, pronunciations, and POSs from CSV.
# Each row is read as: word, pronunciation (text of a Python literal), POS.
# (presumably the pronunciation is a list of phoneme strings -- TODO confirm)
real_list = []
real_words = []
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are handled by the reader itself.
with open('all_words.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than fail on row[0].
        if not row:
            continue
        # literal_eval only accepts Python literals, so a crafted cell in the
        # CSV cannot execute code the way eval() would.
        real_list.append((row[0], ast.literal_eval(row[1]), row[2]))
        real_words.append(row[0])
# When a word appears on several rows, the last row wins in this dict.
real_dict = {w[0]:{'pron':w[1], 'pos':w[2]} for w in real_list}

In [None]:
# We only need unique words
# whether or not there are multiple entries.
# Converting the list to a set drops the repeated words.
real_words = set(real_words)

In [None]:
# Collect every entry that could stand in for one sample word:
# its pronunciation must be within a distance of 3 (exclusive) of the
# sample's, and the sample word must not appear inside the entry's spelling.
w = 'cat'
word1 = real_dict[w]['pron']
options = [
    entry
    for entry in real_list
    if edit_distance(word1, entry[1]) < 3 and w not in entry[0]
]

In [None]:
# How many viable options are there?
# (counts entries, so a word listed on several rows is counted once per row)
len(options)

In [None]:
# Choose one randomly
# (random.choice raises IndexError if `options` is empty)
random.choice(options)

In [None]:
def main():
    """Prompt for a phrase and print it with similar-sounding words swapped in.

    Reads one line from standard input, lowercases it and splits it on
    whitespace. Stop words and words missing from ``real_words`` are left
    alone. Every other word may be replaced by a randomly chosen entry of
    ``real_list`` that

    * has the same POS as the word,
    * neither contains the word nor is contained in it (spelling-wise), and
    * has a pronunciation whose ``edit_distance`` to the word's is below 3.

    Relies on the module-level ``real_words``, ``real_dict``, ``real_list``
    and ``edit_distance``. Prints the rewritten phrase, or
    'No suitable substitution found' when no word has a candidate.
    Returns None.
    """
    # NLTK's list of stop words (a set, so each membership test is a lookup
    # instead of a scan):
    stop_words = {'i', 'me', 'my', 'myself', 'we', 'our', 'ours',
                  'ourselves', 'you', 'your', 'yours', 'yourself',
                  'yourselves', 'he', 'him', 'his', 'himself', 'she',
                  'her', 'hers', 'herself', 'it', 'its', 'itself',
                  'they', 'them', 'their', 'theirs', 'themselves',
                  'what', 'which', 'who', 'whom', 'this', 'that',
                  'these', 'those', 'am', 'is', 'are', 'was', 'were',
                  'be', 'been', 'being', 'have', 'has', 'had', 'having',
                  'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and',
                  'but', 'if', 'or', 'because', 'as', 'until', 'while',
                  'of', 'at', 'by', 'for', 'with', 'about', 'against',
                  'between', 'into', 'through', 'during', 'before',
                  'after', 'above', 'below', 'to', 'from', 'up', 'down',
                  'in', 'out', 'on', 'off', 'over', 'under', 'again',
                  'further', 'then', 'once', 'here', 'there', 'when',
                  'where', 'why', 'how', 'all', 'any', 'both', 'each',
                  'few', 'more', 'most', 'other', 'some', 'such', 'no',
                  'nor', 'not', 'only', 'own', 'same', 'so', 'than',
                  'too', 'very', 's', 't', 'can', 'will', 'just', 'don',
                  'should', 'now'}

    # A candidate is accepted only if its distance is strictly below this.
    max_price = 3

    # Get phrase from user
    phrase = input('Enter a phrase: ').lower()
    word_list = phrase.split()

    # choice_list[i] holds the possible replacements for word_list[i];
    # an empty list means "leave this word alone".
    choice_list = []
    for word in word_list:
        # Stop words are never replaced, and words we have no pronunciation
        # for cannot be compared against anything.
        if word in stop_words or word not in real_words:
            choice_list.append([])
            continue

        pos1 = real_dict[word]['pos']
        w1 = real_dict[word]['pron']
        choices = []
        for candidate, pron, pos2 in real_list:
            # Use the POS stored on this very row. Going through real_dict
            # would return the POS of the *last* row for that spelling, which
            # can belong to a different pronunciation of the same word.
            if pos1 != pos2:
                continue
            # Cheap spelling filters first, so the quadratic distance
            # computation only runs for candidates that can still qualify.
            if word in candidate or candidate in word:
                continue
            if edit_distance(w1, pron) < max_price:
                choices.append(candidate)
        choice_list.append(choices)

    # if subs are available, sub each possible word
    if any(choice_list):
        for i, choices in enumerate(choice_list):
            if choices:
                word_list[i] = random.choice(choices)
        print(' '.join(word_list))
    else:
        print('No suitable substitution found')

In [None]:
# Run the interactive prompt; this waits for a phrase to be typed in.
main()