In [1]:
import pandas as pd
import string

In [2]:
# Load the space-separated (word, count) table -- the file has no header row --
# and attach the prior P(W=w): each word's count divided by the total count.
word_counts = (
    pd.read_csv(
        './word_counts.txt',
        sep=' ',
        header=None,
        names=['word', 'count'],
    )
    .assign(**{'P(W=w)': lambda frame: frame['count'] / frame['count'].sum()})
)

word_counts.head()

Unnamed: 0,word,count,P(W=w)
0,AARON,413,5.4e-05
1,ABABA,199,2.6e-05
2,ABACK,64,8e-06
3,ABATE,69,9e-06
4,ABBAS,290,3.8e-05


In [3]:
def p_e_given_w(correct, incorrect, words=None):
    '''
    Function to give P(evidence|word) for each word: 1 if the word is consistent
    with the correct/incorrect guesses made so far, 0 otherwise.

    Evidence refers to the information at the current stage of the game.

    Args:
        correct:   list with one entry per letter position: the revealed letter
                   at that position, or None if the position is still hidden
        incorrect: list of incorrect guesses
        words:     optional iterable of candidate words; defaults to
                   word_counts['word']

    Returns:
        p_e_w: list of probabilities P(E|W) (each 0 or 1), one per word
    '''
    if words is None:
        words = word_counts['word']

    # Letters that have been guessed correctly (every occurrence is revealed)
    revealed = {guess for guess in correct if guess is not None}
    wrong = set(incorrect)

    def is_consistent(word):
        # A word of a different length cannot produce this pattern. Without
        # this check zip() would silently compare only a truncated prefix.
        if len(word) != len(correct):
            return False

        for letter, guess in zip(word, correct):
            # The word contains a letter that is "incorrect"
            if letter in wrong:
                return False

            if guess is None:
                # A letter has been guessed and is in the word, so all of its
                # occurrences are revealed; it cannot sit in a hidden spot.
                if letter in revealed:
                    return False

            # The spot is revealed but the word has a different letter there
            elif letter != guess:
                return False

        return True

    return [1 if is_consistent(word) else 0 for word in words]

In [4]:
def p_l_given_w(letter, words=None):
    '''
    Function that gives the probabilities of a certain letter given a word
    P(letter | word): 1 if the letter occurs anywhere in the word, 0 otherwise.

    Args:
        letter: letter of interest from alphabet
        words:  optional iterable of candidate words; defaults to
                word_counts['word']

    Returns:
        p_l_w: list with the probability (0 or 1) of that letter for each word
    '''
    if words is None:
        words = word_counts['word']

    # Compare against the word's individual characters (not a substring test),
    # so only a single letter can ever match.
    return [1 if letter in set(word) else 0 for word in words]

In [5]:
def best_guess(correct, incorrect):
    '''
    Prints the best next guess letter (l) and P(Letter=l|Evidence).

    The intermediate probability columns are written to the module-level
    word_counts frame and are overwritten on every call.

    Args:
        correct: list of correct guesses in order (None for hidden positions)
        incorrect: list of incorrect guesses
    '''

    print('For correct guesses', correct, 'and incorrect guesses {}:'.format(incorrect))

    # Compute P(E|W=w)
    word_counts['P(E|W=w)'] = p_e_given_w(correct, incorrect)

    # Compute P(W=w|E) = P(E|W=w)*P(W=w) / sum over all words of the same product
    joint = word_counts['P(E|W=w)'] * word_counts['P(W=w)']
    evidence = joint.sum()
    if not evidence > 0:
        # No word fits the evidence: the posterior would be 0/0 for every word
        # and any "best guess" derived from it would be meaningless.
        print('No word in the vocabulary is consistent with this evidence.\n')
        return
    word_counts['P(W=w|E)'] = joint / evidence

    letters = list(string.ascii_uppercase)

    # Compute P(L=l|W=w)
    for letter in letters:
        word_counts['P(L={}|W=w)'.format(letter)] = p_l_given_w(letter)

    # Compute P(L=l|W=w)*P(W=w|E) for each letter and word, then sum over the
    # words to get P(L=l|E)
    posteriors = {}
    for letter in letters:
        weighted = 'P(L={}|W=w)*P(W=w|E)'.format(letter)
        word_counts[weighted] = word_counts['P(L={}|W=w)'.format(letter)] * word_counts['P(W=w|E)']
        posteriors[letter] = word_counts[weighted].sum()

    p_letter_e = pd.Series(posteriors, name='P(L=l|E)')

    # Exclude letters that were already guessed. Filtering on the probability
    # instead (e.g. "P < 1") would also throw away an unguessed letter that
    # occurs in every remaining candidate word, which is the best possible guess.
    already_guessed = {guess for guess in correct if guess is not None} | set(incorrect)
    candidates = p_letter_e.drop(labels=[l for l in letters if l in already_guessed])
    if candidates.empty:
        print('Every letter has already been guessed.\n')
        return

    best_letter = candidates.idxmax()
    max_p_l_e = candidates[best_letter]

    print('Your best next guess is', best_letter, 'with a probability P(L={}|E) of'.format(best_letter),round(max_p_l_e, 4),'\n')

In [6]:
# Check against given solutions: each pair below is (correct, incorrect)
for correct, incorrect in (
    ([None] * 5, ['E', 'O']),
    (['D', None, None, 'I', None], []),
    (['D', None, None, 'I', None], ['A']),
    ([None, 'U', None, None, None], ['A', 'E', 'I', 'O', 'S']),
):
    best_guess(correct, incorrect)

For correct guesses [None, None, None, None, None] and incorrect guesses ['E', 'O']:
Your best next guess is I with a probability P(L=I|E) of 0.6366 

For correct guesses ['D', None, None, 'I', None] and incorrect guesses []:
Your best next guess is A with a probability P(L=A|E) of 0.8207 

For correct guesses ['D', None, None, 'I', None] and incorrect guesses ['A']:
Your best next guess is E with a probability P(L=E|E) of 0.7521 

For correct guesses [None, 'U', None, None, None] and incorrect guesses ['A', 'E', 'I', 'O', 'S']:
Your best next guess is Y with a probability P(L=Y|E) of 0.627 



In [11]:
import tkinter as tk

# Create the top-level window.
window = tk.Tk()

# Pass the parent explicitly: a widget created without one attaches to
# tkinter's default root, which is the first Tk() ever created in the process.
# If this cell is re-run, that is no longer the `window` created above.
greeting = tk.Label(window, text="Hello, Tkinter")

# NOTE(review): no mainloop() is started in this cell -- confirm that is intended.
greeting.pack()

In [7]:
def main():
    '''Run best_guess on each of the reference game states.'''
    # Check against given solutions: each pair is (correct, incorrect)
    reference_cases = (
        ([None] * 5, ['E', 'O']),
        (['D', None, None, 'I', None], []),
        (['D', None, None, 'I', None], ['A']),
        ([None, 'U', None, None, None], ['A', 'E', 'I', 'O', 'S']),
    )
    for correct, incorrect in reference_cases:
        best_guess(correct, incorrect)

if __name__ == "__main__":
    main()

For correct guesses [None, None, None, None, None] and incorrect guesses ['E', 'O']:
Your best next guess is I with a probability P(L=I|E) of 0.6366 

For correct guesses ['D', None, None, 'I', None] and incorrect guesses []:
Your best next guess is A with a probability P(L=A|E) of 0.8207 

For correct guesses ['D', None, None, 'I', None] and incorrect guesses ['A']:
Your best next guess is E with a probability P(L=E|E) of 0.7521 

For correct guesses [None, 'U', None, None, None] and incorrect guesses ['A', 'E', 'I', 'O', 'S']:
Your best next guess is Y with a probability P(L=Y|E) of 0.627 

