# Wordle Solver

### 1. Data, packages, functions

Run this cell first

In [None]:
from tqdm.notebook import tqdm

## Import lists of possible solutions and valid guesses
# Each file holds one word per line; the context managers close the file
# handles as soon as the contents have been read.
with open("wordle-answers-alphabetical.txt") as solutions_file:
    solutions = solutions_file.read().splitlines()
with open("wordle-allowed-guesses.txt") as non_solutions_file:
    non_solutions = non_solutions_file.read().splitlines()

# Every word that may be guessed: the solutions first, then the other allowed words
guesses = solutions + non_solutions

## Function to get all remaining possible solutions after results of a guess
def get_solutions(guess_result, remaining_solutions):
    """Keep only the candidate solutions that are consistent with a guess result.

    Args:
        guess_result: Two-item sequence ``[guess, colors]``. ``colors`` holds one
            character per letter of ``guess``: 'g' (green), 'y' (yellow) or
            'x' (grey). Any other character places no constraint.
        remaining_solutions: Candidate solution words to filter.

    Returns:
        Tuple ``(num_solutions, possible_solutions)`` where ``possible_solutions``
        is the list of surviving candidates (in their original order) and
        ``num_solutions`` is its length.

    Raises:
        IndexError: If ``colors`` is shorter than ``guess``.
    """
    guess, colors = guess_result[0], guess_result[1]

    # Tally, per letter, how many tiles were green/yellow and whether any was grey
    hit_counts = {}
    grey_letters = set()
    for index, letter in enumerate(guess):
        color = colors[index]
        if color in ('g', 'y'):
            hit_counts[letter] = hit_counts.get(letter, 0) + 1
        elif color == 'x':
            grey_letters.add(letter)

    # (letter, green/yellow count, exact?) -- a grey tile means the solution holds
    # exactly that many copies of the letter, otherwise at least that many
    count_rules = [
        (letter, hit_counts.get(letter, 0), letter in grey_letters)
        for letter in set(guess)
    ]

    def is_consistent(word):
        # Position rules: green must match here; yellow and grey must NOT match
        # here (a matching letter in this position would have been green)
        for index, letter in enumerate(guess):
            color = colors[index]
            if color == 'g':
                if word[index] != letter:
                    return False
            elif color in ('y', 'x'):
                if word[index] == letter:
                    return False
        # Count rules: enough copies of each letter, and no extras once a grey was seen
        for letter, hits, exact in count_rules:
            occurrences = word.count(letter)
            if occurrences < hits or (exact and occurrences != hits):
                return False
        return True

    possible_solutions = [word for word in remaining_solutions if is_consistent(word)]
    num_solutions = len(possible_solutions)

    return (num_solutions, possible_solutions)

## Function to get a result (colors) from a guess and a solution
def get_result(guess, solution):
    """Return the color string Wordle would show for ``guess`` against ``solution``.

    Each character of the returned string describes the letter of ``guess`` at
    the same position: 'g' (right letter, right position), 'y' (letter is in
    the solution but elsewhere) or 'x' (grey).
    """
    colors = []

    for position, char in enumerate(guess):
        # Right position: green
        if char == solution[position]:
            colors.append('g')
            continue

        # Letter absent from the solution: grey
        if char not in solution:
            colors.append('x')
            continue

        # Letter occurs once in the guess and is in the solution elsewhere: yellow
        if guess.count(char) < 2:
            colors.append('y')
            continue

        # Repeated letter in the guess: only as many non-green copies turn
        # yellow as the solution has copies left over after the greens,
        # handed out from left to right.
        guess_spots = [i for i, c in enumerate(guess) if c == char]
        solution_spots = [i for i, c in enumerate(solution) if c == char]
        green_spots = set(guess_spots) & set(solution_spots)
        misplaced_spots = [i for i in guess_spots if i not in green_spots]
        yellow_budget = len(solution_spots) - len(green_spots)
        yellow_spots = misplaced_spots[0:yellow_budget]

        colors.append('y' if position in yellow_spots else 'x')

    return ''.join(colors)

## Function to find the best guess from remaining solutions and possible guesses
def best_guess(valid_guesses, remaining_solutions):
    """Pick the guess that leaves the fewest candidate solutions on average.

    For every guess, each remaining solution is treated as the hidden word:
    the guess is scored against it and the candidates that would survive that
    result are counted (0 when the guess is the solution itself). The guess
    with the lowest average count wins; ties go to the earliest guess.

    Args:
        valid_guesses: Words that may be guessed.
        remaining_solutions: Words that can still be the solution.

    Returns:
        Tuple ``(next_guess, expected_solutions)`` where ``expected_solutions``
        holds the average for each entry of ``valid_guesses``, in order.

    Raises:
        ValueError: If ``valid_guesses`` is empty.
    """
    if not valid_guesses:
        raise ValueError("valid_guesses must contain at least one word")

    expected_solutions = []

    for guess in valid_guesses: #Iterate over each guess
        #The surviving count depends only on the color pattern, so compute it
        #once per pattern rather than once per solution
        count_by_result = {}
        guess_num_solutions = [] #number of solutions left after the guess, per possible solution
        for solution in remaining_solutions: #Iterate over each possible solution
            if guess == solution:
                new_num_solutions = 0
            else:
                result = get_result(guess, solution)
                if result not in count_by_result:
                    count_by_result[result] = get_solutions([guess, result], remaining_solutions)[0]
                new_num_solutions = count_by_result[result]
            guess_num_solutions.append(new_num_solutions)

        #expected solutions available for guess
        if guess_num_solutions:
            avg_guess_num_solutions = sum(guess_num_solutions)/len(guess_num_solutions)
        else:
            avg_guess_num_solutions = 0
        expected_solutions.append(avg_guess_num_solutions)

    #Next guess is guess with lowest expected number of remaining solutions.
    #Index into valid_guesses (the list that was scored), not a module-level list.
    min_exp_solutions = min(expected_solutions)
    next_guess = valid_guesses[expected_solutions.index(min_exp_solutions)]

    return (next_guess, expected_solutions)


### 2. Solve a Wordle Puzzle using known starting word

##### Instructions...
Run the following cell.

The program will suggest a word to guess. Guess that word in Wordle. 

Wordle will color the letters. Input the colors as prompted by the program. The format should be as follows.

A five character string where...

    x = grey letter
    y = yellow letter
    g = green letter

##### Example...

![wordle example](example_img.png "Wordle Example")

For this result, you would input "yxgxg"

In [None]:
##Solve a Wordle

#Start with all possible solutions and guesses, no eliminated letters
possible_solutions = solutions
possible_guesses = guesses

#1st guess is always 'ACUTE'
guess_word = 'acute'
li = [['60', 'acute']]

#Until Solved
while len(possible_solutions) > 1:

    #Show how many candidates remain and preview at most five of them
    preview = [word.upper() for word in possible_solutions[:5]]
    if len(possible_solutions) > 5:
        preview.append('...')
    print(str(len(possible_solutions)) + ' possible solutions: ' + ', '.join(preview))

    #Prompt for guess results. The filter only understands lowercase x/y/g, one
    #per letter, so normalise the input and ask again until it is well-formed.
    print('Guess ' + guess_word.upper() + '. Enter result:')
    guess_result = input().strip().lower()
    while len(guess_result) != len(guess_word) or not set(guess_result) <= set('xyg'):
        print('Invalid result. Enter ' + str(len(guess_word)) + ' characters using only x, y and g:')
        guess_result = input().strip().lower()
    print()

    #Update Possible Solutions
    guess = [guess_word, guess_result]
    possible_solutions = get_solutions(guess, possible_solutions)[1]

    #Generate Next Word(s)
    print('Thinking...', end='\r')
    guess_word, exp_sols = best_guess(guesses, possible_solutions)
    #Rank every guess by its expected number of remaining solutions (best first)
    li = sorted([exp, word] for exp, word in zip(exp_sols, guesses))

#Solved
if len(possible_solutions) == 1:
    print("1 possible solution.\nToday's word is: " + possible_solutions[0].upper() + ". Woo! 🎉")
else:
    print("No possible solutions. ☹️")

### 3. Finding the Best Starting Words

Run this cell to find the best starting words

Spoiler... the best word is **ACUTE**

In [None]:
##Rank all Starting Words
next_guess, expected_solutions = best_guess(guesses, solutions)

#Order starting words by rank: build a fresh [expected, word] list from this
#cell's own results so nothing left over from another cell leaks in
li = sorted([exp, word] for exp, word in zip(expected_solutions, guesses))

#Output best words
print('The best starting word is: ' + next_guess.upper())
print('The expected number of solutions remaining after making this guess is: ' + str(min(expected_solutions)))
print()
print('The top 20 best starting words are:')
for exp, word in li[:20]:
    print ('   ', word.upper(), exp)

### 4. Evaluation

Run this cell to complete the program evaluation. 

##### Results:

GUESS DISTRIBUTION

    1: 0.0% (1)
    2: 3.2% (73)
    3: 40.8% (944)
    4: 53.0% (1228)
    5: 3.0% (69)

Average Guesses: 3.56

In [None]:
####Evaluation

##Create Dictionary lookup 2nd Guess based on every outcome of 'ACUTE' guess

#Color pattern that 'acute' produces against each possible solution
acute_results = [get_result('acute', solution) for solution in solutions]

#Map each distinct pattern to the best follow-up guess
acute_results_dict = {}
for guess_result in tqdm(acute_results):
    #Pattern already handled: nothing to compute
    if guess_result in acute_results_dict:
        continue

    #Solutions that share this pattern are the candidates left after 'acute'
    possible_solutions = [
        solutions[ind] for ind, el in enumerate(acute_results) if el == guess_result
    ]
    acute_results_dict[guess_result] = best_guess(guesses, possible_solutions)[0]

##Create Guess Distribution

num_guesses = []
#For each solution, replay the solver and count the guesses it needs
for solution in tqdm(solutions):
    possible_solutions = solutions
    guess_words = ['acute'] #First guess is always 'acute'

    #Keep guessing until the latest guess is the solution
    while guess_words[-1] != solution:
        guess_word = guess_words[-1]
        guess_result = get_result(guess_word, solution)
        guess = [guess_word, guess_result]
        possible_solutions = get_solutions(guess, possible_solutions)[1]

        if len(guess_words) == 1:
            #Second guess is known: look it up by the pattern THIS solution
            #produces for 'acute' (not by membership in all second guesses)
            next_word = acute_results_dict[guess_result]
        else:
            #Third+ guesses are unknown, calculate them
            next_word = best_guess(guesses, possible_solutions)[0]
        guess_words.append(next_word)

    #Total number of guesses needed, including the final correct one
    num_guesses.append(len(guess_words))

#Output Guess Distribution results
print('GUESS DISTRIBUTION')
total_solved = len(num_guesses)
for i in range(1, max(num_guesses)+1):
    i_guesses = num_guesses.count(i)
    #Always divide so a zero count prints as "0.0%" like every other row
    i_proportion = i_guesses/total_solved
    print(str(i) + ': ' + str(round(100*i_proportion, 1)) + "% (" + str(i_guesses) + ')')
print()
print('Average Guesses: ' + str(round(sum(num_guesses)/total_solved, 2)))