In [1]:
import urllib.request

### Pre-Processing
- Get all possible words that the target word could be
- For each word in the target words list, get counts of each letter to create letter distribution across entire vocabulary

In [2]:
### Downloading exhaustive wordle dataset taken from game source code (see link below) -- writing to a .txt file as backup in case link doesn't work
location = "https://raw.githubusercontent.com/tabatkins/wordle-list/main/words"

f = urllib.request.urlopen(location)

file_path = "data/wordle_words.txt"
wordle_file = open(file_path, "w", encoding = "utf-8") # opens connections of file at given file path, in WRITE mode ("w")

all_words = set() # set of all words

for line in f.readlines():
    word = line.decode("utf-8").strip() # str of each line (/word)
    wordle_file.write(f"{word}\n") # writes each word on a separate line of file
    all_words.add(word.lower()) # lowercases word and adds to set

wordle_file.close() # closes connection to file

print(len(all_words))

14855


In [3]:
words_counts_dict = {}

# for word in ten_words: # test dataset
for word in all_words: # real dataset
    word_dict = {}
 
    for letter in word:
        if letter in word_dict:
            word_dict[letter] += 1
        else:
            word_dict[letter] = 1
    words_counts_dict[word] = word_dict

len(words_counts_dict)
# words_counts_dict

14855

In [4]:
### Initialize dict of all english letters and an empty (0) count

letters = "abcdefghijklmnopqrstuvwxyz"

letters_counts_dict = {}

for letter in letters:
    letters_counts_dict[letter] = 0

# print(len(letters_counts_dict)) # 26
letters_counts_dict

{'a': 0,
 'b': 0,
 'c': 0,
 'd': 0,
 'e': 0,
 'f': 0,
 'g': 0,
 'h': 0,
 'i': 0,
 'j': 0,
 'k': 0,
 'l': 0,
 'm': 0,
 'n': 0,
 'o': 0,
 'p': 0,
 'q': 0,
 'r': 0,
 's': 0,
 't': 0,
 'u': 0,
 'v': 0,
 'w': 0,
 'x': 0,
 'y': 0,
 'z': 0}

In [5]:
### Most common letters of all words of the dataset

words_counts_dict

for word, count_dict in words_counts_dict.items():
    for letter, count in count_dict.items():
        letters_counts_dict[letter] += count

letters_counts_dict

{'a': 7128,
 'b': 1849,
 'c': 2246,
 'd': 2735,
 'e': 7455,
 'f': 1240,
 'g': 1864,
 'h': 1993,
 'i': 4381,
 'j': 342,
 'k': 1753,
 'l': 3780,
 'm': 2414,
 'n': 3478,
 'o': 5212,
 'p': 2436,
 'q': 145,
 'r': 4714,
 's': 7319,
 't': 3707,
 'u': 2927,
 'v': 801,
 'w': 1127,
 'x': 326,
 'y': 2400,
 'z': 503}

### Logic of the task
- Find out the "best" possible starting guesses
    - "best" == word that includes the highest number of the most frequent characters (top 5 most frequent characters is ideal)
- Once `guess_1` is made, evalute `guess_1`
    - for each letter in `guess_1`:
        - if the letter is correct, keep it for next guess
        - for incorrect letters where location is wrong, change it to a new location (where the correct letters also exist in the word in the same spot)
        - for incorrect letters where letter is wrong (doesn't exist in the word at all), change it to the next most frequent letter where the new combination of letters forms a new word in the possible words
- Repeat same logic for `guess_2-5`

In [6]:
import operator

# generates random target word from all possible words
all_words_list = list(all_words) # list because it has to be ordered

sorted_letters_counts = sorted(letters_counts_dict.items(), key = operator.itemgetter(1), reverse = True)
sorted_letters_counts[:5] # each first guess should always contain these 5 letters -- one of each, ideally

best_opening_words = set()
best_letters = set()

### Get 5 top most frequent letters 
for letter, freq in sorted_letters_counts[:5]:
    best_letters.add(letter)

### Get all words that have one of each of the 5 top most frequent letters
for word in all_words:
    word_set = set(word)
    if best_letters == word_set:
        best_opening_words.add(word)

print(best_letters)
print(best_opening_words)
sorted_letters_counts

{'r', 'o', 's', 'e', 'a'}
{'soare', 'arose', 'aeros'}


[('e', 7455),
 ('s', 7319),
 ('a', 7128),
 ('o', 5212),
 ('r', 4714),
 ('i', 4381),
 ('l', 3780),
 ('t', 3707),
 ('n', 3478),
 ('u', 2927),
 ('d', 2735),
 ('p', 2436),
 ('m', 2414),
 ('y', 2400),
 ('c', 2246),
 ('h', 1993),
 ('g', 1864),
 ('b', 1849),
 ('k', 1753),
 ('f', 1240),
 ('w', 1127),
 ('v', 801),
 ('z', 503),
 ('j', 342),
 ('x', 326),
 ('q', 145)]

In [7]:
import random
# target_word = all_words_list[random.randint(0, len(all_words_list))]
target_word = "argos"
best_opening_words_list = list(best_opening_words)
randomint = random.randint(0, len(best_opening_words_list) - 1) # len will be 1 larger than last index because len counts first as 1 and index counts first as 0
opening_word = best_opening_words_list[randomint]

print(f"Target: {target_word}")
print(f"Opening guess: {opening_word}")

Target: argos
Opening guess: aeros


In [8]:
# ### VERSION 1

# def solve_wordle(opening_guess: str, target_word: str, guesses = 0, verbose = False):
#     """
#     Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

#     -----
#     Parameters:
#     opening_guess: str
#         a five-letter string
#     target_word: str
#         a five-letter string
#     verbose: bool
#         if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.

#     -----
#     Returns:
#         None
#     """

#     all_letter_positions = set(i for i in range(0, 5)) # creates a set of 0-4 (for each letter position) -- just to avoid regenerating this each time it's needed
#     next_guess_list = ['_' for i in range(0, 5)] # initializes new list of 5 "_"

#     correct_positions = set()

#     incorrect_positions_words = set()
#     incorrect_letters = set()

#     incorrect_pos_letters = set()
#     incorrect_positions = set()

#     if guesses == 0:

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#     guesses += 1

#     if verbose == True:
#         print (f"Guess {guesses}:\n\t{opening_guess}\n")
#     else:
#         print (f"Guess {guesses}: {opening_guess}")

#     # evaluating current guess against target and returning correct letters
#     for i in all_letter_positions: # 5 letters in each word (current word and target word)
#         corr_curr_letts_list = ['_' for i in range(0, 5)]
#         incorr_curr_letts_list = ['_' for i in range(0, 5)]
#         incorr_pos_letters_list = ['_' for i in range(0, 5)]
        
#         if opening_guess[i] == target_word[i]: # if letters exist and are in the correct position
#             next_guess_list[i] = opening_guess[i]
#             correct_positions.add(i)            

#         elif opening_guess[i] not in target_word: # if letters do not exist at all in target word
#             incorrect_letters.add(opening_guess[i])
#             incorrect_positions_words.add(i)

#         elif (opening_guess[i] != target_word[i] and  opening_guess[i] in target_word): # if not at that position but is elsewhere in the word
#             incorrect_pos_letters.add(opening_guess[i])
#             incorrect_positions.add(i)

#     if verbose == True:

#         if len(correct_positions) > 0:
#             for pos in correct_positions:
#                 corr_curr_letts_list[pos] = opening_guess[pos]
#                 corr_curr_letts_str = " ".join(corr_curr_letts_list)

#             print (f"Current letters in correct locations:\n\t{corr_curr_letts_str}\n")
        
#         if len(incorrect_positions) > 0:
#             for pos in incorrect_positions:
#                 incorr_pos_letters_list[pos] = opening_guess[pos]
#                 incorr_pos_letts_str = " ".join(incorr_pos_letters_list)

#             print (f"Current correct letters in incorrect locations:\n\t{incorr_pos_letts_str}\n")

#         if len(incorrect_positions_words) > 0:
#             for pos in incorrect_positions_words:
#                 incorr_curr_letts_list[pos] = opening_guess[pos]
#                 incorr_curr_letts_str = " ".join(incorr_curr_letts_list)

#             print (f"Current letters not in target word:\n\t{incorr_curr_letts_str}\n")
#         else:
#             print (f"All current letters are in target word.\n")


#     # if the guess match the target, print a successful response and end function call
#     if opening_guess == target_word:

#         if guesses == 1:
#             print (f"Wordle has been solved in {guesses} guess!")
        
#         else:
#             print (f"Wordle has been solved in {guesses} guesses!")
#             print (f"{opening_guess}")
    
#     # if guess does not match the target, continue calling function until either max guesses is reached or guessed word matches target
#     else:
#         incorrect_positions_letters = all_letter_positions.difference(correct_positions) # works fine

#         # print (incorrect_letters, next_guess_list)

#         if len(incorrect_pos_letters) > 0:
            
#             for new_guess_position in incorrect_positions_letters:
#                 # print(new_guess_position)

#                 for incorr_pos_letter in list(incorrect_pos_letters):

#                     next_guess_list[new_guess_position] = incorr_pos_letter
            
#                     if "_" not in next_guess_list:
#                         next_guess_str = "".join(next_guess_list)
                        
#                         if next_guess_str in all_words:
#                             break

#         # fill empty spots with next best new unused letters
#         else:
            
#             for letter, freq in sorted_letters_counts:

#                 if letter not in (incorrect_letters or next_guess_list):
                    
#                     for new_guess_position in incorrect_positions_letters:
#                         next_guess_list[new_guess_position] = letter

#                 if "_" not in next_guess_list:
#                     next_guess_str = "".join(next_guess_list)
                    
#                     if next_guess_str in all_words:
#                         break

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#         # recursively call function with new guess until max number of tries is reached
#         if guesses < len(opening_guess):
#             opening_guess = next_guess_str
#             solve_wordle(opening_guess, target_word, guesses, verbose)

#         else:
#             print(f"\nUnfortunately, this Wordle could not be solved in {guesses} guesses.\n\nThe correct word was '{target_word}'.\n\nBetter luck next time.")

#             if verbose == True:
#                 print(f"\n----------------------------------------------------")

# # solve_wordle(opening_word, target_word, verbose = True)
# solve_wordle("aeros", target_word, verbose = False)

In [9]:
# ### VERSION 2

# def solve_wordle(opening_guess: str, target_word: str, guesses = 0, verbose = False):
#     """
#     Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

#     -----
#     Parameters:
#     opening_guess: str
#         a five-letter string
#     target_word: str
#         a five-letter string
#     verbose: bool
#         if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.

#     -----
#     Returns:
#         None
#     """

#     all_letter_positions = set(i for i in range(0, 5)) # creates a set of 0-4 (for each letter position) -- just to avoid regenerating this each time it's needed

#     if guesses == 0:

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#     guesses += 1

#     if verbose == True:
#         print (f"Guess {guesses}:\n\t{opening_guess}\n")
#     else:
#         print (f"Guess {guesses}: {opening_guess}")

#     corr_all_dict = {} # letter == correct and position == correct
#     incorr_letts_set = set() # can be set because position doesn't matter. Letter is not relevant at all
#     corr_letts_incorr_pos_dict = {} # letter == correct and position != correct

#     # evaluating current guess against target and returning correct letters
#     for i in all_letter_positions: # 5 letters in each word (current word and target word)
#         corr_all_dict[opening_guess[i]] = set()
#         corr_letts_incorr_pos_dict[opening_guess[i]] = set()
        
#         if opening_guess[i] == target_word[i]: # letter == correct and position == correct
#             corr_all_dict[opening_guess[i]].add(i)

#         elif opening_guess[i] not in target_word: # if letter is not relevant at all
#             incorr_letts_set.add(opening_guess[i])

#         elif (opening_guess[i] != target_word[i] and  opening_guess[i] in target_word): # letter == correct and position != correct
#             corr_letts_incorr_pos_dict[opening_guess[i]].add(i)

#     print (f"corr_all_dict: {corr_all_dict}")
#     print(f"incorr_letts_set: {incorr_letts_set}")
#     print(f"corr_letts_incorr_pos_dict: {corr_letts_incorr_pos_dict}")

#     next_guess_list = ["_" for i in range(0, 5)] # initializes new list of 5 "_"

#     # filling next guess with current correct letters in correct locations
#     for lett, positions in corr_all_dict.items():
#         if len(positions) > 0:
#             for pos in positions:
#                 next_guess_list[pos] = lett
    
#     # getting locations of all empty positions still in list
#     if "_" in next_guess_list:
        
#         # get empty positions(s)
#         empty_positions = set()
#         for i in next_guess_list:
#             if i == "_":
#                 empty_positions.add(next_guess_list.index(i))

#         # using current letter(s) in incorrect locations to fill empty position(s)
#         for lett, positions in corr_letts_incorr_pos_dict.items():
#             if len(positions) > 0:
#                 for pos in empty_positions.difference(positions):
#                     next_guess_list[pos] = lett

#         # get empty positions again
#         empty_positions = set()
#         for i in next_guess_list:
#             if i == "_":
#                 empty_positions.add(next_guess_list.index(i))                    

#         # using new letter(s) in incorrect locations to fill empty position(s)

#         for i in empty_positions:
#             for lett, freq in sorted_letters_counts:
#                 if lett not in (incorr_letts_set or next_guess_list):
#                     next_guess_list[i] = lett

#                     # check if new word is in wordle list:
#                     next_guess_str = "".join(next_guess_list)
#                     if next_guess_str in all_words:
#                         break

#         print(next_guess_str)       
#         # solve_wordle(next_guess_str, target_word, guesses, verbose)
#         print ("still blanks")
#     else:
#         print ("no more blanks")
#         next_guess_str = "".join(next_guess_list)

#         # recursively call function until max attempts is reached or wordle is solved
#         # solve_wordle(next_guess_str, target_word, guesses, verbose)

# # solve_wordle(opening_word, target_word, verbose = True)
# solve_wordle("aeros", target_word, verbose = False)

In [10]:
### VERSION 3

def solve_wordle(opening_word: str, target_word: str, max_attempts: int = len(opening_word), verbose = False):
    """
    Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

    ------
    Parameters:
    ------
    `opening_word`: str
        a string -- must be the same length as `target_word`
    `target_word`: str
        a string -- must be the same length as `opening_word`
    `max_attempts`: int
        the maximum number of attempts allowed to solve the Wordle
    `verbose`: bool
        if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.

    ------
    Returns:
    ------
        None
    """

    incorr_letts_set = set() # can be a set because position doesn't matter (only considers unique letters)
    all_letter_positions = set(i for i in range(0, len(opening_word)))
    guesses = 0
    opening_word_list = [letter for letter in opening_word]

    ### Can set a number of max guesses different than 5
    if max_attempts:
        max_guesses = max_attempts
    else:
        max_guesses = len(opening_word) 

    while opening_word != target_word:
        corr_all_dict = {} # letter == correct and position == correct
        corr_letts_incorr_pos_dict = {} # letter == correct and position != correct

        if guesses == 0:
            if verbose == True:
                print("-----------------------------\n")
        
        guesses += 1
        
        if verbose == True:
            print(f"Guess {guesses}:\n\t{' '.join(opening_word_list)}\n")
        else:
            print(f"Guess {guesses}: {opening_word}")

        ### EVALUATING CURRENT GUESS
        for i in all_letter_positions: # number of letters in each word (current word and target word)
            corr_all_dict[opening_word[i]] = set()
            corr_letts_incorr_pos_dict[opening_word[i]] = set()
            
            if opening_word[i] == target_word[i]: # letter == correct and position == correct
                corr_all_dict[opening_word[i]].add(i)

            elif opening_word[i] not in target_word: # if letter is not relevant at all
                incorr_letts_set.add(opening_word[i])

            elif (opening_word[i] != target_word[i] and  opening_word[i] in target_word): # letter == correct and position != correct
                corr_letts_incorr_pos_dict[opening_word[i]].add(i)

        ### CREATING NEXT WORD GUESS
        next_guess_list = ["_" for i in range(0, 5)] # initializes new list of 5 "_"

        # filling next guess with current correct letters in correct locations
        for lett, positions in corr_all_dict.items():
            if len(positions) > 0:
                for pos in positions:
                    next_guess_list[pos] = lett
        
        # getting locations of all empty positions still in list
        if "_" in next_guess_list:
            
            # get empty positions(s)
            empty_positions = set()
            for i in next_guess_list:
                if i == "_":
                    empty_positions.add(next_guess_list.index(i))

            # using current letter(s) in incorrect locations to fill empty position(s)
            for lett, positions in corr_letts_incorr_pos_dict.items():
                if len(positions) > 0:
                    for pos in empty_positions.difference(positions):
                        next_guess_list[pos] = lett

            # get empty positions again
            empty_positions = set()
            for i in next_guess_list:
                if i == "_":
                    empty_positions.add(next_guess_list.index(i)) 

            # using new letter(s) in incorrect locations to fill empty position(s)
            for i in empty_positions:
                for lett, freq in sorted_letters_counts:
                    if lett not in (incorr_letts_set or next_guess_list):
                        next_guess_list[i] = lett

                        # check if new word is in wordle list:
                        opening_word = "".join(next_guess_list)
                                                
                        if opening_word in all_words:
                            break

        if verbose == True:
            correct_positions = ["_" for i in range(0, len(opening_word))]
            for lett, positions in corr_all_dict.items():
                if len(positions) > 0:
                    for pos in positions:
                        correct_positions[pos] = lett

            incorrect_positions = ["_" for i in range(0, len(opening_word))]
            for lett, positions in corr_letts_incorr_pos_dict.items():
                if len(positions) > 0:
                    for pos in positions:
                        incorrect_positions[pos] = lett

        if opening_word == target_word:
            print(f"Congratulations! The Wordle has been solved in {guesses} guesses!")
            print(f"There were still {max_attempts - guesses} guesses remaining.")
            # print(f"Here are some statistics about this Wordle:") # come up with some stats about it
            print("\n-----------------------------")
            break
        else:
            if verbose == True:
                print (f"Correct letters in correct positions:\n\t{' '.join(correct_positions)}\n")
                print(f"Correct letters in incorrect positions:\n\t{' '.join(incorrect_positions)}\n")
                print(f"All incorrect letters:\n\t{', '.join(letter for letter in incorr_letts_set)}\n")

        # condition to break the loop
        if guesses == max_guesses:
            if verbose == True:
                print("-----------------------------\n")
                print(f"Unfortunately, the Wordle could not be solved in {max_attempts} guesses.\n")
                print(f"The target word was '{target_word}'. Better luck next time!\n")
                print("-----------------------------\n")
            else:
                print(f"\nUnfortunately, the Wordle could not be solved in {max_attempts} guesses.")
                print(f"The target word was '{target_word}'. Better luck next time!\n")
            break
        
        opening_word_list = [letter for letter in opening_word]

        if verbose == True:
            print (f"Next guess:\n\t{' '.join(opening_word_list)}\n")
            print("-----------------------------\n")



opening_word = "aeros"
target_word = "argos"

solve_wordle(opening_word, target_word, verbose = True)

-----------------------------

Guess 1:
	a e r o s

Correct letters in correct positions:
	a _ _ o s

Correct letters in incorrect positions:
	_ _ r _ _

All incorrect letters:
	e

Next guess:
	a r c o s

-----------------------------

Guess 2:
	a r c o s

Correct letters in correct positions:
	a r _ o s

Correct letters in incorrect positions:
	_ _ _ _ _

All incorrect letters:
	c, e

Next guess:
	a r v o s

-----------------------------

Guess 3:
	a r v o s

Correct letters in correct positions:
	a r _ o s

Correct letters in incorrect positions:
	_ _ _ _ _

All incorrect letters:
	v, c, e

Next guess:
	a r q o s

-----------------------------

Guess 4:
	a r q o s

Correct letters in correct positions:
	a r _ o s

Correct letters in incorrect positions:
	_ _ _ _ _

All incorrect letters:
	v, q, c, e

Next guess:
	a r x o s

-----------------------------

Guess 5:
	a r x o s

Correct letters in correct positions:
	a r _ o s

Correct letters in incorrect positions:
	_ _ _ _ _

All inco

In [11]:
sorted_letters_counts

[('e', 7455),
 ('s', 7319),
 ('a', 7128),
 ('o', 5212),
 ('r', 4714),
 ('i', 4381),
 ('l', 3780),
 ('t', 3707),
 ('n', 3478),
 ('u', 2927),
 ('d', 2735),
 ('p', 2436),
 ('m', 2414),
 ('y', 2400),
 ('c', 2246),
 ('h', 1993),
 ('g', 1864),
 ('b', 1849),
 ('k', 1753),
 ('f', 1240),
 ('w', 1127),
 ('v', 801),
 ('z', 503),
 ('j', 342),
 ('x', 326),
 ('q', 145)]