In [131]:
import itertools
import math
import numpy as np
import json
import matplotlib.pyplot as plt
import os

**WORDLE BEST GUESS CALCULATOR**

Step 1: Create functions that simulate the wordle game

- checked checks whether the position of the letter that is being checked in the answer word has been considered before. (prevents multiple green for one letter)
- iterates through the 5 letter word guessed one letter at a time comparing them to the answer
- returns an output string representing the word

In [22]:
def find(my_list, value):
    for index, element in enumerate(my_list):
        if element == value:
            return index
    return -1

In [154]:
def check_word(guess, answer):
    result = ['N'] * len(guess)  # Default all to 'N'
    answer_list = list(answer)
    
    # First pass: Check for correct positions (greens)
    for i, letter in enumerate(guess):
        if letter == answer_list[i]:
            result[i] = 'G'
            answer_list[i] = None  # Mark this letter as used
    
    # Second pass: Check for correct letters in wrong positions (yellows)
    for i, letter in enumerate(guess):
        if result[i] == 'N' and letter in answer_list:
            result[i] = 'Y'
            answer_list[answer_list.index(letter)] = None  # Mark this letter as used
    
    return "".join(result)

In [70]:
def create_hash_map():
    hash_map = {}
    for i in wordlist:
        for j in answerlist:
            hash_map[i + ", " + j] = check_word(i, j)
    return hash_map

Preferably dont run this command, it provides a significant computational speedup but takes up 700 MB of memory

In [72]:
word_map = create_hash_map()

- wordfile : file containing list of all valid words in a wordle game
- wordlist : wordfile content unpacked into a python list
- answerfile : file containing list of all answer words in a wordle game
- answerlist : answerfile content unpacked into a python list

In [117]:
with open(r'wordlist.txt', 'r') as wordfile:
    wordlist = wordfile.read().split()
len(wordlist)

14855

In [119]:
with open(r'solutionlist.txt', 'r') as answerfile:
    answerlist = answerfile.read().split()
len(answerlist)

2309

- <b> generate_permutations </b> : creates a list of all permutations of the specified characters
- <b> permutations </b> : list of all permutations possible in a wordle game
- <b> permutations_dict </b> : used for calculating probabilities of each permutation occuring for a specific word

In [122]:
def generate_permutations(characters):
    permutations = [''.join(p) for p in itertools.product(characters, repeat=5)]
    return permutations

In [124]:
permutations = generate_permutations(['G', 'N', 'Y']);

In [126]:
permutation_dict = {perm : 0 for perm in permutations}

<b> new_list </b> : used to update the list of words to a newer list based upon the result of a guess

In [129]:
def new_list(guess, permutation, list):
    newlist = []
    for answer in list:
        if check_word(guess, answer) == permutation:
            newlist.append(answer)
    return newlist

<b> find_probability_dict </b> : creates a dictionary of probabilities of each permutation for a specified guess

In [132]:
def find_probability_dict(guess, answerlist):
    probability_dict = {}
    for answer in answerlist:
        perm = word_map[guess + ", " + answer]
        probability_dict[perm] = probability_dict.get(perm, 0) + (1 / len(answerlist))
    return probability_dict

<b> calculate_entropy </b> : calculates the entropy for a word based on the probability dictionary generated by find_probability_dict

In [135]:
def calculate_entropy(probability_dict):
    entropy = 0
    for probability in probability_dict.values():
        entropy += -1 * (probability * math.log(probability, 2))
    return entropy

In [137]:
#Creating a copy to not alter any information within wordlist and make it easier to experiment with
remaining_words_list = wordlist.copy()

<b> entropy_dict</b> : A dictionary containing all words and the amount of information gained from each word

In [130]:
#Calculate the entropy for each word in the list
entropy_dict = {}
for word in remaining_words_list:
            probdict = find_probability_dict(word, answerlist)
            entropy = calculate_entropy(probdict)
            entropy_dict[word] = entropy

<b> sorted_entropy_dict</b> : Sorted version of the entropy_dict 

In [134]:
#Sort the dictionary based on entropy values
sorted_entropy_dict = dict(sorted(entropy_dict.items(), key = lambda item: item[1], reverse = True))

In [None]:
entropy_dict_two_steps : Performs a second 

In [102]:
item_list = list(sorted_entropy_dict.items())
entropy_dict_two_steps = sorted_entropy_dict.copy()
word_list_copy = wordlist.copy()
answer_list_copy = answerlist.copy()

for i in range(0, 200):
    prob_dict1 = find_probability_dict(itemlist[i][0], answerlist)
    entropy_dictionary = permutation_dict.copy()
    for j in entropy_dictionary.keys():
        remaining_words_list = new_list(itemlist[i][0], j, answer_list_copy)
        max_entropy = 0
        for word in word_list_copy:
            prob_dict2 = find_probability_dict(word, remaining_words_list)
            entropy = calculate_entropy(prob_dict2)
            max_entropy = max(max_entropy, entropy)
        entropy_dictionary[j] += max_entropy * prob_dict1.get(j, 0)

    avg_entropy = sum(list(entropy_dictionary.values()))
    print("WORD : " + itemlist[i][0] + " ENTROPY : " + str(avg_entropy))
    entropy_dict_two_steps[itemlist[i][0]] += avg_entropy
    

WORD : tarse ENTROPY : 4.1068145555471665
WORD : tiare ENTROPY : 4.076608858095578
WORD : soare ENTROPY : 4.107623263495029
WORD : roate ENTROPY : 4.110602875896228
WORD : raise ENTROPY : 4.076821629156655
WORD : reast ENTROPY : 4.151920545448849
WORD : raile ENTROPY : 4.125283838598162
WORD : slate ENTROPY : 4.183570042068703
WORD : salet ENTROPY : 4.182314795562495
WORD : crate ENTROPY : 4.179291376808235
WORD : irate ENTROPY : 4.134813176196052
WORD : trace ENTROPY : 4.187590457766297
WORD : sater ENTROPY : 4.1853758617617425
WORD : arise ENTROPY : 4.115017193445619
WORD : orate ENTROPY : 4.137798593467071
WORD : stare ENTROPY : 4.182436100533191
WORD : carte ENTROPY : 4.204828670076519
WORD : raine ENTROPY : 4.187660639047795
WORD : ranse ENTROPY : 4.214081872134109
WORD : caret ENTROPY : 4.205392633582906
WORD : ariel ENTROPY : 4.1607654580817455
WORD : taler ENTROPY : 4.160382068927504
WORD : carle ENTROPY : 4.24078259902201
WORD : slane ENTROPY : 4.268327443048271
WORD : snare E

In [31]:
sorted_entropy_dict_two_steps = dict(sorted(entropy_dict_two_steps.items(), key = lambda item: item[1], reverse = True))
print(list(sorted_entropy_dict_two_steps.items())[0:200])

[('tarse', 10.055789065502687), ('slate', 10.039389286178217), ('slane', 10.037284764807037), ('reast', 10.01965856629241), ('salet', 10.018337577654979), ('trace', 10.018019565846053), ('crate', 10.014507359441517), ('carle', 10.010147005753758), ('sater', 10.00980946500571), ('tiare', 10.006563080409087), ('roast', 10.004724796339833), ('torse', 10.001672954831172), ('carte', 9.9999360049904), ('carse', 9.998114992994116), ('toile', 9.997879201173951), ('trone', 9.99586073534721), ('roate', 9.995459189628235), ('soare', 9.992826007787786), ('ranse', 9.992385221882884), ('raile', 9.99043766763943), ('stare', 9.98932472278296), ('caret', 9.983599569512458), ('crane', 9.979400410252289), ('least', 9.977343724283344), ('stale', 9.977315314563764), ('carne', 9.975176739142272), ('slart', 9.975025985194538), ('raine', 9.973853994838336), ('sacre', 9.973526570852595), ('snare', 9.97087851090006), ('trice', 9.970364214037517), ('liane', 9.970170539739147), ('stole', 9.969438516031188), ('ria

In [70]:
def get_next_guess(remaining_words_list, answer_list):
    max = 0
    maxword = "trace"
    permutation = input("Enter the result of the guess: ")
    answer_list = find_word(maxword, permutation, answer_list)
    while permutation != "GGGGG" :
        if len(answer_list) == 1:
            return answer_list[0]
        for word in remaining_words_list:
            probdict = find_probability_dict(word, answer_list)
            entropy = calculate_entropy(probdict)
            if entropy > max:
                max = entropy
                maxword = word
        print("MAXWORD : ", maxword)
        print(len(remaining_words_list))
        print(len(answer_list))
        permutation = input("Enter the result of the guess: ")
        answer_list = find_word(maxword, permutation, answer_list)
        maxword = answer_list[0]
        max = 0

In [69]:
def get_next_guess(remaining_words_list, answer_list, starting_word):
    max_entropy = 0
    maxword = starting_word
    original_answer_list = answer_list.copy()
    print("Word to guess :", maxword)
    result = input("Enter the resulting colors :")
    answer_list = new_list(maxword, result , answer_list)
    counter = 1
    while result != "GGGGG" :
        if len(answer_list) == 1:
            return answer
        if len(answer_list) >= 4:
            maxword = next_guess_initial_step(remaining_words_list, answer_list)
        else:
            print(answer_list)
            maxword = next_guess_final_step(remaining_words_list, answer_list, [])
        max_entropy = 0
        counter += 1
        print("Word to guess :", maxword)
        result = input("Enter the resulting colors :")
        answer_list = new_list(maxword, result, answer_list)
        answer = answer_list[0]
    return answer, counter

In [96]:
def next_guess_initial_step(remaining_words_list, answer_list):
    max1 = 0
    maxword = ""
    for word in remaining_words_list:
            probdict = find_probability_dict(word, answer_list)
            entropy = calculate_entropy(probdict)
            if entropy > max1:
                max1 = entropy
                maxword = word
    return maxword

In [144]:
def next_guess_final_step(remaining_words_list, answer_list, original_answer_list):
    max1 = 0
    maxword = ""
    for word in remaining_words_list:
            probdict = find_probability_dict(word, answer_list)
            entropy = calculate_entropy(probdict)
            if word not in answer_list:
                entropy *= 0.3
            if entropy > max1:
                max1 = entropy
                maxword = word
    return maxword

In [132]:
def get_next_guess_automated(remaining_words_list, answer_list, answer, starting_word):
    max = 0
    maxword = starting_word
    original_answer_list = answer_list.copy()
    answer_list = new_list(maxword, check_word(maxword, answer) , answer_list)
    counter = 1
    guess_list = []
    guess_list.append(starting_word)
    while check_word(maxword, answer) != "GGGGG" :
        if len(answer_list) == 1:
            guess_list.append(answer)
            return counter + 1, guess_list
        if len(answer_list) >= 4:
            maxword = next_guess_initial_step(remaining_words_list, answer_list)
        else:
            maxword = next_guess_final_step(remaining_words_list, answer_list, original_answer_list)
        guess_list.append(maxword)
        answer_list = new_list(maxword, check_word(maxword, answer), answer_list)
        answer = answer_list[0]
        max = 0
        counter += 1
        
    return counter, guess_list

In [27]:
def store_word_counts(wordlist, answerlist, starting_word):
    hash_map = {}
    for answer in answerlist:
        counter = get_next_guess_automated(wordlist, answerlist, answer, starting_word)
        hash_map[answer] = counter
    return hash_map

In [29]:
def get_word_counts(wordlist, answerlist, starting_word):
    counter = 0
    for answer in answerlist:
        counter += get_next_guess_automated(wordlist, answerlist, answer, starting_word)
    return (counter/ len(answerlist))

In [80]:
def get_word_counts_dictionary(wordlist, answerlist, starting_word):
    word_counts_dictionary = {}
    for answer in answerlist:
        count, guess_list = get_next_guess_automated(wordlist, answerlist, answer, starting_word)
        word_counts_dictionary[answer] = {"count": count, "guesses" : guess_list}
    return (word_counts_dictionary)

Additional: creating an extension to solve wordle puzzles real time.

In [None]:
for i in ["tarse", "salet", "slate", "reast", "trace", "crate"]:
    word_counts_dictionary = get_word_counts_dictionary(wordlist, answerlist, i)
    with open(i + "v2.json", "w") as file:
        json.dump(word_counts_dictionary, file)

In [None]:
for i in ["tarse", "salet", "slate", "reast", "trace", "crate"]:
    word_counts_dictionary = get_word_counts_dictionary(wordlist, answerlist, i)
    with open(i + "v2.json", "w") as file:
        json.dump(word_counts_dictionary, file)

In [83]:
for i in ["tarse", "salet", "slate", "reast", "trace", "crate"]:
    with open("datasetsv2/" + i +"v2.json", "r") as file:
        test_dict = json.load(file)
        no_of_steps = 0
        for j in test_dict.keys():
            no_of_steps += test_dict[j]["count"]
        no_of_steps /= len(test_dict.keys())
        print(i , " : " ,no_of_steps)

tarse  :  3.432221741013426
salet  :  3.440450411433521
slate  :  3.426158510177566
reast  :  3.4274577739281074
trace  :  3.4854915547856216
crate  :  3.4928540493720224


Least average number of guesses is currently "SLATE" with 3.426158510177566 guesses on average.

According to Johnathan Olsen and 3Blue1Brown, the optimal answer is supposed to be "SALET" with 3.42 implying a minor bug in the code.

In [86]:
temp_lists = []
for i in ["tarse", "salet", "slate", "reast", "trace", "crate"]:
    with open("datasetsv2/" + i +"v2.json", "r") as file:
        temp_list = []
        test_dict = json.load(file)
        for j in test_dict.keys():
            for word in test_dict[j]["guesses"]:
                if word not in temp_list:
                    temp_list.append(word)
    temp_lists.append(temp_list)

In [94]:
for i,j in zip(temp_lists, ["tarse", "salet", "slate", "reast", "trace", "crate"]):
    with open("usedwords/" + j +"v2.json", "w") as file:
        json.dump(i, file)

In [100]:
for i in temp_lists[2]:
    temp_dict = {}
    for j in answerlist:
        temp_dict[j] = check_word(i, j)
    with open("check_word/" + i + "_check_word.json", "w") as file:
        json.dump(temp_dict, file)

In [135]:
for i in slate_dict.keys():
    for word in slate_dict[i]["guesses"]:
        file_path = f"check_word_shortened/{word}_check_word.json"

        # Load existing data or start fresh
        if os.path.exists(file_path):
            with open(file_path, "r") as file:
                try:
                    temp_dict = json.load(file)
                except json.JSONDecodeError:
                    temp_dict = {}
        else:
            temp_dict = {}

        # Update dictionary
        temp_dict[i] = check_word(word, i)

        # Save updated dictionary
        with open(file_path, "w") as file:
            json.dump(temp_dict, file, indent=2)


In [None]:
word = "cigar"
with open("datasetsv2/" + "slate" +"v2.json", "r") as file:
        test_dict = json.load(file)
        guess = "slate"
        while check_word(guess, word) != "GGGGG":
            
            

Dump the notebook data into a db file to make it easier to continue progress. <br>
Kept it at the bottom because I dont want to accidentally run these.

In [106]:
%who

In	 Out	 a	 answer	 answer_list_copy	 answerfile	 answerlist	 avg_entropy	 b	 
build_permutation_tree	 calculate_entropy	 check_word	 counter	 create_hash_map	 dataframe_columns	 dataframe_hash	 dict1	 dill	 
dtypes_str	 entropy	 entropy_dict	 entropy_dict_two_steps	 entropy_dictionary	 file	 find	 find_probability_dict	 find_word	 
for_loop	 generate_permutations	 get_dataframes	 get_next_guess	 get_next_guess_automated	 get_word_counts	 getpass	 hashlib	 i	 
ifPermutation	 import_pandas_safely	 is_data_frame	 item_list	 itemlist	 itertools	 j	 json	 math	 
max_entropy	 maxword	 new_list	 next_guess_final_step	 next_guess_initial_step	 next_word	 no_of_steps	 np	 number_of_steps_list	 
open	 perm	 perm_dict	 perm_level2	 permutation_dict	 permutation_dictionary	 permutations	 plt	 prob_dict1	 
prob_dict2	 probability_dictionary_dict	 probdict	 remaining_words_list	 sorted_entropy_dict	 sorted_entropy_dict_two_steps	 temp_dict	 temp_list	 temp_lists	 
test_dict	 word	 word_list_copy	 w

Final thoughts: i can definitely guess all the words. 

eg : if perm is NNYNN then the next thing to run is always morin

instead of current dict:
create a dict where NNYNN : morin
etc


In [3]:
import dill
dill.load_session('notebook_env.db')

In [145]:
dill.dump_session("notebook_env.db")