In [1]:
import urllib.request # for downloading wordle words from source
import numpy as np # for stats
import random # for randomly generating target and start words
import operator # for sorting letter frequency distribution
import time # for #dramaticeffect
import pandas as pd
# from collections import defaultdict # for letter frequency distribution

## Making Datasets
- Get all possible words that the target word could be
- For each word in the target words list, get counts of each letter to create letter distribution across entire vocabulary

### `alt_words_1` dataset
- len() == 14855

In [2]:
### If getting words from local file -- should be 14855 words in total

alt_words_1 = set() # set of all words

file_path = "data/alt_words_1.txt" # taken from "https://raw.githubusercontent.com/tabatkins/wordle-list/main/words"
f = open(file_path, "r", encoding = "utf-8")

with open(file_path, "r", encoding = "utf-8") as f:
    for word in f.read().split("\n"):
        if len(word) > 0: # there's one blank entry at the start
            alt_words_1.add(word)

f.close() # closes connection to file

print(len(alt_words_1))
alt_words_1 = list(alt_words_1)
alt_words_1[:10]

14855


['salps',
 'poohs',
 'oases',
 'skody',
 'sehri',
 'clote',
 'golps',
 'slebs',
 'avant',
 'beigy']

### `official_words` list dataset
- len() == 2309

In [3]:
official_words = set() # set of all words

file_path = "data/official_words.txt"
f = open(file_path, "r", encoding = "utf-8")

with open(file_path, "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word = line.split(" ")[-1]
        if (len(word) != 5 or word.isalpha() == False):
            pass
        else:
            official_words.add(word.lower())

f.close() # closes connection to file

for word in official_words:
    if len(word) != 5:
        print (word)

official_words = list(set(official_words))
print(len(official_words))
official_words[:10]

2309


['comic',
 'bosom',
 'pluck',
 'spelt',
 'merit',
 'silky',
 'state',
 'queue',
 'grime',
 'trick']

### `get_letter_counts()` function

In [4]:
### Gets most common letters of all words of the dataset

def get_letter_counts(letters: str, word_list: list, sort: str = "descending"):
    """
    Given a passed str of letters and a list of words, produces a frequency distribution of all letters
    
    ------
    Parameters:
    ------
    `letters`: str
        a string of letters to be counted. String must only be desired letters, with no spaces. Default is local variable containing all letters of the English alphabet
    `word_list`: list
        list of words (str) from which word frequencies will be counted
    `sort`: str
        if either "descending" or "ascending" are passed, returned list of tuples will be sorted accoringly, else returned dictionary will be unsorted

    ------
    Returns:
    ------
    `letters_counts_dict`: dict
        dictionary of {letter : count} pairs for each letter in passed `letters` sequence
    `sorted_counts_dicts`: list of tuples
        list of tuples. Format is ("letter", frequency). Ordered according to `sort` values
    """

    words_counts_dict = {}

    for word in word_list: # real dataset
        word_dict = {}
 
        for letter in word:
            if letter in word_dict:
                word_dict[letter] += 1
            else:
                word_dict[letter] = 1
        words_counts_dict[word] = word_dict

    letters_counts_dict = {}

    for letter in letters:
        letters_counts_dict[letter] = 0

    for word, count_dict in words_counts_dict.items():
        # print (word, count_dict)
        for letter, count in count_dict.items():
            letters_counts_dict[letter] += count

    if sort == "ascending":
        sorted_counts_dict = (sorted(letters_counts_dict.items(), key = operator.itemgetter(1), reverse = False))
        return sorted_counts_dicts

    if sort == "descending":
        sorted_counts_dict = sorted(letters_counts_dict.items(), key = operator.itemgetter(1), reverse = True)
        return sorted_counts_dict
    else:
        return letters_counts_dict

In [5]:
english_alphabet = "abcdefghijklmnopqrstuvwxyz"

alt_sorted_counts = get_letter_counts(english_alphabet, alt_words_1, sort = "descending")
alt_sorted_counts

[('e', 7455),
 ('s', 7319),
 ('a', 7128),
 ('o', 5212),
 ('r', 4714),
 ('i', 4381),
 ('l', 3780),
 ('t', 3707),
 ('n', 3478),
 ('u', 2927),
 ('d', 2735),
 ('p', 2436),
 ('m', 2414),
 ('y', 2400),
 ('c', 2246),
 ('h', 1993),
 ('g', 1864),
 ('b', 1849),
 ('k', 1753),
 ('f', 1240),
 ('w', 1127),
 ('v', 801),
 ('z', 503),
 ('j', 342),
 ('x', 326),
 ('q', 145)]

In [6]:
english_alphabet = "abcdefghijklmnopqrstuvwxyz"

official_sorted_counts = get_letter_counts(english_alphabet, official_words, sort = "descending")
official_sorted_counts

[('e', 1230),
 ('a', 975),
 ('r', 897),
 ('o', 753),
 ('t', 729),
 ('l', 716),
 ('i', 670),
 ('s', 668),
 ('n', 573),
 ('c', 475),
 ('u', 466),
 ('y', 424),
 ('d', 393),
 ('h', 387),
 ('p', 365),
 ('m', 316),
 ('g', 310),
 ('b', 280),
 ('f', 229),
 ('k', 210),
 ('w', 194),
 ('v', 152),
 ('z', 40),
 ('x', 37),
 ('q', 29),
 ('j', 27)]

### `best_guess_words()` function
- given a passed `word_list`, return the best possible words to guess in Wordle

In [7]:
#### RATE WORDS INDIVIDUALLY

# def rate_word(word: str, word_list: list):
#     """
#     Given a passed word and a list of words, calculates a rating of how
    
#     """
#     for word in best_words:
#         ratings_dict = {}
#         for letter in word:
#             for freq_letter, freq in sorted_counts:
#                 if letter == freq_letter:
#                     ratings_dict[letter] = freq
        
#         total_rating = 0
#         for letter, rating in ratings_dict.items():
#             total_rating += rating
        
#         word_ratings.append((word, round(total_rating / all_letters_count * 100, 2)))

#     word_ratings = sorted(word_ratings, key = operator.itemgetter(1), reverse = True)

In [231]:
### Best first guesses for a given Wordle list

def best_guess_words(word_list: list, show_letters: bool = False):
    """
    Given a passed list of English words of a consistent length, calculates the most statistically optimal first guess words, alongside a rating for each word. 
    
    Rating = sum(frequency of each unique letter in that word) / sum (all unique letter frequencies in word_list) * 100, rounded to 2 decimals.

    ------
    Parameters:
    ------
    `word_list`: list
        list of words (str) of consistent length
    `show_letters`: bool
        if True, also prints set of most optimal letters to guess

    ------
    Returns:
    ------
    `word_ratings`: list
        list of tuples. Format is [(word, rating)], where rating is calculated according to above formula
    `sorted_counts`: list of tuples
        list of tuples. Format is ("letter", frequency). Sorted according to `sort` value; ["descending" or "ascending"] if passed
    """
        
    english_alphabet = "abcdefghijklmnopqrstuvwxyz"

    sorted_counts = get_letter_counts(english_alphabet, word_list, sort = "descending")

    max_len_possible = len(word_list[0])

    ### Get words with the highest letter diversity
    while max_len_possible:

        best_letters = set()
        best_words = []

        for letter, freq in sorted_counts:
            best_letters.add(letter)
            if len(best_letters) == max_len_possible:
                break

        ### Get all words that have one of each of the 5 top most frequent letters
        for word in word_list:
            word_set = set()

            for letter in word:
                word_set.add(letter)

            if best_letters.issubset(word_set):
                best_words.append(word)

        if len(best_words) > 0:
            break
        else:
            max_len_possible -= 1 # only try the top 4 letters, then 3, then 2, ...
        
        if max_len_possible == 0:
            break

    all_letters_count = 0
    for letter, freq in sorted_counts:
        all_letters_count += freq

    word_ratings = []
    for word in best_words:
        ratings_dict = {}
        for letter in word:
            for freq_letter, freq in sorted_counts:
                if letter == freq_letter:
                    ratings_dict[letter] = freq
        
        total_rating = 0
        for letter, rating in ratings_dict.items():
            total_rating += rating
        
        word_ratings.append((word, round(total_rating / all_letters_count * 100, 2)))

    word_ratings = sorted(word_ratings, key = operator.itemgetter(1), reverse = True)

    if show_letters == True:
        return word_ratings, sorted_counts
    else:
        return word_ratings

In [232]:
# best_guess_words(alt_words_1, show_letters = True)
best_guess_words(official_words, show_letters = False)

[('arose', 39.18), ('adore', 36.8), ('opera', 36.55)]

In [239]:
def get_word_entropy(words_to_rate: list, all_words: list, ascending: bool = False):
    """
    Given a word and a word list, calculates entropy each word as a measure of its impact to the next possible guesses in Wordle, ordered according to `reverse` parameter.
    
    ------
    Parameters:
    ------
    `words_to_rate`: list
        list of strings to be rated
    `all_words`: list
        list of all possible words (str) of consistent length, to which each word in `words_to_rate` will be compared
    `ascending`: bool
        if True, returns list ordered ascending. If False, returns list in descending order

    ------
    Returns:
    ------
    `word_ratings`: list
        list of tuples. Format is [(word, rating)], where rating is calculated according to above formula
    `sorted_counts`: list of tuples
        list of tuples. Format is ("letter", frequency). Sorted according to `sort` value; ["descending" or "ascending"] if passed
    """

    if ascending == True:
        sorted_counts = get_letter_counts(english_alphabet, all_words, sort = "ascending")
    else:
        sorted_counts = get_letter_counts(english_alphabet, all_words, sort = "descending")

    all_letters_count = 0
    for letter, freq in sorted_counts:
        all_letters_count += freq

    word_ratings = []
    for word in words_to_rate:
        word = word.lower()
        ratings_dict = {}
        for letter in word:
            for freq_letter, freq in sorted_counts:
                if letter == freq_letter:
                    ratings_dict[letter] = freq
        
        total_rating = 0
        for letter, rating in ratings_dict.items():
            total_rating += rating
        
        word_ratings.append((word, round(total_rating / all_letters_count * 100, 2)))

    word_ratings = sorted(word_ratings, key = operator.itemgetter(1), reverse = True)

    return word_ratings


In [238]:
get_word_entropy(['tests'], official_words)

[('tests', 22.75)]

In [10]:
def count_vows_cons(word: str, y_vow = True):
    """
    Given a passed word, calculate the number of non-unique vowels and consonants in the word (duplicates counted more than once).
    
    ------
    Parameters:
    ------
    `word`: str
        a single passed word (str)
    `y_vow`: bool
        if True, "y" is considered a vowel. If False, "y" considered a consonant. Default is True

    ------
    Returns:
    ------
    `counts`: dict
        dictionary, where format is {letter type : count}
    """

    word = word.lower() # for consistency

    if y_vow == True:
        vows = "aeiouy"
        cons = "bcdfghjklmnpqrstvwxz"
    elif y_vow == False:
        vows = "aeiou"
        cons = "bcdfghjklmnpqrstvwxyz"

    counts = {}
    counts["vows"] = 0
    counts["cons"] = 0
    for letter in word:
        if letter in vows:
            counts["vows"] += 1
        if letter in cons:
            counts["cons"] += 1

    return counts

In [72]:
count_vows_cons("douce", y_vow = False)

{'vows': 3, 'cons': 2}

## Solving Wordle Functions

### broken function versions

In [12]:
# ### VERSION 1

# def solve_wordle(opening_guess: str, target_word: str, guesses = 0, verbose = False):
#     """
#     Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

#     -----
#     Parameters:
#     opening_guess: str
#         a five-letter string
#     target_word: str
#         a five-letter string
#     verbose: bool
#         if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.

#     -----
#     Returns:
#         None
#     """

#     all_letter_positions = set(i for i in range(0, 5)) # creates a set of 0-4 (for each letter position) -- just to avoid regenerating this each time it's needed
#     next_guess_list = ['_' for i in range(0, 5)] # initializes new list of 5 "_"

#     correct_positions = set()

#     incorrect_positions_words = set()
#     incorrect_letters = set()

#     incorrect_pos_letters = set()
#     incorrect_positions = set()

#     if guesses == 0:

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#     guesses += 1

#     if verbose == True:
#         print (f"Guess {guesses}:\n\t{opening_guess}\n")
#     else:
#         print (f"Guess {guesses}: {opening_guess}")

#     # evaluating current guess against target and returning correct letters
#     for i in all_letter_positions: # 5 letters in each word (current word and target word)
#         corr_curr_letts_list = ['_' for i in range(0, 5)]
#         incorr_curr_letts_list = ['_' for i in range(0, 5)]
#         incorr_pos_letters_list = ['_' for i in range(0, 5)]
        
#         if opening_guess[i] == target_word[i]: # if letters exist and are in the correct position
#             next_guess_list[i] = opening_guess[i]
#             correct_positions.add(i)            

#         elif opening_guess[i] not in target_word: # if letters do not exist at all in target word
#             incorrect_letters.add(opening_guess[i])
#             incorrect_positions_words.add(i)

#         elif (opening_guess[i] != target_word[i] and  opening_guess[i] in target_word): # if not at that position but is elsewhere in the word
#             incorrect_pos_letters.add(opening_guess[i])
#             incorrect_positions.add(i)

#     if verbose == True:

#         if len(correct_positions) > 0:
#             for pos in correct_positions:
#                 corr_curr_letts_list[pos] = opening_guess[pos]
#                 corr_curr_letts_str = " ".join(corr_curr_letts_list)

#             print (f"Current letters in correct locations:\n\t{corr_curr_letts_str}\n")
        
#         if len(incorrect_positions) > 0:
#             for pos in incorrect_positions:
#                 incorr_pos_letters_list[pos] = opening_guess[pos]
#                 incorr_pos_letts_str = " ".join(incorr_pos_letters_list)

#             print (f"Current correct letters in incorrect locations:\n\t{incorr_pos_letts_str}\n")

#         if len(incorrect_positions_words) > 0:
#             for pos in incorrect_positions_words:
#                 incorr_curr_letts_list[pos] = opening_guess[pos]
#                 incorr_curr_letts_str = " ".join(incorr_curr_letts_list)

#             print (f"Current letters not in target word:\n\t{incorr_curr_letts_str}\n")
#         else:
#             print (f"All current letters are in target word.\n")


#     # if the guess match the target, print a successful response and end function call
#     if opening_guess == target_word:

#         if guesses == 1:
#             print (f"Wordle has been solved in {guesses} guess!")
        
#         else:
#             print (f"Wordle has been solved in {guesses} guesses!")
#             print (f"{opening_guess}")
    
#     # if guess does not match the target, continue calling function until either max guesses is reached or guessed word matches target
#     else:
#         incorrect_positions_letters = all_letter_positions.difference(correct_positions) # works fine

#         # print (incorrect_letters, next_guess_list)

#         if len(incorrect_pos_letters) > 0:
            
#             for new_guess_position in incorrect_positions_letters:
#                 # print(new_guess_position)

#                 for incorr_pos_letter in list(incorrect_pos_letters):

#                     next_guess_list[new_guess_position] = incorr_pos_letter
            
#                     if "_" not in next_guess_list:
#                         next_guess_str = "".join(next_guess_list)
                        
#                         if next_guess_str in all_words:
#                             break

#         # fill empty spots with next best new unused letters
#         else:
            
#             for letter, freq in sorted_letters_counts:

#                 if letter not in (incorrect_letters or next_guess_list):
                    
#                     for new_guess_position in incorrect_positions_letters:
#                         next_guess_list[new_guess_position] = letter

#                 if "_" not in next_guess_list:
#                     next_guess_str = "".join(next_guess_list)
                    
#                     if next_guess_str in all_words:
#                         break

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#         # recursively call function with new guess until max number of tries is reached
#         if guesses < len(opening_guess):
#             opening_guess = next_guess_str
#             solve_wordle(opening_guess, target_word, guesses, verbose)

#         else:
#             print(f"\nUnfortunately, this Wordle could not be solved in {guesses} guesses.\n\nThe correct word was '{target_word}'.\n\nBetter luck next time.")

#             if verbose == True:
#                 print(f"\n----------------------------------------------------")

# # solve_wordle(opening_word, target_word, verbose = True)
# solve_wordle("fried", target_word, verbose = True)

In [13]:
# ### VERSION 2

# def solve_wordle(opening_guess: str, target_word: str, guesses = 0, verbose = False):
#     """
#     Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

#     -----
#     Parameters:
#     opening_guess: str
#         a five-letter string
#     target_word: str
#         a five-letter string
#     verbose: bool
#         if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.

#     -----
#     Returns:
#         None
#     """

#     all_letter_positions = set(i for i in range(0, 5)) # creates a set of 0-4 (for each letter position) -- just to avoid regenerating this each time it's needed

#     if guesses == 0:

#         if verbose == True:
#             print(f"----------------------------------------------------\n")

#     guesses += 1

#     if verbose == True:
#         print (f"Guess {guesses}:\n\t{opening_guess}\n")
#     else:
#         print (f"Guess {guesses}: {opening_guess}")

#     corr_all_dict = {} # letter == correct and position == correct
#     incorr_letts_set = set() # can be set because position doesn't matter. Letter is not relevant at all
#     corr_letts_incorr_pos_dict = {} # letter == correct and position != correct

#     # evaluating current guess against target and returning correct letters
#     for i in all_letter_positions: # 5 letters in each word (current word and target word)
#         corr_all_dict[opening_guess[i]] = set()
#         corr_letts_incorr_pos_dict[opening_guess[i]] = set()
        
#         if opening_guess[i] == target_word[i]: # letter == correct and position == correct
#             corr_all_dict[opening_guess[i]].add(i)

#         elif opening_guess[i] not in target_word: # if letter is not relevant at all
#             incorr_letts_set.add(opening_guess[i])

#         elif (opening_guess[i] != target_word[i] and  opening_guess[i] in target_word): # letter == correct and position != correct
#             corr_letts_incorr_pos_dict[opening_guess[i]].add(i)

#     print (f"corr_all_dict: {corr_all_dict}")
#     print(f"incorr_letts_set: {incorr_letts_set}")
#     print(f"corr_letts_incorr_pos_dict: {corr_letts_incorr_pos_dict}")

#     next_guess_list = ["_" for i in range(0, 5)] # initializes new list of 5 "_"

#     # filling next guess with current correct letters in correct locations
#     for lett, positions in corr_all_dict.items():
#         if len(positions) > 0:
#             for pos in positions:
#                 next_guess_list[pos] = lett
    
#     # getting locations of all empty positions still in list
#     if "_" in next_guess_list:
        
#         # get empty positions(s)
#         empty_positions = set()
#         for i in next_guess_list:
#             if i == "_":
#                 empty_positions.add(next_guess_list.index(i))

#         # using current letter(s) in incorrect locations to fill empty position(s)
#         for lett, positions in corr_letts_incorr_pos_dict.items():
#             if len(positions) > 0:
#                 for pos in empty_positions.difference(positions):
#                     next_guess_list[pos] = lett

#         # get empty positions again
#         empty_positions = set()
#         for i in next_guess_list:
#             if i == "_":
#                 empty_positions.add(next_guess_list.index(i))                    

#         # using new letter(s) in incorrect locations to fill empty position(s)

#         for i in empty_positions:
#             for lett, freq in sorted_letters_counts:
#                 if lett not in (incorr_letts_set or next_guess_list):
#                     next_guess_list[i] = lett

#                     # check if new word is in wordle list:
#                     next_guess_str = "".join(next_guess_list)
#                     if next_guess_str in all_words:
#                         break

#         print(next_guess_str)       
#         # solve_wordle(next_guess_str, target_word, guesses, verbose)
#         print ("still blanks")
#     else:
#         print ("no more blanks")
#         next_guess_str = "".join(next_guess_list)

#         # recursively call function until max attempts is reached or wordle is solved
#         # solve_wordle(next_guess_str, target_word, guesses, verbose)

# # solve_wordle(opening_word, target_word, verbose = True)
# solve_wordle("aeros", target_word, verbose = False)

In [14]:
# ### VERSION 3

# def solve_wordle(opening_word: str, target_word: str, max_attempts: int = len(opening_word), verbose: bool = False, drama: float = None):
#     """
#     Mimicking the popular web game, this function matches a current word to a target word in the most statistically optimal way possible.

#     ------
#     Parameters:
#     ------
#     `opening_word`: str
#         a string -- must be the same length as `target_word`
#     `target_word`: str
#         a string -- must be the same length as `opening_word`
#     `max_attempts`: int
#         the maximum number of attempts allowed to solve the Wordle
#     `verbose`: bool
#         if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.
#     `drama`: float or int
#         if int provided, each guess' output is delayed by that number of seconds, else each output is shown as quickly as possible. For ~dRaMaTiC eFfEcT~

#     ------
#     Returns:
#     ------
#         None
#     """

#     incorr_letts_set = set() # can be a set because position doesn't matter (only considers unique letters)
#     all_letter_positions = set(i for i in range(0, len(opening_word)))
#     guesses = 0
#     opening_word_list = [letter for letter in opening_word]

#     ### Can set a number of max guesses different than 5
#     if max_attempts:
#         max_guesses = max_attempts
#     else:
#         max_guesses = len(opening_word)

#     if opening_word == target_word:
#         print(f"Congratulations! The Wordle was solved on the first try. That's incredible!")
#         if max_attempts == 2:
#             print(f"There was 1 guess remaining.")
#         else:
#             print(f"There were still {max_attempts - 1} guesses remaining.")
#         # print(f"Here are some statistics about this Wordle:") # come up with some stats about it
#         print("\n-----------------------------")

#     ### "GUESSING" BEGINS
#     # while opening_word != target_word:
#     while opening_word:

#         if drama:
#             if type(drama) == float or int:
#                 time.sleep(np.absolute(drama))

#         corr_all_dict = {} # letter == correct and position == correct
#         corr_letts_incorr_pos_dict = {} # letter == correct and position != correct

#         if guesses == 0:
#             if verbose == True:
#                 print("-----------------------------\n")
        
#         guesses += 1
        
#         if verbose == True:
#             print(f"Guess {guesses}:\n\t{' '.join(opening_word_list)}\n")
#         else:
#             print(f"Guess {guesses}: {opening_word}")

#         ### EVALUATING CURRENT GUESS
#         for i in all_letter_positions: # number of letters in each word (current word and target word)
#             corr_all_dict[opening_word[i]] = set()
#             corr_letts_incorr_pos_dict[opening_word[i]] = set()
            
#             if opening_word[i] == target_word[i]: # letter == correct and position == correct
#                 corr_all_dict[opening_word[i]].add(i)

#             elif (opening_word[i] != target_word[i] and  opening_word[i] in target_word): # letter == correct and position != correct
#                 corr_letts_incorr_pos_dict[opening_word[i]].add(i)

#             elif opening_word[i] not in target_word: # if letter is not relevant at all
#                 incorr_letts_set.add(opening_word[i])

#         ### CREATING NEXT WORD GUESS
#         next_guess_list = ["_" for i in range(0, 5)] # initializes new list of 5 "_"

#         # filling next guess with current correct letters in correct locations
#         for lett, positions in corr_all_dict.items():
#             if len(positions) > 0:
#                 for pos in positions:
#                     next_guess_list[pos] = lett

#         # print (next_guess_list)

#         # getting locations of all empty positions still in list
#         if "_" in next_guess_list:

#             # print ("EMPTIESSS")

#             # get empty positions(s)
#             empty_positions = set()

#             for i, val in enumerate(next_guess_list):

#                 if val == "_":
#                     empty_positions.add(i)

#             # using current letter(s) in incorrect locations to fill empty position(s)
#             for lett, positions in corr_letts_incorr_pos_dict.items():
#                 if len(positions) > 0:
#                     # print (empty_positions.difference(positions))
#                     for pos in empty_positions.difference(positions):
#                         # print (lett)
#                         next_guess_list[pos] = lett

#             # print (incorr_letts_set)
#             # print (next_guess_list)


# ############################# ISSUE IS HERE #############################

#             # using new letter(s) in incorrect locations to fill empty position(s)
#             for i in empty_positions:
#                 for lett, freq in sorted_letters_counts:

#                     # make sure the new letter is one that hasn't been used it
#                     if lett not in (incorr_letts_set or set(next_guess_list)):
#                         next_guess_list[i] = lett

#                     # check if new word is in wordle list:
#                     if "".join(next_guess_list) in all_words:
#                         break
#                     else:
#                         continue

#                 # print (next_guess_list)
#             opening_word = "".join(next_guess_list)

#             # print (opening_word)

# ############################# ISSUE IS HERE #############################

#         if verbose == True:
#             correct_positions = ["_" for i in range(0, len(opening_word))]
#             for lett, positions in corr_all_dict.items():
#                 if len(positions) > 0:
#                     for pos in positions:
#                         correct_positions[pos] = lett

#             incorrect_positions = ["_" for i in range(0, len(opening_word))]
#             for lett, positions in corr_letts_incorr_pos_dict.items():
#                 if len(positions) > 0:
#                     for pos in positions:
#                         incorrect_positions[pos] = lett

#         if opening_word == target_word:
#             if guesses == 1:
#                 print(f"Congratulations! The Wordle has been solved in {guesses} guess!")
#             else:
#                 print(f"\nCongratulations! The Wordle has been solved in {guesses} guesses!")
#                 print(f"There were still {max_attempts - guesses} guesses remaining.")
#                 # print(f"Here are some statistics about this Wordle:") # come up with some stats about it
#                 # print("\n-----------------------------")
#             break
#         else:
#             if verbose == True:
#                 print (f"Correct letters in correct positions:\n\t{' '.join(correct_positions)}\n")
#                 print(f"Correct letters in incorrect positions:\n\t{' '.join(incorrect_positions)}\n")
#                 print(f"All incorrect letters:\n\t{', '.join(letter for letter in incorr_letts_set)}\n")

#         # condition to break the loop
#         if guesses == max_guesses:
#             if verbose == True:
#                 print("-----------------------------\n")
#                 print(f"Unfortunately, the Wordle could not be solved in {max_attempts} guesses.\n")
#                 print(f"The target word was '{target_word}'. Better luck next time!\n")
#                 print("-----------------------------\n")
#             else:
#                 print(f"\nUnfortunately, the Wordle could not be solved in {max_attempts} guesses.")
#                 print(f"The target word was '{target_word}'. Better luck next time!\n")
#             break
        
#         opening_word_list = [letter for letter in opening_word]

#         if verbose == True:
#             print (f"Next guess:\n\t{' '.join(opening_word_list)}\n")
#             print("-----------------------------\n")

# solve_wordle("perks", "agros", max_attempts = 5, verbose = False, drama = 0)

In [15]:
# ### VERSION 4

# guessword = "fired"
# # guessword = "album"
# targetword = "tried"

# guessword = guessword.lower()
# targetword = targetword.lower()

# print (f"Guess: {guessword}")
# print (f"Target: {targetword}\n")

# wordlength = len(guessword)

# simple_solve = True

# letter_positions = set(i for i in range(0, wordlength))
# all_perfect_dict = {}
# wrong_pos_dict = {}
# dont_guess_again_set = set()

# ########################################## THIS WORKS ##########################################            

# next_guess_list = ["_" for i in range(0, wordlength)] # have to establish this ahead of the immediate next section

# ##### FIRST GUESS IS EVALUTED
# for i in letter_positions: # number of letters in each word (current word and target word)
    
#     all_perfect_dict[guessword[i]] = set()
#     wrong_pos_dict[guessword[i]] = set()
#     # dont_guess_again_set[guessword[i]] = set()
    
#     if guessword[i] == targetword[i]: # letter is correct and position is correct
#         # all_perfect_dict[guessword[i]].add(i)
#         next_guess_list[i] = guessword[i]

#     if (guessword[i] != targetword[i] and  guessword[i] in targetword): # letter is correct but position is incorrect
#         wrong_pos_dict[guessword[i]].add(i)

#     if guessword[i] not in targetword: # if letter is not relevant at all
#         dont_guess_again_set.add(guessword[i]) # intent: "don't guess this letter at any position again"
#         for pos in letter_positions:
#             wrong_pos_dict[guessword[i]].add(pos) # intent: "don't guess this letter at any position again"

# ##### had to do this to get all_perfect_dict to be able to account for duplicate letters in guessword
# for i, letter in enumerate(next_guess_list):
#     if letter != "_":
#         all_perfect_dict[next_guess_list[i]].add(i)

# # print (next_guess_list)

# ##### if no letters are correct, add them all to dont_guess_again_set and pick the best new word from the list that doesn't include any of them (just picks the first)
# if len(set(guessword).difference(set(targetword))) == 5:

#     print ("\n----- No letters were correct. A new word with new letters will be chosen. -----\n")

#     # for letter in guessword:
#     #     for pos in letter_positions:
#     #         dont_guess_again_set[guessword[i]].add(pos)

#     new_word_letters = set()

#     while len(new_word_letters) < wordlength:

#         for letter, freq in sorted_letters_counts:

#             if letter not in dont_guess_again_set:
#                 new_word_letters.add(letter)

#                 if len(new_word_letters) == wordlength:
#                     break
                    
#         for word in all_words:

#             if set(word) == new_word_letters:
#                 guessword = word # setting new guessword to this -- here, it should loop back up to the start of the whole evaluation process
                
#                 # print (f"\nNext guess: {guessword}\n")

#                 break

# ########################################## THIS WORKS ##########################################

# ##### if at least one letter is correct (even in the wrong position)
# else:
#     if simple_solve == False:
#         print ("\n----- At least one letter was correct. A new word with similar letters will be chosen. -----\n")

#         #### getting set of letters previously in incorrect positions
#         try_again_letters = set()
#         for letter, positions in wrong_pos_dict.items():
#             if len(wrong_pos_dict[letter]) > 0 and len(wrong_pos_dict[letter]) != 5: # gets only letters that were guessed and were in wrong positions
#                 if letter not in next_guess_list:
#                     try_again_letters.add(letter)

#         #### fills next guess with try_again_letters in new positions, if they are not already in the word (maximizing letter guessing potential)
#         for i, letter in enumerate(next_guess_list): # i is position, letter is letter in that position
#             if letter == "_":
#                 for lett in try_again_letters:
#                     if lett not in next_guess_list:
#                         next_guess_list[i] = lett

#         if "_" in next_guess_list:
#             print ("YASS BITCHES")
#             valid_word = False
#             while valid_word == False:
            
#                 for i, letter in enumerate(next_guess_list): # i is position, letter is letter in that position
                    
#                     if letter == "_":
                        
#                         for lett, freq in sorted_letters_counts:
                            
#                             if lett not in (next_guess_list and dont_guess_again_set):
                                
#                                 next_guess_list[i] = lett
#                                 # print (next_guess_list)

#                                 if "".join(next_guess_list) in all_words:
#                                     break
#                                 else:
#                                     dont_guess_again_set.add(lett)
#                 break

#             #### janky af and should definitely change this but it works for now
#             dont_guess_again_set = dont_guess_again_set.difference(set(all_perfect_dict))
#             for letter, positions in wrong_pos_dict.items():
#                 if len(positions) == 5:
#                     dont_guess_again_set.add(letter)

#             guessword = "".join(next_guess_list)
#             # print (guessword)

#     elif simple_solve == True:
        
#         valid_letters = set()

#         for letter, positions in all_perfect_dict.items():
#             if len(positions) > 0:
#                 # print (letter)
#                 valid_letters.add(letter)
        
#         for letter, positions in wrong_pos_dict.items():
#             if 0 < len(positions) < 5:
#                 # print (letter)
#                 valid_letters.add(letter)

#         print (dont_guess_again_set)


#         # print (valid_letters)
#         # else:
#         #     for word in all_words:
#         #         if set(word) == valid_letters:
#         #             valid_words.add(word)
        



# # print (valid_letters)
# # print(f"\nAll perfect: {all_perfect_dict}")
# # print(f"Wrong positions: {wrong_pos_dict}")
# # print(f"All wrong: {dont_guess_again_set}")
# # print(next_guess_list)
# print (f"\n{guessword}")

In [16]:
# #### VERSION 5

# #### Step 1
# guess = "aeros"
# target = "tired"

# print (f"Guess: {guess}")
# print (f"Target: {target}\n")

# wordlen = len(guess)
# letter_positions = set(i for i in range(0, wordlen))

# guess_set = set()
# target_set = set() # might not need this for anything -- is cheating

# perfect_dict = {}
# wrong_pos_dict = {}
# dont_guess_again = set()
# guessed_words = set() # running set of guessed words

# #### Step 2 -- ALL PERFECT
# for i in letter_positions: # number of letters in each word (current word and target word)
#     guess_set.add(guess[i])
#     perfect_dict[guess[i]] = set()
#     wrong_pos_dict[guess[i]] = set()
    
#     if guess[i] == target[i]: # letter == correct and position == correct
#         perfect_dict[guess[i]].add(i)

#     if (guess[i] != target[i] and  guess[i] in target): # letter == correct and position != correct
#         wrong_pos_dict[guess[i]].add(i)

#     if guess[i] not in target: # if letter is not relevant at all
#         dont_guess_again.add(guess[i])

# guessed_words.add(guess)

# # print(guess_set) # works
# # print(target_set) # works
# print(f"Perfect dict: {perfect_dict}") # works
# print(f"wrong pos dict: {wrong_pos_dict}") # works
# print(f"dont guess again: {dont_guess_again}\n") # works
    
# #### Step 3 -- ALL PERFECT
# next_letters = set()
# for letter, positions in perfect_dict.items():
#     if len(positions) > 0:
#         next_letters.add(letter)

# for letter, positions in wrong_pos_dict.items():
#     if len(positions) > 0:
#         next_letters.add(letter)

# # print (next_letters)

# # #### Step 4 -- ALL PERFECT
# switch = None
# if len(next_letters) != wordlen:
#     switch = False

# while switch == False:

#     for letter, freq in sorted_letters_counts:
#         if letter not in dont_guess_again:
#             next_letters.add(letter) # duplicates don't matter because it's a set

#         if len(next_letters) == wordlen:
#             switch == True # should break while loop
#             break # breaks for loop (stops adding more letters)
#     break

# print (f"Next letters: {next_letters}")

# #### Step 5
# valid_words = []
# for word in all_words:
#     word_set = set()

#     for letter in word:
#         word_set.add(letter)
    
#     if word_set == next_letters:
#         valid_words.append(word)

# print (f"Valid next words: {valid_words}\n")

# ### Step 6
# valid_word_dicts = []
# for word in valid_words:
#     new_perfect_dict = {}

#     for i in letter_positions:
#         new_perfect_dict[word[i]] = set()

#         if word[i] == target[i]: # letter == correct and position == correct
#             new_perfect_dict[word[i]].add(i)

#     valid_word_dicts.append((word, new_perfect_dict))

# # print(valid_word_dicts)

# #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
# perfect_letters = []
# for letter, positions in perfect_dict.items():
#     for pos in positions:
#         if len(positions) > 0:
#             perfect_letters.append((letter, pos))

# #### List of tuples of perfect letters. Eg: [('e', 2), ('a', 3)]
# valid_words_perfect_letters_list = []
# for word, dictionary in valid_word_dicts:
#     potential_letters = []
#     for letter, positions in dictionary.items():
#         for pos in positions:
#             if len(positions) > 0:
#                 potential_letters.append((letter, pos))
#     valid_words_perfect_letters_list.append((word, potential_letters))

# print (f"perfect_letters: {perfect_letters}")
# print (f"valid_words_perfect_letters_list: {valid_words_perfect_letters_list}")

# for old_lett, old_pos in perfect_letters:
#     for word, each_list in valid_words_perfect_letters_list:
#         for new_lett, new_pos in each_list:
#             if (old_lett, old_pos) in each_list:
#                 guess = word
#                 guessed_words.add(guess)
#                 break
#             else:                
#                 random_int = random.randint(0, len(valid_words) - 1)
#                 guess = valid_words[random_int]
#                 guessed_words.add(guess)
#             break
#         break
#     break

# guessed_words

In [17]:
# #### VERSION 6 --- THIS WORKS --- FOR SAFEKEEPING

# #### Step 1
# guess = "aeros"
# target = "tired"

# print (f"Guess: {guess}")
# print (f"Target: {target}\n")

# wordlen = len(guess)
# letter_positions = set(i for i in range(0, wordlen))

# guess_set = set()
# target_set = set() # might not need this for anything -- is cheating

# perfect_dict = {}
# wrong_pos_dict = {}
# wrong_pos_set = set()
# dont_guess_again = set()
# guessed_words = [] # running set of guessed words

# guess_num = 0

# ################################################ WHILE LOOP GOES HERE ################################################

# #### Step 2 -- ALL PERFECT
# for i in letter_positions: # number of letters in each word (current word and target word)
#     guess_set.add(guess[i])
#     perfect_dict[guess[i]] = set()
#     wrong_pos_dict[guess[i]] = set()
    
#     if guess[i] == target[i]: # letter == correct and position == correct
#         perfect_dict[guess[i]].add(i)

#     if (guess[i] != target[i] and  guess[i] in target): # letter == correct and position != correct
#         wrong_pos_dict[guess[i]].add(i)
#         wrong_pos_set.add(guess[i])

#     if guess[i] not in target: # if letter is not relevant at all
#         dont_guess_again.add(guess[i])

# guessed_words.append(guess)

# # print(guess_set) # works
# # # print(target_set) # works
# # print(f"Perfect dict: {perfect_dict}") # works
# # print(f"wrong pos dict: {wrong_pos_dict}") # works

    
# #### Step 3 -- ALL PERFECT
# next_letters = set()
# for letter, positions in perfect_dict.items():
#     if len(positions) > 0:
#         next_letters.add(letter)

# for letter, positions in wrong_pos_dict.items():
#     if len(positions) > 0:
#         next_letters.add(letter)

# print (f"next letters: {next_letters}")

# #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
# perfect_letters = []
# for letter, positions in perfect_dict.items():
#     for pos in positions:
#         if len(positions) > 0:
#             perfect_letters.append((letter, pos))

# #### all words that have correct letters in same spots
# words_matching_correct_letters = []
# for word in all_words:
#     word_set = set()
#     for letter, pos in perfect_letters:
#         if word[pos] == letter:
#             words_matching_correct_letters.append(word)

# #### excluding words with letters in known incorrect positions
# incorrect_positions = []
# for letter, positions in wrong_pos_dict.items():
#     for pos in positions:
#         if len(positions) > 0:
#             incorrect_positions.append((letter, pos))


# #### all words that have correct letters in incorrect spots -- so they can be excluded efficiently
# words_with_incorrect_letters = []
# for word in all_words:
#     word_set = set()
#     for letter, pos in incorrect_positions:
#         if word[pos] == letter:
#             words_with_incorrect_letters.append(word)

# print(f"dont guess again: {dont_guess_again}") # works
# print(f"correct_positions: {perfect_letters}")
# print(f"incorrect_positions: {incorrect_positions}\n")


# # Returns True
# # print(A.issubset(B)) # "if everything in A is in B", returns Bool

# #### all unguessed words that are potential next guesses. Contains all correct letters,
# #### including matching letters in correct positions, doesn't include incorrect letters
# potential_next_guess = []
# for word in words_matching_correct_letters:
#     word_set = set()
#     if word not in words_with_incorrect_letters:
#         for letter in word:
#             word_set.add(letter)
#         if next_letters.issubset(word_set):# == True and word_set.issubset(dont_guess_again) == False:
#             if word not in guessed_words:
#                 potential_next_guess.append(word)
                
# print(f"Number of valid next guesses: {len(potential_next_guess)}")
# # potential_next_guess[:10]

# #### Guessing next word
# randomint = random.randint(0, len(potential_next_guess) - 1) # len will be 1 larger than last index because len counts first as 1 and index counts first as 0
# next_guess = potential_next_guess[randomint]
# guessed_words.append(next_guess)

# ################################################ WHILE LOOP ENDS HERE ################################################

# print(guessed_words)
# print(next_guess)

In [18]:
# #### VERSION 6 -- THIS WORKS

# #### FUNCTION ARGUMENTS -- COPY FROM OTHER FUNCTION
# guess = "aeros"
# target = "tired"
# max_attempts = 10
# verbose = True
# drama = None

# #### FUNCTION
# print (f"Target: {target}\n")

# wordlen = len(guess)
# letter_positions = set(i for i in range(0, wordlen))

# guess_set = set()
# perfect_dict = {}
# wrong_pos_dict = {}
# wrong_pos_set = set()

# dont_guess_again = set()

# guessed_words = [] # running set of guessed words
# guess_num = 0 # baseline for variable
# dont_guess_words = []
# set_of_dont_guess_words = set()
# set_of_incorrect_positions = set()
# incorrect_positions = []


# if max_attempts:
#         max_guesses = max_attempts
# else:
#     max_guesses = len(guess)
# ################################################ WHILE LOOP GOES HERE ################################################

# while guess: # while there is any guess -- there are conditions to break it at the bottom

#     guessed_words.append(guess)

#     guess_num += 1 # each time the guess is processed
#     print(f"Guess {guess_num}: {guess}")

#     guess_set = set()
#     # perfect_dict = {}
#     # wrong_pos_dict = {}
#     wrong_pos_set = set()

#     # dont_guess_again = set()

#     #### Step 2 -- ALL PERFECT
#     for i in letter_positions: # number of letters in each word (current word and target word)
#         guess_set.add(guess[i])

#         if guess[i] not in perfect_dict:
#             perfect_dict[guess[i]] = set()
#         if guess[i] not in wrong_pos_dict:
#             wrong_pos_dict[guess[i]] = set()

#         ### EVALUATE CURRENT GUESS
#         if guess[i] == target[i]: # letter == correct and position == correct
#             perfect_dict[guess[i]].add(i)

#         if (guess[i] != target[i] and  guess[i] in target): # letter == correct and position != correct
#             wrong_pos_dict[guess[i]].add(i)
#             wrong_pos_set.add(guess[i])

#         if guess[i] not in target: # if letter is not relevant at all
#             dont_guess_again.add(guess[i])

#     print (f"perfect dict: {perfect_dict}")

#     #### Step 3 -- ALL PERFECT
#     next_letters = set()
#     for letter, positions in perfect_dict.items():
#         if len(positions) > 0:
#             next_letters.add(letter)

#     for letter, positions in wrong_pos_dict.items():
#         if len(positions) > 0:
#             next_letters.add(letter)

#     # print (f"next letters: {next_letters}")

#     #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
#     perfect_letters = []
#     for letter, positions in perfect_dict.items():
#         for pos in positions:
#             if len(positions) > 0:
#                 perfect_letters.append((letter, pos))

#     #### all words that have correct letters in same spots
#     words_matching_correct_all = []
#     for word in all_words:
#         word_set = set()
#         for letter, pos in perfect_letters:
#             if word[pos] == letter:
#                 words_matching_correct_all.append(word)

#     print(f'number of potential next guesses TEST: {len(words_matching_correct_all)}')

#     #### excluding words with letters in known incorrect positions
#     # incorrect_positions = []
#     for letter, positions in wrong_pos_dict.items():
#         for pos in positions:
#             if len(positions) > 0:
#                 if (letter, pos) not in incorrect_positions:
#                     incorrect_positions.append((letter, pos))

#     #### all words that have correct letters in incorrect spots -- so they can be excluded efficiently
#     # dont_guess_words = []
#     for word in all_words:
#         word_set = set()
#         for letter, pos in incorrect_positions:
#         # for letter, pos in set_of_incorrect_positions:
#             if word[pos] == letter:
#                 dont_guess_words.append(word)

#     if verbose == True:
#         print(f"correct_positions: {perfect_letters}")
#         print (f"next letters: {next_letters}")
#         print(f"incorrect_positions: {incorrect_positions}")
#         # print(f"incorrect_positions: {set_of_incorrect_positions}")
#         print(f"dont guess again: {dont_guess_again}\n") # works

#     # Returns True
#     # print(A.issubset(B)) # "if everything in A is in B", returns Bool

#     #### all unguessed words that are potential next guesses. Contains all correct letters,
#     #### including matching letters in correct positions, doesn't include incorrect letters
#     potential_next_guesses = []
#     for word in words_matching_correct_all:
#         word_set = set()
#         if word not in dont_guess_words:
#             for letter in word:
#                 word_set.add(letter)

#             if next_letters.issubset(word_set):# == True and word_set.issubset(dont_guess_again) == False:
#                 if word not in guessed_words:
#                     switch = True
#                     for bad_letter in dont_guess_again:
#                         if bad_letter not in word:
#                             potential_next_guesses.append(word)

#     if verbose == True:                    
#         print(f"Number of valid next guesses: {len(potential_next_guesses)}")
#         print (f"Words guessed so far: {guessed_words}")

#     #### Guessing next word
#     if len(potential_next_guesses) == 1:
#         guess = potential_next_guesses[0]
#     else:
#         # randomly guessing next word
#         # randomint = random.randint(1, len(potential_next_guesses) - 1) # len will be 1 larger than last index because len counts first as 1 and index counts first as 0
#         # guess = potential_next_guesses[randomint]
#         for word in potential_next_guesses:
#             for letter, freq in sorted_letters_counts:
#                 if letter not in (next_letters):
#                     if letter in word:
#                         guess = word
#                         break
#                     break

#     if guess == target:
#         if guess_num == 1:
#             print(f"Congratulations! The Wordle has been solved in {guess_num} guess!")
#         else:
#             print(f"\nCongratulations! The Wordle has been solved in {guess_num} guesses!")
#             # print(f"There were still {max_attempts - guess_num} guesses remaining.")
#             # print(f"Here are some statistics about this Wordle:") # come up with some stats about it
#             # print("\n-----------------------------")
#         break

#     if guess_num == max_guesses:
#         if verbose == True:
#             # print("-----------------------------\n")
#             print(f"Unfortunately, the Wordle could not be solved in {max_attempts} guesses.\n")
#             print(f"The target word was '{target}'. Better luck next time!\n")
#             print("-----------------------------\n")
#         else:
#             print(f"\nUnfortunately, the Wordle could not be solved in {max_attempts} guesses.")
#             print(f"The target word was '{target}'. Better luck next time!\n")
#         break
#     else: # if not at max guesses yet
#         if verbose == True:
#             print (f"Next guess: {guess}")
#             print("\n-----------------------------\n")


#     ################################################ WHILE LOOP ENDS HERE ################################################

### `solve_wordle()` function

In [273]:
#### VERSION 6 -- THIS ONE WORKS

def solve_wordle(word_list: list, max_guesses: int = None, guess: str = None, target: str = None, bias: bool = True, random_guess: bool = False, random_target: bool = False, verbose: bool = False, drama: float = None, return_stats: bool = False):
    """
    Mimicking the popular web game, this function matches a current word to a target word in the same way a human would, in the most statistically optimal way possible.

    ------
    Parameters:
    ------
    `word_list`: list
        list of valid words to be considered
    `guess`: str
        a string -- must be the same length as `target_word`
    `target`: str
        a string -- must be the same length as `opening_word`
    `bias`: str ['entropy', 'common', 'rare', None]
        'entropy' biases next word guesses to be the ones with the highest impact on the range of next possible guesses.

        'common' biases next word guesses to be words that are more commonly used

        'rare' biases next word guesses to be words that are more rarely used

        None chooses a next guess at random of all available guesses

    `max_guesses`: int
        the maximum number of attempts allowed to solve the Wordle
    `random_guess`: bool
        if True, randomly chooses a starting word from all words within `word_list`. If False, passed starting word must be used instead
    `random_target`: bool
        if True, randomly chooses a target word from all words within `word_list`. If False, passed target word must be used instead
    `verbose`: bool
        if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.
    `drama`: float or int
        if int provided, each guess' output is delayed by that number of seconds, else each output is shown as quickly as possible. For ~dRaMaTiC eFfEcT~
    `return_stats`: bool
        if True, prints nothing and returns a dictionary of various statistics about the function's performance trying to solve the puzzle

    ------
    Returns:
    ------
    `stats_dict`: dict
        dictionary containing various statistics about the function's performance trying to solve the puzzle
    """

    if random_guess == True:
        randomint_guess = random.randint(0, len(word_list) - 1)
        guess = word_list[randomint_guess]

    if random_target == True:
        randomint_target = random.randint(0, len(word_list) - 1)
        target = word_list[randomint_target]

    stats_dict = {}
    stats_dict['first_guess'] = guess
    stats_dict['target_word'] = target
    stats_dict['first_guess_vowels'] = float(count_vows_cons(guess, y_vow = True)['vows'])
    stats_dict['first_guess_consonants'] = float(count_vows_cons(guess, y_vow = True)['cons'])
    stats_dict['target_vowels'] = float(count_vows_cons(target, y_vow = True)['vows'])
    stats_dict['target_consonants'] = float(count_vows_cons(target, y_vow = True)['cons'])

    english_alphabet = "abcdefghijklmnopqrstuvwxyz"

    word_list_sorted_counts = get_letter_counts(english_alphabet, word_list, sort = "descending")
    
    wordlen = len(guess)
    letter_positions = set(i for i in range(0, wordlen))

    guess_set = set()
    perfect_dict = {}
    wrong_pos_dict = {}
    wrong_pos_set = set()
    dont_guess_again = set()

    guessed_words = [] # running set of guessed words
    guess_num = 0 # baseline for variable
    dont_guess_words = set()
    incorrect_positions = []

    if max_guesses == None: # if no value is passed, default is len(guess)
        max_guesses = wordlen
    else: # else it is the value passed
        max_guesses = max_guesses

    perfect_letts_per_guess = []
    wrong_pos_per_guess = []
    wrong_letts_per_guess = []

    while guess: # while there is any guess -- there are conditions to break it at the bottom

        guess_num += 1

        guessed_words.append(guess)

        if drama:
            time.sleep(drama)

        # guess_num += 1 # each time the guess is processed
        if return_stats == False:
            if guess_num == 1:
                print("-----------------------------\n")
    
        if return_stats == False:
            print(f"Guess {guess_num}: '{guess}'")

        if guess == target:
            stats_dict['target_guessed'] = True
            if return_stats == False:
                if guess_num == 1:
                    print(f"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!")
                    print(f"The target word was {target}")
                    perfect_letts_per_guess.append(5)
                    wrong_pos_per_guess.append(0)
                    wrong_letts_per_guess.append(0)
            break

        guess_set = set()
        wrong_pos_set = set()

        #### Step 2 -- ALL PERFECT
        for i in letter_positions: # number of letters in each word (current word and target word)
            guess_set.add(guess[i])

            if guess[i] not in perfect_dict:
                perfect_dict[guess[i]] = set()
            if guess[i] not in wrong_pos_dict:
                wrong_pos_dict[guess[i]] = set()

            ### EVALUATE CURRENT GUESS
            if guess[i] == target[i]: # letter == correct and position == correct
                perfect_dict[guess[i]].add(i)

            if (guess[i] != target[i] and  guess[i] in target): # letter == correct and position != correct
                wrong_pos_dict[guess[i]].add(i)
                wrong_pos_set.add(guess[i])

            if guess[i] not in target: # if letter is not relevant at all
                dont_guess_again.add(guess[i])

        #### Step 3 -- ALL PERFECT
        next_letters = set()
        for letter, positions in perfect_dict.items():
            if len(positions) > 0:
                next_letters.add(letter)

        for letter, positions in wrong_pos_dict.items():
            if len(positions) > 0:
                next_letters.add(letter)

        #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
        perfect_letters = []
        for letter, positions in perfect_dict.items():
            for pos in positions:
                if len(positions) > 0:
                    perfect_letters.append((letter, pos))

        #### all words that have correct letters in same spots
        words_matching_correct_all = []
        for word in word_list:
            word_set = set()
            for letter, pos in perfect_letters:
                if word[pos] == letter:
                    words_matching_correct_all.append(word)

        #### excluding words with letters in known incorrect positions
        for letter, positions in wrong_pos_dict.items():
            for pos in positions:
                if len(positions) > 0:
                    if (letter, pos) not in incorrect_positions:
                        incorrect_positions.append((letter, pos))

        # sorting lists of tuples just to make them look nice in the printout
        incorrect_positions = sorted(incorrect_positions, key = operator.itemgetter(1), reverse = False)
        perfect_letters = sorted(perfect_letters, key = operator.itemgetter(1), reverse = False)

        #### all words that have correct letters in incorrect spots -- so they can be excluded efficiently
        for word in word_list:
            word_set = set()
            for letter, pos in incorrect_positions:
                if word[pos] == letter:
                    dont_guess_words.add(word)

        for bad_letter in dont_guess_again:
            for word in word_list:
                if (bad_letter in word and word not in dont_guess_words):
                    dont_guess_words.add(word)

        if return_stats == False:
            if verbose == True:
                print(f"Letters in correct positions:\n\t{perfect_letters}\n")
                print(f"Letters in incorrect positions:\n\t{incorrect_positions}\n")
                print (f"Letters to guess again:\n\t{sorted(list(next_letters), reverse = False)}\n")
                print(f"Letters to not guess again:\n\t{sorted(list(dont_guess_again), reverse = False)}\n") # works

        # Returns True
        # print(A.issubset(B)) # "if everything in A is in B", returns Bool

        perfect_letts_per_guess.append(len(perfect_letters))
        wrong_pos_per_guess.append(len(incorrect_positions))
        wrong_letts_per_guess.append(len(dont_guess_again))

        #### all unguessed words that are potential next guesses. Contains all correct letters,
        #### including matching letters in correct positions, doesn't include incorrect letters
    
        potential_next_guesses = set()
        middle_set = set()

        if len(perfect_letters) == 0 and len(incorrect_positions) == 0: # if there are NEITHER perfect letters, NOR incorrect positions, ....
            for word in word_list:
                if word not in dont_guess_words:
                    if word not in guessed_words:
                        potential_next_guesses.add(word)
                                        
            # print(f"GUESS {guess_num} : TEST 1-1")

        if len(perfect_letters) == 0 and len(incorrect_positions) != 0: # if there are no perfect letters whatsoever, but there ARE incorrect positions ....
            for word in word_list:
                for incor_letter, incor_pos in incorrect_positions:
                    if word[incor_pos] != incor_letter:
                        if word not in dont_guess_words: # just in case
                            word_set = set()
                            for letter in word:
                                word_set.add(letter)

                                if next_letters.issubset(word_set):
                                    if word not in guessed_words:
                                        if len(dont_guess_again) > 0:
                                            for bad_letter in dont_guess_again:
                                                if bad_letter not in word:
                                                    # potential_next_guesses.append(word)
                                                    potential_next_guesses.add(word)
                                        else:
                                            potential_next_guesses.add(word)
            
            # print(f"GUESS {guess_num} : TEST 2-1")

        else:
            for word in word_list:
                if word not in dont_guess_words: # just in case
                    word_set = set()
                    for letter in word:
                        word_set.add(letter)
                        if next_letters.issubset(word_set):
                            if word not in guessed_words:
                                # print ("TEST 3-2")

                                if len(dont_guess_again) > 0:
                                    for bad_letter in dont_guess_again:
                                        if bad_letter not in word:
                                            # potential_next_guesses.append(word)
                                            # potential_next_guesses.add(word)
                                            middle_set.add(word)
                                else:
                                    # potential_next_guesses.add(word)
                                    middle_set.add(word)
            for word in middle_set:
                dummy_list = []
                for good_lett, good_pos in perfect_letters:
                    if word[good_pos] == good_lett:
                        dummy_list.append(1)
                        if len(dummy_list) == len(perfect_letters):
                            potential_next_guesses.add(word)
            for word in middle_set:
                dummy_list = []
                for bad_lett, bad_pos in incorrect_positions:
                    if word[bad_pos] == bad_lett:
                        dummy_list.append(1)
                        if len(dummy_list) > 0:
                            potential_next_guesses.remove(word)
                                        
            # print(f"GUESS {guess_num} : TEST 3-1")

        if return_stats == False:
            if verbose == True:
                print(f"At this point:")
                print(f"\t{len(word_list) - len(potential_next_guesses)}, {round((len(word_list) - len(potential_next_guesses)) / len(word_list) * 100, 2)}% of total words have been eliminated, and")
                print(f"\t{len(potential_next_guesses)}, {round(len(potential_next_guesses) / len(word_list) * 100, 2)}% of total words remain possible.\n")
                
        #### Guessing next word
        if len(potential_next_guesses) == 1:

            if verbose == True:
                print(f"The only remaining possible word is: '{potential_next_guesses}'")
            
            guess = list(potential_next_guesses)[0]

        else:

            if bias == "entropy":
                
                best_next_guesses = list(potential_next_guesses)

                word_ratings = get_word_entropy(best_next_guesses, best_next_guesses, ascending = False)

                # Get max rated word
                max_rating = -np.inf
                for word, rating in word_ratings:
                    if rating > max_rating:
                        max_rating = rating

                for word, rating in word_ratings:
                    if rating == max_rating:
                        guess = word

                if verbose == True:
                    if len(word_ratings) <= 40:
                        print(f"Potential next guesses:\n\t{word_ratings}\n")
                        print (f"Words guessed so far:\n\t{guessed_words}.\n")

            if bias == None:
                best_next_guesses = set()
                for word in potential_next_guesses:
                    for letter, freq in word_list_sorted_counts:
                        if letter not in dont_guess_again:
                            if len(next_letters) > 0:
                                if letter in next_letters:
                                    if letter in word:
                                        best_next_guesses.add(word)
                                        break
                            else:
                                if letter in word:
                                    best_next_guesses.add(word)
                                    break
                if verbose == True:
                    if len(best_next_guesses) <= 40:
                        print(f"Potential next guesses:\n\t{best_next_guesses}\n")
                        print (f"Words guessed so far:\n\t{guessed_words}.\n") 

                guess = list(best_next_guesses)[0]

        #### Guess has now been made -- what to do next
        if guess_num == max_guesses: # if at max guesses allowed
            stats_dict['target_guessed'] = False
            if return_stats == False:
                if verbose == True:
                    # print("-----------------------------\n")
                    print(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\n")
                    print(f"The target word was '{target}'. Better luck next time!\n")
                    print("-----------------------------\n")
                else:
                    print(f"\nUnfortunately, the Wordle could not be solved in {max_guesses} guesses.")
                    print(f"The target word was '{target}'. Better luck next time!\n")
            break
        else: # if not at max guesses yet allowed
            # stats_dict['target_guessed'] = False
            if return_stats == False:
                if verbose == True:
                    print (f"Next guess:\n\t'{guess}'")
                    print("\n-----------------------------\n")

        if guess == target:
            guess_num += 1
            stats_dict['target_guessed'] = True
            if return_stats == False:
                print(f"Guess {guess_num}: '{guess}'\n")
                print(f"Congratulations! The Wordle has been solved in {guess_num} guesses!")
                if max_guesses - guess_num == 0:
                    print(f"Lucky! It was the last guess.")
                else:
                    print(f"There were still {max_guesses - guess_num} guesses remaining.")
            # else:
            #     stats_dict['target_guessed'] = True
            if return_stats == False:   
                # stats_dict['target_guessed'] = True                 
                print(f"\nThe target word was '{target}'.")
                print("\n-----------------------------")
            break


    #### STATS STUFF    
    mid_guesses_vows = 0
    mid_guesses_cons = 0
    avg_perf_letters = 0
    avg_wrong_pos_letters = 0
    avg_wrong_letters = 0

    for word in guessed_words:
        mid_guesses_vows += count_vows_cons(word, y_vow = True)['vows']
        mid_guesses_cons += count_vows_cons(word, y_vow = True)['cons']
        
    for i in range(0, len(guessed_words)):  
        # if len(guessed_words) > len(perfect_letts_per_guess):
        #     # print (len(guessed_words) - len(perfect_letts_per_guess))
        #     # print ("TEST 1")

        # else:
        #     # print ("TEST 2")
            avg_perf_letters += perfect_letts_per_guess[i]
            avg_wrong_pos_letters += wrong_pos_per_guess[i]
            avg_wrong_letters += wrong_letts_per_guess[i]

    # print(perfect_letts_per_guess)
    # print(wrong_pos_per_guess)
    # print(wrong_letts_per_guess)

    stats_dict['mid_guesses_avg_vows'] = float(round(mid_guesses_vows / len(guessed_words), 2))
    stats_dict['mid_guesses_avg_cons'] = float(round(mid_guesses_cons / len(guessed_words), 2))

    stats_dict['avg_perf_letters'] = float(round(avg_perf_letters / len(guessed_words), 2))
    stats_dict['avg_wrong_pos_letters'] = float(round(avg_wrong_pos_letters / len(guessed_words), 2))
    stats_dict['avg_wrong_letters'] = float(round(avg_wrong_letters / len(guessed_words), 2))

    stats_dict['num_guesses'] = float(guess_num)
    stats_dict['bias'] = bias

    # if guess_num <= len(guess):
    if guess_num <= 6:
        stats_dict['valid_success'] = True
    else:
        stats_dict['valid_success'] = False

    if return_stats == True:
        return stats_dict

In [274]:
solve_wordle(word_list = official_words, max_guesses = 6, 
        guess = "moral", target = "admit", bias = 'entropy', 
        random_guess = False, random_target = False, 
        verbose = True, drama = 0, return_stats = False)

-----------------------------

Guess 1: 'moral'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('m', 0), ('a', 3)]

Letters to guess again:
	['a', 'm']

Letters to not guess again:
	['l', 'o', 'r']

At this point:
	2284, 98.92% of total words have been eliminated, and
	25, 1.08% of total words remain possible.

Potential next guesses:
	[('sigma', 66.4), ('shame', 66.4), ('amuse', 65.6), ('stamp', 64.8), ('swami', 64.8), ('scamp', 64.0), ('amiss', 63.2), ('chasm', 63.2), ('swamp', 62.4), ('smack', 60.8), ('spasm', 60.8), ('smash', 60.0), ('image', 59.2), ('anime', 59.2), ('admit', 56.8), ('amass', 56.8), ('amend', 56.0), ('admin', 56.0), ('amity', 54.4), ('champ', 53.6), ('gamut', 52.8), ('enema', 52.8), ('datum', 52.8), ('amaze', 50.4), ('gamma', 46.4)]

Words guessed so far:
	['moral'].

Next guess:
	'shame'

-----------------------------

Guess 2: 'shame'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('m', 0), ('a', 2), ('a', 3), ('m', 3)]

## Simluations

In [21]:
# for guess_word in official_words:
#     try:
#         solve_wordle(word_list = official_words, max_guesses = 10, 
#             guess = word, target = "score", 
#             random_guess = False, random_target = False, 
#             verbose = False, drama = 0, return_stats = True)
#     except:
#         IndexError
#         print (word)
#         continue

### Running simulations

In [93]:
##### sample to set up empty stats_master dict keys
stats_dict = solve_wordle(word_list = official_words, max_guesses = 15, 
    guess = "tests", target = "tests", 
    random_guess = True, random_target = True, 
    verbose = False, drama = 0, return_stats = True)

stats_master = {}
# making empty list for values
for metric, result in stats_dict.items():
    stats_master["game"] = []
    stats_master[metric] = []

# print(stats_master)

##### Simulations

sims = 1

### Number of simulated games
for i in range(0, sims):

    for word in official_words:

        for bias_option in [True, False]:

            try:
                stats_master["game"].append(i + 1)
                complete = solve_wordle(word_list = official_words, max_guesses = 15, 
                    guess = "arose", target = word, 
                    random_guess = False, random_target = False, 
                    verbose = False, drama = 0, return_stats = True)

            except:
                IndexError
                print(complete["first_guess"], complete["target_word"])

            for metric, result in complete.items():
                stats_master[metric].append(result)

sims_df = pd.DataFrame(stats_master)
sims_df

queue state


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,valid_success
0,1,queue,comic,4.0,1.0,2.0,3.0,True,3.00,2.00,0.50,0.00,5.00,3.0,True
1,1,queue,bosom,4.0,1.0,2.0,3.0,True,2.33,2.67,0.00,0.33,7.33,4.0,True
2,1,queue,pluck,4.0,1.0,1.0,4.0,True,2.00,3.00,2.00,3.33,3.00,4.0,True
3,1,queue,spelt,4.0,1.0,1.0,4.0,True,2.25,2.75,2.25,2.25,5.00,5.0,True
4,1,queue,merit,4.0,1.0,2.0,3.0,True,2.50,2.50,1.25,3.25,6.00,5.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2304,1,queue,cloud,4.0,1.0,2.0,3.0,True,2.67,2.33,1.33,1.33,5.33,4.0,True
2305,1,queue,vista,4.0,1.0,2.0,3.0,True,2.67,2.33,0.33,2.67,5.00,4.0,True
2306,1,queue,friar,4.0,1.0,2.0,3.0,True,2.67,2.33,1.67,1.33,4.67,4.0,True
2307,1,queue,truer,4.0,1.0,2.0,3.0,True,3.00,2.00,1.50,4.00,2.00,3.0,True


In [199]:
for metric, result in stats_master.items():
    print (metric, len(result))

# stats_master["game"]

game 13854
first_guess 13854
target_word 13854
first_guess_vowels 13854
first_guess_consonants 13854
target_vowels 13854
target_consonants 13854
target_guessed 13854
mid_guesses_avg_vows 13854
mid_guesses_avg_cons 13854
avg_perf_letters 13854
avg_wrong_pos_letters 13854
avg_wrong_letters 13854
num_guesses 13854
bias 13854
valid_success 13854


### Simluations df

In [201]:
sims_df = pd.DataFrame(stats_master)
print(sims_df.value_counts("target_guessed", normalize = True))
print("-----")
print(sims_df.value_counts("valid_success", normalize = True))
print("-----")
print(sims_df.value_counts("num_guesses", normalize = True))

target_guessed
True    1.0
dtype: float64
-----
valid_success
True     0.982821
False    0.017179
dtype: float64
-----
num_guesses
4.0     0.397214
3.0     0.301501
5.0     0.183485
6.0     0.052115
2.0     0.048506
7.0     0.012126
8.0     0.003681
9.0     0.001155
10.0    0.000217
dtype: float64


In [204]:
sims_df.sort_values(by = "num_guesses", ascending = False)

Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
4584,1,arose,rover,3.0,2.0,2.0,3.0,True,2.22,2.78,2.44,5.56,9.89,10.0,True,False
3699,1,arose,haunt,3.0,2.0,2.0,3.0,True,2.11,2.89,2.89,4.67,9.22,10.0,False,False
12935,1,opera,haunt,3.0,2.0,2.0,3.0,True,2.00,3.00,2.89,4.00,9.89,10.0,False,False
11505,1,opera,fight,3.0,2.0,1.0,4.0,True,1.50,3.50,2.25,2.12,12.38,9.0,False,False
4781,1,adore,mound,3.0,2.0,2.0,3.0,True,2.12,2.88,3.12,3.75,7.38,9.0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
440,1,arose,worst,3.0,2.0,1.0,4.0,True,3.00,2.00,1.00,2.00,2.00,2.0,True,True
1723,1,arose,groan,3.0,2.0,2.0,3.0,True,3.00,2.00,2.00,1.00,2.00,2.0,False,True
3194,1,arose,organ,3.0,2.0,2.0,3.0,True,3.00,2.00,1.00,2.00,2.00,2.0,True,True
1722,1,arose,groan,3.0,2.0,2.0,3.0,True,3.00,2.00,2.00,1.00,2.00,2.0,True,True


### Evaluating Most Optimal Words in Wordle

In [26]:
best_official_words = best_guess_words(official_words)
print(best_official_words)

for tup in best_official_words:
    print (tup[0])

[('arose', 39.18), ('adore', 36.8), ('opera', 36.55)]
arose
adore
opera


In [281]:
##### sample to set up empty stats_master dict keys
stats_dict = solve_wordle(word_list = official_words, max_guesses = 15, 
    guess = "tests", target = "tests", 
    random_guess = True, random_target = True, 
    verbose = False, drama = 0, return_stats = True)

stats_master = {}
# making empty list for values
for metric, result in stats_dict.items():
    stats_master["game"] = []
    stats_master[metric] = []

# print(stats_master)

##### Simulations

# sims = 1
info = []

### Number of simulated games == i
for tup in best_official_words:
    
    guess_word = (tup[0])

    for word in official_words:

        for bias_option in ["entropy", None]:

            try:
                stats_master["game"].append(i + 1)
                complete = solve_wordle(word_list = official_words, max_guesses = 15, 
                    guess = guess_word, target = word, bias = bias_option,
                    random_guess = False, random_target = False, 
                    verbose = False, drama = 0, return_stats = True)

                info.append((i, complete["first_guess"], complete["target_word"]))
                
            except:
                IndexError
                print(complete["first_guess"], complete["target_word"])

            for metric, result in complete.items():
                stats_master[metric].append(result)

sims_df = pd.DataFrame(stats_master)
sims_df

arose cough
arose cough
adore ankle
adore ankle
opera feast
opera feast


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
0,1,arose,comic,3.0,2.0,2.0,3.0,True,2.5,2.5,0.25,4.50,7.75,5.0,entropy,True
1,1,arose,comic,3.0,2.0,2.0,3.0,True,3.0,2.0,0.00,1.00,4.00,2.0,,True
2,1,arose,bosom,3.0,2.0,2.0,3.0,True,2.5,2.5,0.50,2.50,4.50,3.0,entropy,True
3,1,arose,bosom,3.0,2.0,2.0,3.0,True,2.5,2.5,0.50,2.50,4.50,3.0,,True
4,1,arose,pluck,3.0,2.0,1.0,4.0,True,2.0,3.0,0.67,2.00,7.33,4.0,entropy,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13849,1,opera,friar,3.0,2.0,2.0,3.0,True,2.2,2.8,1.80,3.20,8.20,6.0,,True
13850,1,opera,truer,3.0,2.0,2.0,3.0,True,2.5,2.5,1.50,2.00,4.00,3.0,entropy,True
13851,1,opera,truer,3.0,2.0,2.0,3.0,True,2.5,2.5,1.00,2.50,4.00,3.0,,True
13852,1,opera,fault,3.0,2.0,2.0,3.0,True,2.0,3.0,0.50,2.00,5.00,3.0,entropy,True


In [282]:
sims_df.sort_values(by = "num_guesses", ascending = False)

Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
12935,1,opera,haunt,3.0,2.0,2.0,3.0,True,2.00,3.00,2.89,4.00,9.89,10.0,,False
3699,1,arose,haunt,3.0,2.0,2.0,3.0,True,2.11,2.89,2.89,4.67,9.22,10.0,,False
4781,1,adore,mound,3.0,2.0,2.0,3.0,True,2.12,2.88,3.12,3.75,7.38,9.0,,False
7521,1,adore,cater,3.0,2.0,2.0,3.0,True,2.25,2.75,2.62,6.50,7.38,9.0,,False
11505,1,opera,fight,3.0,2.0,1.0,4.0,True,1.50,3.50,2.25,2.12,12.38,9.0,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4206,1,arose,afire,3.0,2.0,3.0,2.0,True,3.00,2.00,2.00,1.00,2.00,2.0,entropy,True
136,1,arose,sober,3.0,2.0,2.0,3.0,True,3.00,2.00,0.00,4.00,1.00,2.0,entropy,True
5211,1,adore,space,3.0,2.0,2.0,3.0,True,3.00,2.00,1.00,1.00,3.00,2.0,,True
7533,1,adore,snipe,3.0,2.0,2.0,3.0,True,3.00,2.00,1.00,0.00,4.00,2.0,,True


In [283]:
sims_df = pd.DataFrame(stats_master)
print(sims_df.value_counts("target_guessed", normalize = True))
print("-----")
print(sims_df.value_counts("valid_success", normalize = True))
print("-----")
print(sims_df.value_counts("num_guesses", normalize = True))

target_guessed
True    1.0
dtype: float64
-----
valid_success
True     0.98477
False    0.01523
dtype: float64
-----
num_guesses
4.0     0.406236
3.0     0.313267
5.0     0.171503
2.0     0.048506
6.0     0.045258
7.0     0.010827
8.0     0.003320
9.0     0.000938
10.0    0.000144
dtype: float64


In [296]:
arose_df = sims_df.query("first_guess == 'arose' & valid_success == True & bias == 'entropy'")
print(round(arose_df['num_guesses'].mean(), 2))
arose_df

3.71


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
0,1,arose,comic,3.0,2.0,2.0,3.0,True,2.50,2.50,0.25,4.50,7.75,5.0,entropy,True
2,1,arose,bosom,3.0,2.0,2.0,3.0,True,2.50,2.50,0.50,2.50,4.50,3.0,entropy,True
4,1,arose,pluck,3.0,2.0,1.0,4.0,True,2.00,3.00,0.67,2.00,7.33,4.0,entropy,True
6,1,arose,spelt,3.0,2.0,1.0,4.0,True,2.00,3.00,1.50,3.00,3.00,3.0,entropy,True
8,1,arose,merit,3.0,2.0,2.0,3.0,True,2.50,2.50,0.50,3.50,3.50,3.0,entropy,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4608,1,arose,cloud,3.0,2.0,2.0,3.0,True,1.67,3.33,2.33,0.00,6.00,4.0,entropy,True
4610,1,arose,vista,3.0,2.0,2.0,3.0,True,2.00,3.00,0.67,4.33,5.00,4.0,entropy,True
4612,1,arose,friar,3.0,2.0,2.0,3.0,True,2.50,2.50,1.00,2.00,4.00,3.0,entropy,True
4614,1,arose,truer,3.0,2.0,2.0,3.0,True,2.33,2.67,2.00,1.00,5.67,4.0,entropy,True


In [297]:
arose_df = sims_df.query("first_guess == 'arose' & valid_success == True & bias == None")
print(round(adore_df['num_guesses'].mean(), 2))
adore_df

3.88


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
4618,1,adore,comic,3.0,2.0,2.0,3.0,True,2.33,2.67,1.00,2.33,6.67,4.0,True,True
4619,1,adore,comic,3.0,2.0,2.0,3.0,True,3.00,2.00,0.00,1.00,4.00,2.0,False,True
4620,1,adore,bosom,3.0,2.0,2.0,3.0,True,2.50,2.50,0.50,1.00,6.00,3.0,True,True
4621,1,adore,bosom,3.0,2.0,2.0,3.0,True,2.33,2.67,1.00,2.33,5.67,4.0,False,True
4622,1,adore,pluck,3.0,2.0,1.0,4.0,True,2.00,3.00,1.00,1.33,7.67,4.0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9231,1,adore,friar,3.0,2.0,2.0,3.0,True,2.33,2.67,1.67,3.33,4.67,4.0,False,True
9232,1,adore,truer,3.0,2.0,2.0,3.0,True,2.33,2.67,1.67,3.00,4.67,4.0,True,True
9233,1,adore,truer,3.0,2.0,2.0,3.0,True,2.25,2.75,1.00,4.25,7.00,5.0,False,True
9234,1,adore,fault,3.0,2.0,2.0,3.0,True,2.00,3.00,0.50,2.00,5.00,3.0,True,True


In [301]:
opera_df = sims_df.query("first_guess == 'arose' & valid_success == True & bias == 'entropy'")
print(round(opera_df['num_guesses'].mean(), 2))
opera_df

3.71


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
0,1,arose,comic,3.0,2.0,2.0,3.0,True,2.50,2.50,0.25,4.50,7.75,5.0,entropy,True
2,1,arose,bosom,3.0,2.0,2.0,3.0,True,2.50,2.50,0.50,2.50,4.50,3.0,entropy,True
4,1,arose,pluck,3.0,2.0,1.0,4.0,True,2.00,3.00,0.67,2.00,7.33,4.0,entropy,True
6,1,arose,spelt,3.0,2.0,1.0,4.0,True,2.00,3.00,1.50,3.00,3.00,3.0,entropy,True
8,1,arose,merit,3.0,2.0,2.0,3.0,True,2.50,2.50,0.50,3.50,3.50,3.0,entropy,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4608,1,arose,cloud,3.0,2.0,2.0,3.0,True,1.67,3.33,2.33,0.00,6.00,4.0,entropy,True
4610,1,arose,vista,3.0,2.0,2.0,3.0,True,2.00,3.00,0.67,4.33,5.00,4.0,entropy,True
4612,1,arose,friar,3.0,2.0,2.0,3.0,True,2.50,2.50,1.00,2.00,4.00,3.0,entropy,True
4614,1,arose,truer,3.0,2.0,2.0,3.0,True,2.33,2.67,2.00,1.00,5.67,4.0,entropy,True


In [289]:
opera_df = sims_df.query("first_guess == 'opera' & valid_success == True & bias == None")
print(round(opera_df['num_guesses'].mean(), 2))
opera_df

nan


Unnamed: 0,game,first_guess,target_word,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,num_guesses,bias,valid_success
