In [1]:
!pip install english-words



In [2]:
from enum import Enum, auto

import numpy as np
import pandas as pd

from english_words import english_words_lower_alpha_set

class color(Enum):
    """Feedback given for one letter of a guess."""

    green = 1   # letter is in the right position
    orange = 2  # letter occurs in the word, but not at this position
    red = 3     # letter does not occur in the word

In [28]:
def get_word_scores(word_set, freq_dict):
    """Score every word by its letters and return the scores, best word first.

    A word's score is the sum of ``freq_dict`` values of its distinct letters.
    Repeated letters count once (so repeats are not rewarded) and letters that
    are missing from ``freq_dict`` contribute nothing.

    The result is a plain dict, which keeps insertion order. Because the words
    are inserted from highest to lowest score, the first key is always the
    best remaining guess, while individual words can still be deleted cheaply
    as they are ruled out.

    Args:
        - word_set: an iterable of the words to be scored.
        - freq_dict: a dictionary mapping a character to its numeric value.

    Returns:
        - word_scores: a dictionary mapping each word to its score rounded to
          two decimals, inserted in descending score order (ties are ordered
          by descending word).
    """

    def letter_total(candidate):
        # Accumulate left to right, counting each scorable letter only once.
        total = 0
        counted = set()
        for letter in candidate:
            if letter in counted or letter not in freq_dict:
                continue
            total += freq_dict[letter]
            counted.add(letter)
        return total

    # (score, word) tuples sort by score first and by word second.
    ranked = [(letter_total(candidate), candidate) for candidate in word_set]
    ranked.sort(reverse=True)

    # Insert best-first so the dictionary's iteration order is the ranking.
    word_scores = {}
    for total, candidate in ranked:
        word_scores[candidate] = round(total, 2)

    return word_scores

In [29]:
def prune_word_scores(char, pos, color, word_scores):
    """Delete, in place, every word that contradicts one letter of feedback.

    Args:
        - char: the guessed character.
        - pos: zero-based position of ``char`` in the guess.
        - color: the feedback enum member for this character (red, orange or
          green). Any member that is neither red nor orange is treated as green.
        - word_scores: dictionary keyed by candidate word; modified in place.

    Returns:
        - None. Ruled-out words are removed from ``word_scores``.
    """
    # The `color` parameter shadows the enum class of the same name, so resolve
    # the members through the argument's own type. Reaching one member through
    # another (color.red on a member) is not supported by every Python version.
    feedback = type(color)

    if color == feedback.red:
        # Red: the char is not in the word, so drop every word containing it.
        def is_ruled_out(word):
            return char in word
    elif color == feedback.orange:
        # Orange: the char is in the word, but not at this position. Drop words
        # that have it here AND words that do not contain it at all.
        def is_ruled_out(word):
            return char not in word or word[pos] == char
    else:
        # Green: the char is at this position, so drop words that differ here.
        def is_ruled_out(word):
            return word[pos] != char

    # Collect first, delete afterwards: a dict cannot shrink while it is
    # being iterated.
    for word in [candidate for candidate in word_scores if is_ruled_out(candidate)]:
        del word_scores[word]

In [30]:
def make_guess(guess_word, target_word):
    """Compare a guess with the target word and color each guessed letter.

    The comparison is case-insensitive: both words are lower-cased once and
    every check (position match and membership) uses the lower-cased forms.

    Args:
        - guess_word: the guessed word.
        - target_word: the word to be found.

    Returns:
        - result: a 1D numpy object array with 5 entries. Entry ``i`` is
          ``color.green`` if the letters at position ``i`` match,
          ``color.orange`` if the guessed letter occurs elsewhere in the
          target, and ``color.red`` otherwise.
    """
    guess = guess_word.lower()
    target = target_word.lower()

    # One slot per letter of the five-letter guess.
    result = np.empty(5, dtype = object)

    for i, (guess_char, target_char) in enumerate(zip(guess, target)):
        if guess_char == target_char:
            result[i] = color.green   # right letter, right position
        elif guess_char in target:
            # Membership must be tested against the lower-cased target too,
            # otherwise an upper-case target never yields orange.
            result[i] = color.orange
        else:
            result[i] = color.red     # letter does not occur in the target

    return result

In [31]:
def get_next_guess(word_scores):
    """Return the first key of ``word_scores``.

    Dictionaries iterate in insertion order, so for a dictionary filled in
    descending score order this is the highest-scoring remaining word.

    Raises:
        - StopIteration: if ``word_scores`` is empty.
    """
    # Returning from inside the loop stops after the very first key.
    for best_word in word_scores:
        return best_word
    raise StopIteration

In [32]:
def check_target_match(guess_result):
    """Return whether every entry of ``guess_result`` equals ``color.green``."""
    outcome = np.array(guess_result)
    all_green = (outcome == color.green).all()
    return all_green

In [33]:
# Only the five-letter entries of the word list are candidate words.
wordle_words = set(filter(lambda entry: len(entry) == 5, english_words_lower_alpha_set))

# Letter-occurrence proportions. https://www.lexico.com/explore/which-letters-are-used-most
freq = pd.read_csv('letter_frequency.csv')

# The "Percentage" column is read as text such as "8.5%": drop the percent
# sign and convert each entry to a floating point number.
percent_values = [float(raw.strip("%")) for raw in freq.Percentage]
freq.Percentage = pd.Series(percent_values)

# Frequency dictionary: lower-cased letter -> percentage.
# Each row is unpacked as exactly (letter, percentage).
freq_dict = {
    letter.lower(): percentage
    for _, (letter, percentage) in freq.iterrows()
}

In [34]:
# Rank every candidate word; the dictionary is ordered from best to worst score.
word_scores = get_word_scores(wordle_words, freq_dict)

In [35]:
# Pick the word to be guessed at random from the scored words.
# NOTE(review): no random seed is set in the visible cells, so the chosen word
# will differ between runs.
target_word = np.random.choice(list(word_scores.keys()))

In [36]:
# Display the randomly chosen target word.
target_word

'avail'

In [26]:
# Interactive solver: suggest the best remaining word, ask the user which
# color each letter received, and prune the candidates accordingly.

# Prune a private copy so the shared `word_scores` ranking stays intact and
# this cell starts from the full word list every time it is run.
candidates = dict(word_scores)

# Keyboard answer -> feedback color.
color_by_key = {"r": color.red, "o": color.orange, "g": color.green}

while candidates:
    # Get next 'best guess'
    guess_word = get_next_guess(candidates)

    print("Best next guess is:", guess_word.upper())
    print("r = red", "o = orange", "g = green", sep = "\n")

    # Ask for the color of each of the five letters, re-prompting until the
    # answer is one of r / o / g.
    guess_result = np.zeros(5, dtype = object)
    for i in range(5):
        color_input = None
        while color_input not in color_by_key:
            color_input = input(f"'{guess_word[i].upper()}' in position {i + 1} was (r)ed, (o)range, (g)reen:    ")
        guess_result[i] = color_by_key[color_input]

    # Check if the guess is a full match (all green)
    if check_target_match(guess_result):
        final_word = guess_word
        break

    # The loop variable must not be called `color`: at module level that would
    # rebind the enum class and break every later `color.red` lookup.
    for i, (char_color, guess_char) in enumerate(zip(guess_result, guess_word)):
        prune_word_scores(char = guess_char, pos = i, color = char_color, word_scores = candidates)
else:
    # Only reached when the candidates run out without an all-green guess.
    print("No candidate words are left; the feedback entered matches no word in the list.")

Best next guess is: IRATE
r = red
o = orange
g = green
'I' in position 1 was (r)ed, (o)range, (g)reen:    r
'R' in position 2 was (r)ed, (o)range, (g)reen:    r
'A' in position 3 was (r)ed, (o)range, (g)reen:    o
'T' in position 4 was (r)ed, (o)range, (g)reen:    r
'E' in position 5 was (r)ed, (o)range, (g)reen:    r
Best next guess is: SLOAN
r = red
o = orange
g = green
'S' in position 1 was (r)ed, (o)range, (g)reen:    r
'L' in position 2 was (r)ed, (o)range, (g)reen:    o
'O' in position 3 was (r)ed, (o)range, (g)reen:    r
'A' in position 4 was (r)ed, (o)range, (g)reen:    o
'N' in position 5 was (r)ed, (o)range, (g)reen:    r
Best next guess is: CULPA
r = red
o = orange
g = green
'C' in position 1 was (r)ed, (o)range, (g)reen:    r
'U' in position 2 was (r)ed, (o)range, (g)reen:    o
'L' in position 3 was (r)ed, (o)range, (g)reen:    o
'P' in position 4 was (r)ed, (o)range, (g)reen:    r
'A' in position 5 was (r)ed, (o)range, (g)reen:    o
Best next guess is: LAUGH
r = red
o = or

In [None]:
# Automated solver: play against `target_word` without user input.

# Start from a freshly scored word list. Reusing `word_scores` would inherit
# any pruning done by an earlier cell, which may already have removed
# `target_word`.
candidates = get_word_scores(wordle_words, freq_dict)

while candidates:
    # Get next 'best guess'
    guess_word = get_next_guess(candidates)
    print(guess_word)

    # Make the guess, and get the resulting colors
    guess_result = make_guess(guess_word, target_word)

    # Check if the guess is a full match (all green)
    if check_target_match(guess_result):
        final_word = guess_word
        break

    # The loop variable must not be called `color`: at module level that would
    # rebind the enum class that make_guess and check_target_match rely on.
    for i, (char_color, guess_char) in enumerate(zip(guess_result, guess_word)):
        prune_word_scores(char = guess_char, pos = i, color = char_color, word_scores = candidates)
else:
    # Only reached when the candidates run out without an all-green guess.
    print("No candidate words are left; the target word was not found.")