In [None]:
# Copy every five-letter entry of the full CSW19 word list into its own file.
# Lines are written back unchanged (including their line endings); only the
# length test ignores surrounding whitespace.
# The `with` block guarantees both files are closed even if the write fails,
# and the reader is no longer called `input`, which shadowed the builtin.
with open('data/CSW19.txt') as source, open('data/CSW19-5.txt', 'w') as out:
    out.writelines(line for line in source if len(line.strip()) == 5)

In [3]:
import random as random
from common_words import common_words
def _load_word_list(path):
    """Return the stripped, lower-cased lines of the file at ``path`` as a frozenset."""
    # The context manager closes the file as soon as the words are read.
    with open(path) as word_file:
        return frozenset(line.strip().lower() for line in word_file)


class Game:
    """One round of a Wordle-style guessing game.

    The answer is either supplied by the caller or drawn at random from
    ``common_words``. Guesses are scored letter by letter until the answer is
    found ('win') or ``max_guesses`` valid guesses have been used ('loss').
    """

    # Every word accepted as an answer or a guess. A frozenset keeps the
    # membership tests in __init__ and evaluate_guess constant-time.
    __all_words = _load_word_list('data/CSW19-5.txt')

    def __init__(self, word = None, max_guesses = 6):
        """Start a new game.

        Args:
            word: Answer to use (case-insensitive). A falsy value picks a
                random entry of ``common_words`` instead.
            max_guesses: Number of valid guesses allowed before the game is lost.

        Raises:
            ValueError: If ``word`` is given but is not in the dictionary.
        """
        self.__max_guesses = max_guesses
        self.__guesses = 0
        self.__status = 'in progress'
        if not word:
            self.__word = self.__choose_random_start()
            return
        answer = word.lower()
        if answer not in self.__all_words:
            # Fail loudly instead of returning a game that has no answer set.
            raise ValueError('Word must be in dictionary')
        # Store the lower-cased form: guesses are lower-cased before comparison.
        self.__word = answer

    def __choose_random_start(self):
        """Return a random entry of ``common_words``, lower-cased."""
        return random.choice(common_words).lower()

    def get_status(self):
        """Return 'in progress', 'win' or 'loss'."""
        return self.__status

    def get_guesses(self):
        """Return the number of valid guesses made so far."""
        return self.__guesses

    def get_word(self):
        """Return the answer word."""
        return self.__word

    def evaluate_guess(self, guess):
        """Score ``guess`` against the answer and update the game state.

        Args:
            guess: The guessed word (case-insensitive).

        Returns:
            ``{'outcome': 'fail', 'reason': ...}`` when the game is already over
            or the guess is not in the dictionary (no guess is consumed), or
            ``{'outcome': 'success', 'status': ..., 'feedback': ...}`` where
            ``feedback`` holds one ``(letter, colour)`` tuple per guessed letter
            and colour is 'green', 'yellow' or 'gray'.
        """
        lower_guess = guess.lower()
        if self.__status != 'in progress':
            return { 'outcome': 'fail', 'reason': 'Game is over'}
        if lower_guess not in self.__all_words:
            return { 'outcome': 'fail', 'reason': 'Word is not in dictionary'}
        self.__guesses += 1

        # Answer letters not yet matched by a green or yellow; matched slots
        # become None so a letter cannot be credited twice.
        unmatched = list(self.__word)
        # start with all gray
        feedback = [(letter, 'gray') for letter in lower_guess]

        # First pass: exact-position matches.
        for idx, letter in enumerate(lower_guess):
            if self.__word[idx] == letter:
                feedback[idx] = (letter, 'green')
                unmatched[idx] = None

        # Second pass: right letter, wrong position. Greens are skipped and
        # each yellow consumes one unmatched occurrence in the answer.
        for idx, letter in enumerate(lower_guess):
            if feedback[idx][1] == 'green':
                continue
            if letter in unmatched:
                feedback[idx] = (letter, 'yellow')
                unmatched[unmatched.index(letter)] = None

        if all(colour == 'green' for (_, colour) in feedback):
            self.__status = 'win'
        elif self.__guesses >= self.__max_guesses:
            self.__status = 'loss'
        return { 'outcome': 'success', 'status': self.__status, 'feedback': feedback }

In [4]:
import json

# Read the word -> frequency table from disk.
with open('./data/freq_map.json') as json_file:
    data = json.load(json_file)

# (word, frequency) pairs, most frequent first.
sorted_data = sorted(data.items(), key=lambda entry: entry[1], reverse=True)

# method 1: hard cutoff
# The top `percentile` share of the ranking is weighted 1, everything below 0.2.
percentile = 0.45
quantile = int(len(sorted_data) * percentile)
freq_data = dict.fromkeys((pair[0] for pair in sorted_data[:quantile]), 1)
reduced = dict.fromkeys((pair[0] for pair in sorted_data[quantile:]), 0.2)
freq_data.update(reduced)

# method 2: scaled
# Weight falls linearly with rank: 1 for the most frequent word, 1/N for the last.
scaled_data = {
    pair[0]: (len(sorted_data) - rank)/len(sorted_data)
    for rank, pair in enumerate(sorted_data)
}

In [11]:
from common_words import common_words
import pandas as pd
import json
from freq_map_cutoff import freq_data, scaled_data

# All five-letter dictionary words, lower-cased. The context manager closes the
# file once it has been read, and the handle no longer shadows builtin `input`.
with open('data/CSW19-5.txt') as word_file:
    all_words = [line.strip().lower() for line in word_file]
# Word -> frequency value, used by get_word_by_freq to rank candidates.
with open('./data/freq_map.json') as json_file:
    data = json.load(json_file)
# Tab-separated letter-frequency table.
df = pd.read_csv("./data/Wordle letter frequencies.txt", sep = '\t')

def filter_words(eligible_words, feedback):
    """Keep only the candidate words consistent with one turn of feedback.

    Args:
        eligible_words: Dict mapping each candidate word to the list of its
            letters. It is not modified.
        feedback: Sequence of ``(letter, colour)`` tuples, one per guessed
            position, where colour is 'green', 'yellow' or 'gray'.

    Returns:
        A new dict with the surviving words. In each surviving letter list the
        letters accounted for by a green or yellow are replaced with ``None``.
    """
    # Work on private copies so the caller's letter lists are never altered.
    eligible = {word: list(letters) for (word, letters) in eligible_words.items()}

    # Greens: the word must have this letter at this exact position.
    for (position, (letter, color)) in enumerate(feedback):
        if color != 'green':
            continue
        survivors = {}
        for (word, letters) in eligible.items():
            if letters[position] == letter:
                letters[position] = None  # consumed by this green
                survivors[word] = letters
        eligible = survivors

    # Yellows: the letter must still occur somewhere, but not at this position.
    for (position, (letter, color)) in enumerate(feedback):
        if color != 'yellow':
            continue
        survivors = {}
        for (word, letters) in eligible.items():
            if letter in letters and letters[position] != letter:
                letters[letters.index(letter)] = None  # consume one occurrence
                survivors[word] = letters
        eligible = survivors

    # Grays: no occurrence of the letter may remain once the greens and
    # yellows have been consumed (this handles repeated letters correctly).
    for (letter, color) in feedback:
        if color == 'gray':
            eligible = {word: letters for (word, letters) in eligible.items() if letter not in letters}
    return eligible

# based on frequency of remaining letters
def get_word_by_letter_freq(potential_words, past_guesses):
    """Pick the candidate whose not-yet-guessed letters are most common.

    Each candidate adds its ``scaled_data`` weight to the tally of every letter
    it contains. Letters that appear in any past guess are dropped from the
    tally. A candidate's score is the sum of the tallies of its distinct
    letters, multiplied by the candidate's own weight.

    Args:
        potential_words: Sequence of candidate words still in play.
        past_guesses: List of feedback lists, i.e. ``(letter, colour)`` tuples
            from earlier turns.

    Returns:
        The highest-scoring candidate; ties go to the earliest candidate.
    """
    weights = scaled_data

    # Weighted count of each letter across the remaining candidates.
    tally = {}
    for candidate in potential_words:
        weight = weights[candidate]
        for ch in candidate:
            tally[ch] = tally.get(ch, 0)+weight

    # Letters already tried carry no new information, so they score nothing.
    tried = {ch for turn in past_guesses for (ch, _colour) in turn}
    for ch in tried:
        tally.pop(ch, None)

    # Score every candidate on its distinct letters only.
    scores = {}
    for candidate in potential_words:
        letter_total = sum([tally.get(ch, 0) for ch in set(list(candidate))])
        scores[candidate] = letter_total*weights[candidate]

    # Stable descending sort: the first entry is the best-scoring candidate.
    ranking = sorted(scores, key=scores.get, reverse=True)
    return ranking[0]

# based on frequency in english dictionary
def get_word_by_freq(potential_words):
    """Return the candidate with the highest value in the ``data`` frequency map.

    Args:
        potential_words: Sequence of candidate words; each must be a key of ``data``.

    Returns:
        The candidate with the largest frequency value; ties go to the
        earliest candidate.
    """
    frequency_of = {candidate: data[candidate] for candidate in potential_words}
    # Stable descending sort by frequency; the head of the ranking is the pick.
    ranking = sorted(frequency_of, key=frequency_of.get, reverse=True)
    return ranking[0]


def play_game(starting_word = None, show_output = False):
    """Play one game automatically and return the finished ``Game``.

    Each turn narrows the candidate set with the previous turn's feedback and
    then picks the next guess with the configured strategy.

    Args:
        starting_word: Optional word to use as the first guess. If the game
            rejects it, the strategy takes over from the first turn.
        show_output: When true, print a running commentary and a final summary.

    Returns:
        The ``Game`` instance. Its status is 'win' or 'loss', or still
        'in progress' if the solver ran out of candidate words.
    """
    game = Game()
    potential_words = {word: list(word) for word in all_words}
    guesses = []
    last_turn = None
    guess = None
    while game.get_status() == 'in progress':
        if show_output:
            print(f'DOING TURN {game.get_guesses()+1}')
        # filter_words blanks out matched letters, so rebuild fresh letter
        # lists for the surviving words before every turn.
        potential_words = {word: list(word) for word in potential_words}
        # Rejected guesses carry no 'feedback' key, hence .get().
        if last_turn and last_turn.get('feedback'):
            potential_words = filter_words(potential_words, last_turn['feedback'])
        if starting_word and game.get_guesses() == 0:
            guess = starting_word
        elif not potential_words:
            # Nothing left that fits the feedback: give up rather than crash.
            break
        else:
            #
            # Here is where we can adjust the choosing method
            # We want to try different strategies and compare the results
            #
            # Naive method - choose first word remaining
            # guess = list(potential_words.keys())[0]
            #
            # Choose based on English word frequency
            # guess = get_word_by_freq(list(potential_words.keys()))
            #
            # Choose based on remaining letter frequency (maximize words selected)
            guess = get_word_by_letter_freq(list(potential_words.keys()), guesses)
            #
            # Do a hybrid approach:
            # if game.get_guesses() <= 3:
            #     guess = get_word_by_letter_freq(list(potential_words.keys()), guesses)
            # else:
            #     guess = get_word_by_freq(list(potential_words.keys()))
        if show_output:
            print(f'Guessing {guess}')
        last_turn = game.evaluate_guess(guess)
        if last_turn['outcome'] == 'success':
            guesses.append(last_turn['feedback'])
        else:
            # The guess was rejected and did not use up a turn. Drop it from
            # the candidates and stop forcing the starting word, so the next
            # iteration picks a different guess instead of looping forever.
            if show_output:
                print(last_turn['reason'])
            potential_words.pop(guess.lower(), None)
            starting_word = None

    if show_output:
        if game.get_status() == 'win':
            print(f'Woohoo, \U0001F973 we won in {game.get_guesses()} turns with word {guess}')
        elif game.get_status() == 'loss':
            print(f'Noooo \U0001F616 we lost after {game.get_guesses()} turns')
            print(f'The word was {game.get_word()}')
            print(f'The guesses were {guesses}')
        else:
            print(f'\U0001F450 I don\'t know what happened')

    return game


In [12]:
# Play a single game against a randomly chosen answer, printing every turn.
play_game(show_output=True)

hello
DOING TURN 1
Guessing rates
DOING TURN 2
Guessing along
DOING TURN 3
Guessing claim
DOING TURN 4
Guessing flaky
Woohoo, 🥳 we won in 4 turns with word flaky


<__main__.Game at 0x11a0364f0>

In [13]:

def simulate(trials = 1000, starting_word = None, show_output = False):
    """Play ``trials`` automated games and report the aggregate results.

    Args:
        trials: Number of games to play.
        starting_word: Optional first guess passed to every game.
        show_output: Passed through to ``play_game``.

    Returns:
        The fraction of games won. A summary line is also printed.
    """
    # One (won?, guesses used) record per finished game.
    outcomes = []
    for _ in range(trials):
        finished = play_game(starting_word, show_output)
        outcomes.append((finished.get_status() == 'win', finished.get_guesses()))

    wins = sum(won for (won, _) in outcomes)
    total_guesses = sum(used for (_, used) in outcomes)

    p_wins = wins/trials
    print(f'Won {p_wins*100}% of {trials} games, averaging {total_guesses/trials} guesses per game')
    return p_wins

# Run the default 1000 trials, opening every game with the guess 'stale'.
simulate(starting_word='stale')

hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hell

0.974