In [1]:
%load_ext line_profiler

In [9]:
from english_words import english_words_set
from wordfreq import word_frequency
from collections import Counter
import numpy as np
import random
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
WORD_LEN = 5
MIN_FREQ = 5e-7

In [10]:
def load_words(path):
    words = []
    with open(path) as file:
        for line in file:
            word = line.split('\n')[0]
            words.append(word)
    return(words)

wordles = load_words('wordles_shuffled')
print(random.sample(wordles, 10))
accepted_words = load_words('words')
accepted_words.extend(wordles)
accepted_words.sort()
print(random.sample(accepted_words, 10))

['drunk', 'barge', 'snare', 'grown', 'erase', 'enjoy', 'upset', 'pixel', 'mange', 'fuzzy']
['shops', 'punch', 'lemes', 'welch', 'bevue', 'baggy', 'gebur', 'neums', 'bowel', 'swore']


In [11]:
RESTRICT = lambda word: word.islower() and word.isalpha() and len(word) == WORD_LEN \
    and word_frequency(word, 'en') >= MIN_FREQ

def removeProperNouns(words):
    result = words.copy()
    for word in words:
        if word[0].isupper():
            result.remove(word)
    return result

print(random.sample(removeProperNouns(english_words_set), 10))

def restrictWordLength(words, length):
    result = words.copy()
    for word in words:
        if len(word) != length:
            result.remove(word)
    return result

print(random.sample(restrictWordLength(english_words_set, 5), 10))

def restrictSet(words, key):
    result = words.copy()
    for word in words:
        if not key(word):
            result.remove(word)
    return result

words = restrictSet(english_words_set, RESTRICT)
for word in random.sample(words, 15):
    print(f'{word}: {word_frequency(word, "en")}')

['toddy', 'giant', 'fail', 'nuclear', 'crocodile', 'bacterial', 'deepen', 'sixtyfold', 'churchwoman', 'beverage']
['probe', 'torso', 'idyll', 'Gregg', 'alert', 'endow', 'aster', 'Dhabi', 'flesh', 'sleet']
genie: 2.57e-06
hence: 2.4e-05
juror: 1.35e-06
supra: 1.2e-06
bound: 2.88e-05
thing: 0.000525
neath: 6.92e-07
stork: 7.59e-07
glory: 2.29e-05
fiend: 1.26e-06
booty: 3.89e-06
anode: 7.24e-07
crane: 8.32e-06
handy: 9.33e-06
brake: 8.91e-06


In [12]:
WEIGHTS = {word:word_frequency(word, 'en') for word in words}
def getLetterFrequencies(words, weighted=True):
    freqs, overall = [{letter: 0 for letter in ALPHABET} for i in range(5)], {letter: 0 for letter in ALPHABET}
    for word in words:
        weight = WEIGHTS[word] if weighted else 1
        for index, letter in enumerate(word):
            freqs[index][letter] += weight
            overall[letter] += weight
            
    return freqs, overall

In [13]:
ORDINALS = ['1st', '2nd', '3rd', '4th', '5th', '6th']
def printMaxIndexFreqs(freqs, overall):
    letters = sorted(ALPHABET, reverse=True, key=lambda letter: overall[letter])
    print(f'Overall frequency order:\t{"|".join(letters)}')
    
    for index, dic in enumerate(freqs):
        letters = sorted(ALPHABET, reverse=True, key=lambda letter: dic[letter] if letter in dic else 0)
        print(f'Frequency order for {ORDINALS[index]} letter:\t{"|".join(letters)}')

In [14]:
# print('Weighted by word frequency')
# freqs, overall = getFrequencies(words, weighted=True)
# printMaxIndexFreqs(freqs, overall)

print('Unweighted')
freqs, overall = getLetterFrequencies(wordles, weighted=False)
printMaxIndexFreqs(freqs, overall)

Unweighted
Overall frequency order:	e|a|r|o|t|l|i|s|n|c|u|y|d|h|p|m|g|b|f|k|w|v|z|x|q|j
Frequency order for 1st letter:	s|c|b|t|p|a|f|g|d|m|r|l|w|e|h|v|o|n|i|u|q|j|k|y|z|x
Frequency order for 2nd letter:	a|o|r|e|i|l|u|h|n|t|p|w|c|m|y|d|b|s|v|x|g|k|f|q|j|z
Frequency order for 3rd letter:	a|i|o|e|u|r|n|l|t|s|d|g|m|p|b|c|v|y|w|f|k|x|z|h|j|q
Frequency order for 4th letter:	e|n|s|a|l|i|c|r|t|o|u|g|d|m|k|p|v|f|h|w|b|z|x|y|j|q
Frequency order for 5th letter:	e|y|t|r|l|h|n|d|k|a|o|p|m|g|s|c|f|w|b|i|x|z|u|j|q|v


In [15]:
def satisfies(word, exact, contains, absent):
    word = list(word)
    for index, letter in exact:
        if word[index] != letter:
            return False
        word[index] = '_'
        
    for index, letter in contains:
        if (word[index] == letter or letter not in word):
            return False
        word[word.index(letter)] = '_'
        
    for index, letter in absent:
        if letter in word:
            return False
        
    return True

def score(words, guess, word, print_possible=False):
    exact, contains, absent = [], [], []
    if guess not in accepted_words:
        if print_possible:
            print(f'Warning: "{guess}" not in word list')
        
    counts = Counter(word)
    for index, letter in enumerate(guess):
        if word[index] == letter:
            exact.append((index, letter))
            counts[letter] -= 1
        elif letter in word and counts[letter] > 0:
            contains.append((index, letter))
            counts[letter] -= 1
        else:
            absent.append((index, letter))
            
    possible = set()
    for word in words:
        if satisfies(word, exact, contains, absent):
            possible.add(word)
    
    score = len(words) - len(possible)
    if print_possible:
        print(f'Green letters: {exact}')
        print(f'Yellow letters: {contains}')
        print(f'Gray letters: {absent}')
        print(f'All possible words ({len(possible)}): {possible}')
        print(f'Score ({len(words)}-{len(possible)})): {score}')
        
    return score

score(wordles, 'bread', 'bears', print_possible=True)

Green letters: [(0, 'b')]
Yellow letters: [(1, 'r'), (2, 'e'), (3, 'a')]
Gray letters: [(4, 'd')]
All possible words (4): {'baker', 'blare', 'baler', 'barge'}
Score (2315-4)): 2311


2311

In [16]:
def computeAverageScore(words, guess):
    sc = 0
    for word in words:
        sc += score(words, guess, word)
        
    return np.round(sc/len(words), 3)

# jank way of testing if saved_scores is a variable
try:
    print('rates' in saved_scores)
except:
    saved_scores = {}

In [17]:
def prof():
    computeAverageScore(wordles, 'rates')

%lprun -f satisfies prof()

UsageError: Line magic function `%lprun` not found.


In [None]:
%%time
computeAverageScore(wordles, 'raise')

In [79]:
%%time
def scoreGuesses(words, saved_scores, *args, list_all=False):
    best = 0
    for guess in args:
        if guess not in saved_scores:
            sc = computeAverageScore(words, guess)
            saved_scores[guess] = sc
        
        if saved_scores[guess] > best:
            best = saved_scores[guess]
            print(f'New best! Average score for {guess}: {saved_scores[guess]}')
        elif list_all:
            print(f'Average score for {guess}: {saved_scores[guess]}')

scoreGuesses(wordles, saved_scores,
    'llama', 'trace', 'cares', 'shear', 'thine', 'tears', 'stare', 
    'anode', 'caret', 'crane', 'crate', 'canoe', 'chart', 'raise', 'arise',
     list_all=True)

New best! Average score for llama: 1875.945
New best! Average score for trace: 2240.98
Average score for cares: 2233.197
Average score for shear: 2222.255
Average score for thine: 2196.207
Average score for tears: 2239.552
New best! Average score for stare: 2243.705
Average score for anode: 2210.205
Average score for caret: 2239.4
Average score for crane: 2236.258
Average score for crate: 2242.1
Average score for canoe: 2228.168
Average score for chart: 2174.498
New best! Average score for raise: 2253.999
Average score for arise: 2251.274
Wall time: 3.04 s


In [73]:
most_common = []
for i in range(27):
    most_common.append(set(list(sorted(ALPHABET, reverse=True, key=lambda letter: overall[letter]))[:i]))

print(most_common[8])

{'e', 't', 'a', 'o', 'r', 'i', 'l', 's'}


In [87]:
%%time
accepted = [word for word in accepted_words if 
            (len(set(word)) == len(word) and 
             sum((1 if letter in most_common[3] else 0) for letter in word) >= 2 and
             sum((1 if letter in most_common[7] else 0) for letter in word) >= 4)]

print(f'Scoring list of {len(accepted)} words')
scoreGuesses(wordles, saved_scores, *accepted, list_all=True)

Scoring list of 587 words
New best! Average score for abler: 2207.016
New best! Average score for ablet: 2207.399
New best! Average score for abore: 2217.535
Average score for abort: 2189.879
Average score for actor: 2206.383
New best! Average score for adore: 2225.957
New best! Average score for aeros: 2239.798
New best! Average score for aesir: 2245.117
Average score for afire: 2215.794
Average score for afore: 2215.746
Average score for afrit: 2178.626
Average score for after: 2204.993
Average score for agile: 2208.926
Average score for aglet: 2213.493
Average score for aider: 2221.125
Average score for aiery: 2227.965
Average score for ailed: 2213.014
Average score for aimer: 2218.682
Average score for aired: 2224.975
Average score for airth: 2202.855
Average score for airts: 2220.663
Average score for aisle: 2238.811
Average score for aiver: 2199.006
Average score for aizle: 2183.362
Average score for alder: 2220.688
Average score for aleft: 2190.658
Average score for alert: 2243.

Average score for pareo: 2225.469
Average score for parle: 2232.431
Average score for parol: 2206.219
Average score for parti: 2199.7
Average score for pater: 2225.271
Average score for pearl: 2223.892
Average score for peart: 2228.317
Average score for pelta: 2207.128
Average score for perai: 2217.452
Average score for peril: 2209.868
Average score for petal: 2209.122
Average score for petar: 2221.701
Average score for petri: 2200.811
Average score for piert: 2205.581
Average score for pieta: 2201.643
Average score for pilae: 2216.028
Average score for pilar: 2201.492
Average score for pilea: 2208.53
Average score for piler: 2206.888
Average score for plate: 2225.709
Average score for pleat: 2208.683
Average score for plier: 2211.053
Average score for polar: 2203.578
Average score for poler: 2209.037
Average score for porae: 2229.729
Average score for poral: 2201.039
Average score for porta: 2196.063
Average score for potae: 2225.527
Average score for prate: 2233.5
Average score for p

Average score for ureal: 2228.238
Average score for urial: 2216.774
Average score for urite: 2228.606
Average score for uteri: 2205.335
Average score for vaire: 2217.475
Average score for valet: 2200.91
Average score for valor: 2177.529
Average score for velar: 2193.061
Average score for viler: 2178.306
Average score for viral: 2172.162
Average score for vireo: 2180.356
Average score for vitae: 2197.379
Average score for volae: 2194.357
Average score for volar: 2173.19
Average score for voter: 2180.805
Average score for waite: 2218.272
Average score for waler: 2207.927
Average score for walie: 2210.343
Average score for water: 2210.474
Average score for wrate: 2220.488
Average score for write: 2202.582
Average score for wrote: 2198.754
Average score for yarto: 2206.159
Average score for zaire: 2211.129
Average score for zoeal: 2172.317
Wall time: 12min 9s


In [88]:
scores_list = [(word, score) for word, score in saved_scores.items()]
scores_list.sort(key=lambda x: x[1], reverse=True)
scores_list[:20]

[('roate', 2254.575),
 ('raise', 2253.999),
 ('raile', 2253.669),
 ('soare', 2252.699),
 ('arise', 2251.274),
 ('irate', 2251.221),
 ('orate', 2251.109),
 ('ariel', 2249.712),
 ('arose', 2248.979),
 ('raine', 2247.944),
 ('artel', 2247.504),
 ('taler', 2247.263),
 ('ratel', 2245.157),
 ('aesir', 2245.117),
 ('arles', 2245.109),
 ('realo', 2245.052),
 ('alter', 2245.008),
 ('later', 2244.777),
 ('oater', 2243.755),
 ('salet', 2243.728)]

In [89]:
def save_scores(path, scores_list):
    words = []
    with open(path, 'w') as file:
        for word, score in scores_list:
            file.write(f'{word} {score}\n')

save_scores('guess_scores_linear.txt', scores_list)

In [28]:
BASIC_RESTRICTION = lambda word: word.islower() and word.isalpha() and len(word) == WORD_LEN
RESULTS_RESTRICTION = lambda word: BASIC_RESTRICTION(word) and sum((1 if letter in 'eca' else 0) for letter in word) == 5
GUESS_RESTRICTION = lambda word: BASIC_RESTRICTION(word) and word in accepted_words

In [29]:
# e = exact, c = contains, a = absent
def solver(wordles, accepted_words):
    words = accepted_words.copy()
    
    for guess_count in range(6):
        guess = ''

        while not GUESS_RESTRICTION(guess):
            guess = input(f'{ORDINALS[guess_count]} guess: ')
            
            if len(words) == 1 and guess == next(iter(words)):
                print(f'Congratulations, you won in {guess_count + 1} turns!')
                return
            
        results = ''
        while not RESULTS_RESTRICTION(results):
            results = input('Result (ex. caace): ')
            
        exact, contains, absent = [], [], []

        for index, result in enumerate(results):
            letter = guess[index]
            if result == 'e':
                exact.append((index, letter))
            elif result == 'c':
                contains.append((index, letter))
            elif result == 'a':
                absent.append((index, letter))
                
        # print(f'Green letters: {exact}')
        # print(f'Yellow letters: {contains}')
        # print(f'Gray letters: {absent}')
        
        if len(exact) == 5 and guess in words:
            print(f'Congratulations, you won in {guess_count + 1} turns!')
            return
        elif len(exact) == 5:
            print(f'Invalid input, please restart.')
            return
        
        possible = set()
        for word in words:
            if satisfies(word, exact, contains, absent):
                possible.add(word)
                
        if len(possible) == 0:
            print(f'Invalid input, please restart.')
            return
        print(f'Possible words ({len(possible)}): {possible}\n')
        words = possible
        
    print('You lost :(')

In [25]:
solver(wordles, accepted_words)

NameError: name 'GUESS_RESTRICTION' is not defined

In [30]:
GREEN_STYLE = '\033[92m'
YELLOW_STYLE = '\033[33m'
GRAY_STYLE = '\033[30m'

def wordle_start():
    print('Welcome to Wordle!')
    response = ''
    while response.lower() not in ['0', '1', '2']:
        response = input('What assistance level would you like? (0/1/2): ')
        
    return int(response)

def play_main(wordles, accepted_words, wordle=None, assistance=None):
    if assistance is None:
        assistance = wordle_start()
        
    if wordle is None:
        wordle = random.choice(wordles)
        
    words = accepted_words.copy()
    wordle_print = ['\033[30m_'] * 5
    overall_exact_indices, overall_contains, overall_absent = set(), {}, set()
    guesses = []
    
    for guess_count in range(6):
        guess = ''

        while not GUESS_RESTRICTION(guess):
            guess = input(f'{ORDINALS[guess_count]} guess: ')
            
            if guess == wordle:
                print(f'Congratulations, you won in {guess_count + 1} turns!')
                return
        
        counts = Counter(wordle)
        exact, contains, absent = [], [], []
        colors = []
        indices = list(range(5))
        
        contains_counts = {}
        absent_set = set()
        
        # green handling
        i = 0
        while i < len(indices):
            index = indices[i]
            letter = guess[index]
            if wordle[index] == letter:
                exact.append((index, letter))
                counts[letter] -= 1
                indices.pop(i)
                
                overall_exact_indices.add(index)
                contains_counts[letter.upper()] = contains_counts[letter.upper()] + 1 if \
                    letter.upper() in contains_counts else 1
                
                stylized_letter = GREEN_STYLE + letter.upper()
                colors.append((index, stylized_letter))
                wordle_print[index] = stylized_letter
            else:
                i += 1
        
        # yellow handling
        i = 0
        while i < len(indices):
            index = indices[i]
            letter = guess[index]
            if letter in wordle and counts[letter] > 0:
                contains.append((index, letter))
                counts[letter] -= 1
                indices.pop(i)
                
                contains_counts[letter.upper()] = contains_counts[letter.upper()] + 1 if \
                    letter.upper() in contains_counts else 1
                
                stylized_letter = YELLOW_STYLE + letter.upper()
                colors.append((index, stylized_letter))
            else:
                i += 1

        # gray handling
        for index in indices:
            letter = guess[index]
            absent.append((index, letter))
            absent_set.add(letter.upper())
            
            stylized_letter = GRAY_STYLE + letter.upper()
            colors.append((index, stylized_letter))
        
        # printing basic assistance
        if assistance > 0:
            overall_exact = {}
            for idx in overall_exact_indices:
                letter = wordle[idx]
                overall_exact[letter.upper()] = overall_exact[letter.upper()] + 1 if \
                    letter.upper() in overall_exact else 1
                
            absent_set -= contains_counts.keys()
            absent_set -= overall_exact.keys()
            
            overall_contains = {key:max(overall_contains[key] if key in overall_contains else -np.inf,
                contains_counts[key] if key in contains_counts else -np.inf) for key in
                   set(list(overall_contains.keys())).union(contains_counts.keys())}
            
            overall_contains_copy = overall_contains.copy()
            for letter in overall_exact:
                if letter in overall_contains_copy:
                    overall_contains_copy[letter] -= overall_exact[letter]
                    if overall_contains_copy[letter] == 0:
                        del overall_contains_copy[letter]
            
            overall_absent = overall_absent.union(absent_set)
        
        guesses.append(''.join(map(lambda x: x[1], sorted(colors, key=lambda x: x[0]))))
        
        print('\033[1m')
        if assistance > 0:
            remaining = (set(ALPHABET.upper()) - overall_absent) - overall_contains.keys()
            print(f'{"".join(wordle_print)}\t{YELLOW_STYLE}Required: {overall_contains_copy}\t{GRAY_STYLE}Remaining: {sorted(remaining) if len(remaining) > 0 else "{}"}')
        for g in guesses:
            print(g)
        print('\033[0m')
        
        if assistance > 1:
            possible = set()
            for word in words:
                if satisfies(word, exact, contains, absent):
                    possible.add(word)

            print(f'Possible words ({len(possible)}): {possible}\n')
            words = possible
        
    print(f'You lost :(\nWordle: {wordle}')
    
def play(wordles, accepted_words):
    assistance = wordle_start()
    playagain = True
    while playagain:
        play_main(wordles, accepted_words, assistance=assistance)
        
        response = ''
        while response.lower() not in ['y', 'n', 'yes', 'no']:
            response = input('Would you like to play again? (y/n): ')

        playagain = (response.lower()[0] == 'y')
        print()

In [33]:
play(wordles, accepted_words)

Welcome to Wordle!
What assistance level would you like? (0/1/2): 2
1st guess: raise
[1m
[30m_[30m_[30m_[30m_[30m_	[33mRequired: {'I': 1}	[30mRemaining: ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
[30mR[30mA[33mI[30mS[30mE
[0m
Possible words (449): {'mingy', 'tumid', 'willy', 'tigon', 'biggy', 'duomi', 'picky', 'cibol', 'lotic', 'dicty', 'kinky', 'ninth', 'indol', 'pinko', 'licit', 'gippo', 'incog', 'humid', 'until', 'ictic', 'ninny', 'lipid', 'zinco', 'mucid', 'incut', 'witch', 'zilch', 'motif', 'yogin', 'zombi', 'pitch', 'villi', 'nihil', 'filmy', 'milky', 'mooli', 'ontic', 'tight', 'kindy', 'unfix', 'dicky', 'windy', "zymic'", 'kibbi', 'lindy', 'pilum', 'culti', 'pinup', 'wingy', 'input', 'bight', 'ticky', 'kimbo', 'tippy', 'finch', 'comix', 'titch', 'mitch', 'pubic', 'middy', 'kylix', 'mufti', 'bluid', 'livid', 'jingo', 'pudic', 'cinch', 'ogmic', 'piggy', 'bocci', 'pilow', 'bolix', 'picot', 'bidon', 'ovoli'

KeyboardInterrupt: Interrupted by user