In [18]:
import re
# Exactly five lowercase ASCII letters. Requiring [a-z] in every position
# (not only the first) rejects proper nouns and also entries that contain
# capitals, apostrophes, hyphens or digits after the first character.
wordle_word = re.compile(r"^[a-z]{5}$")

In [24]:
import urllib.request
# Default source: the Wordle answer list - 2300 words
url = "https://gist.githubusercontent.com/cfreshman/a03ef2cba789d8cf00c08f767e0fad7b/raw/5d752e5f0702da315298a6bb5a771586d6ff445c/wordle-answers-alphabetical.txt"
# Alternative source: the BSD word list - 8500 words (uncomment to use it instead)
#url = "https://web.mit.edu/freebsd/head/share/dict/web2"
with urllib.request.urlopen(url) as response:
    # Download the whole list and decode it while the connection is open.
    word_list_text = response.read().decode("utf-8")
# One candidate per line; keep only the lines accepted by the wordle_word pattern.
words = [line for line in word_list_text.splitlines() if wordle_word.match(line)]

In [28]:
import random
# Pick a random secret word from the loaded list and show its index and value.
# No seed is set, so a different word is drawn on every run.
i = random.randrange(len(words))
correct = words[i]
print(f"{i}: {correct}")

124: augur


In [29]:
# Global verbosity switch read by next_guess() and play(): while it is False
# they print per-round details (and next_guess() iterates with a tqdm progress
# bar); while it is True they print nothing.
quiet = False

In [30]:
def wordle_round(guess, correct):
    """Score one guess against the secret word.

    Returns a 5-character string with one mark per position:
      "+"  the guessed letter is at this exact position in `correct`
      "o"  the guessed letter occurs somewhere in `correct`, but not here
      "-"  the guessed letter does not occur in `correct` at all

    Note: a letter is marked "o" whenever it occurs anywhere in `correct`,
    regardless of how often it occurs or whether that occurrence is already
    matched by a "+" elsewhere in the guess.
    """
    marks = []
    for position in range(5):
        letter = guess[position]
        if letter == correct[position]:
            marks.append("+")
        elif letter in correct:
            marks.append("o")
        else:
            marks.append("-")
    return "".join(marks)

In [31]:
def process_result(guess, result, exact, exactnt, must, mustnt):
    """Fold the feedback for one guess into the accumulated constraints.

    Args:
        guess: the five-letter word that was played.
        result: five feedback marks, one per letter: "+" (right letter, right
            position), "o" (letter is in the word but not at this position);
            any other character is treated as "-" (miss).
        exact: list of 5 entries, the known letter per position or ".".
        exactnt: list of 5 sets, letters known NOT to be at each position.
        must: set of letters that have to appear in the word.
        mustnt: set of letters that may not appear anywhere in the word.

    Returns:
        The same four containers (exact, exactnt, must, mustnt); they are
        updated in place.

    A "-" on a letter that is also required (it got "+" or "o" elsewhere in
    this guess or in an earlier one) is what real Wordle reports for a surplus
    copy of a repeated letter. Such a letter must not be banned globally,
    otherwise every remaining candidate would be filtered out; it is only
    recorded as not belonging at that position.
    """
    # Pass 1: collect every confirmed letter first, so that pass 2 sees the
    # complete `must` set no matter where in the guess the miss occurs.
    for i in range(5):
        if result[i] == "+":
            exact[i] = guess[i]
            must.add(guess[i])
        elif result[i] == "o":
            exactnt[i].add(guess[i])
            must.add(guess[i])

    # Pass 2: handle the misses.
    for i in range(5):
        if result[i] != "+" and result[i] != "o":
            if guess[i] in must:
                # Surplus copy of a letter that is present: wrong here only.
                exactnt[i].add(guess[i])
            else:
                mustnt.add(guess[i])
    return exact, exactnt, must, mustnt

In [32]:
from tqdm.notebook import trange
def next_guess(exact, exactnt, must, mustnt, guesses):
    """Choose the next guess among the words still consistent with the clues.

    Args:
        exact: list of 5 entries, the known letter per position or ".".
        exactnt: list of 5 sets, letters known not to be at each position.
        must: letters that have to appear somewhere in the word.
        mustnt: letters that may not appear anywhere in the word.
        guesses: words already tried; they are never proposed again.

    Returns:
        (guess, remaining): the chosen word and the number of candidate words
        it was chosen from. `guess` is "" when no candidate is left.

    Reads the notebook globals `words` (the candidate list) and `quiet`
    (suppresses the progress bar and the diagnostic prints).
    """
    # One regex covers all positional knowledge: a fixed letter where it is
    # known, a negated character class where letters are excluded, "." otherwise.
    combined = [
        exact[i] if exact[i] != "." else
        "[^" + "".join(exactnt[i]) + "]" if exactnt[i] else
        "."
        for i in range(5)
    ]
    exactly = re.compile("^" + "".join(combined) + "$")
    filtered_words = [
        word
        for word in words
        if word not in guesses
        and exactly.match(word)
        and all(letter in word for letter in must)
        and not any(letter in word for letter in mustnt)
    ]
    remaining = len(filtered_words)

    # scores[word] = [number of candidates sharing a not-yet-known letter with
    #                 word, total count of distinct letters shared with them]
    scores = {word: [0, 0] for word in filtered_words}
    # Build the letter sets once instead of once per pair inside the loop.
    letter_sets = [set(word) for word in filtered_words]
    new_letter_sets = [letters.difference(must) for letters in letter_sets]
    my_range = range if quiet else trange
    for i in my_range(remaining):
        test = filtered_words[i]
        for j in range(i + 1, remaining):
            # A pair only counts when `test` has a letter that is not already
            # required and that also occurs in `target`.
            if not new_letter_sets[i].isdisjoint(letter_sets[j]):
                target = filtered_words[j]
                overlap = len(letter_sets[i] & letter_sets[j])
                scores[test][0] += 1
                scores[test][1] += overlap
                scores[target][0] += 1
                scores[target][1] += overlap

    # Round 1 of scoring: prefer words that get us the most info.
    # We get info from each letter of overlap, or if there is no overlap.
    # Since score[0] is the number of words with overlap,
    # remaining - score[0] is the number of words with no overlap.
    max_score = -1
    guess = ""
    matching = []  # kept in candidate order so that ties resolve deterministically
    for word, score in scores.items():
        adjusted_score = score[1] + remaining - score[0]
        if adjusted_score > max_score:
            max_score = adjusted_score
            guess = word
            matching = [word]
        elif adjusted_score == max_score:
            matching.append(word)

    # Round 2 of scoring: prefer less letter overlap.
    # Among the highest scoring words, pick the one that overlaps with the
    # fewest other candidates; on a tie the earliest candidate wins.
    # (Comparing the negated count against a starting value of 0 could never
    # succeed, so the counts are compared directly.)
    for word in matching:
        if scores[word][0] < scores[guess][0]:
            guess = word

    if not quiet:
        print(set(matching))
        if guess:
            # Skipped when nothing is left: scores has no entry for "".
            print(f"{scores[guess]}")
    return guess, remaining

In [33]:
def starting_word():
    """Compute the opening guess by asking next_guess with no clues at all.

    Run this after changing next_guess to re-compute the initial guess that
    play() should start with. Prints the result and returns
    (guess, number of candidate words).
    """
    no_known_positions = ["."] * 5
    no_excluded_positions = [set() for _ in range(5)]
    guess, outof = next_guess(
        no_known_positions, no_excluded_positions, set(), set(), []
    )
    print(f"Initial guess: {guess} out of {outof}")
    return guess, outof

In [39]:
def play(correct, first_guess="stare", max_rounds=100):
    """Let the solver play one complete game against a known answer.

    Args:
        correct: the secret five-letter word.
        first_guess: the opening guess. Openers tried earlier in this notebook
            were "ourie", "irate", "rutic" and "arose"; starting_word() can be
            used to recompute one after changing next_guess.
        max_rounds: safety cap; the loop stops once the round counter reaches
            this value even if the word has not been found.

    Returns:
        (guess, r): the last word produced and the round number reached
        (the number of guesses used when the word was found).

    Raises:
        ValueError: if next_guess reports that no candidate word is left,
            which means `correct` cannot be reached from the word list.

    Prints a per-round trace unless the notebook global `quiet` is True.
    """
    guess = first_guess
    exact = ["."] * 5
    exactnt = [set(), set(), set(), set(), set()] # can't use [set()] * 5: that repeats one shared set
    must = set()
    mustnt = set()
    guesses = []
    r = 1
    while r < max_rounds:
        if not quiet:
            print(f"round {r}:")
            print(f"{guess}")
        result = wordle_round(guess, correct)
        if not quiet:
            print(f"{result}")
        if result == "+++++":
            break
        r += 1
        guesses.append(guess)
        exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
        if not quiet:
            print("required   letters: " + "".join(must))
            print("prohibited letters: " + "".join(mustnt))
            print("required   pattern: " + "".join(exact))
            print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
        guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
        if outof == 0:
            # Without this check the empty guess would fail inside
            # wordle_round with an unhelpful IndexError.
            raise ValueError(f"no candidate words left; {correct!r} is not reachable from the word list")
        if not quiet:
            print(f"{outof} words remaining")
            print()
    return guess, r

In [35]:
# Play one game against a known answer; play() prints its per-round trace only
# while the global `quiet` is False.
play("water")

round 1:
irate
-oooo
required   letters: tare
prohibited letters: i
required   pattern: .....
prohibited pattern: , r, a, t, e


  0%|          | 0/14 [00:00<?, ?it/s]

{'later', 'alter', 'alert'}
[2, 10]
14 words remaining

round 2:
alert
o-ooo
required   letters: tare
prohibited letters: il
required   pattern: .....
prohibited pattern: a, r, ae, tr, te


  0%|          | 0/7 [00:00<?, ?it/s]

{'eater', 'hater', 'taker', 'tamer', 'cater', 'taper', 'water'}
[0, 0]
7 words remaining

round 3:
cater
-++++
required   letters: tare
prohibited letters: cil
required   pattern: .ater
prohibited pattern: a, r, ae, tr, te


  0%|          | 0/3 [00:00<?, ?it/s]

{'eater', 'hater', 'water'}
[0, 0]
3 words remaining

round 4:
eater
o++++
required   letters: tare
prohibited letters: cil
required   pattern: .ater
prohibited pattern: ae, r, ae, tr, te


  0%|          | 0/2 [00:00<?, ?it/s]

{'hater', 'water'}
[0, 0]
2 words remaining

round 5:
hater
-++++
required   letters: tare
prohibited letters: chil
required   pattern: .ater
prohibited pattern: ae, r, ae, tr, te


  0%|          | 0/1 [00:00<?, ?it/s]

{'water'}
[0, 0]
1 words remaining

round 6:
water
+++++


('water', 6)

In [None]:
# Manual assist, first round: start from a clean state, record the word that
# was played and the feedback it received ("+" right position, "o" wrong
# position, "-" miss), then ask next_guess() for a suggestion.
guess = "irate"
exact = ["."] * 5
exactnt = [set(), set(), set(), set(), set()] # can't use [set()] * 5
must = set()
mustnt = set()
# Words in `guesses` are never suggested by next_guess(), so "ental" is excluded up front.
guesses = [ "ental" ]
r = 1  # NOTE(review): not read anywhere in this cell
result = "--ooo"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
# Show the accumulated constraints, then the suggested next word.
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")


required   letters: aet
prohibited letters: ir
required   pattern: .....
prohibited pattern: , , a, t, e
laten
97 words remaining


In [None]:
# Manual assist, follow-up round: `guess` still holds the last suggestion.
# Record the feedback it received and narrow the candidates again. This cell
# depends on the globals (guess, guesses, exact, exactnt, must, mustnt) left
# behind by the previously executed manual cell.
result = "oooo-"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
# Show the accumulated constraints, then the suggested next word.
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")


required   letters: alet
prohibited letters: nir
required   pattern: .....
prohibited pattern: l, a, at, et, e
pleat
10 words remaining


In [None]:
# Manual assist, follow-up round: record the feedback for the last suggested
# `guess` and narrow the candidates again. This cell depends on the globals
# (guess, guesses, exact, exactnt, must, mustnt) left behind by the previously
# executed manual cell, so its result changes with the order cells were run in.
result = "++++-"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
# Show the accumulated constraints, then the suggested next word.
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")

required   letters: hars
prohibited letters: tepiouyk
required   pattern: shar.
prohibited pattern: , a, r, , r
shard
2 words remaining


In [40]:
# Simulate 1000 games on randomly drawn secret words and tally how many rounds
# each one took. Tracing is switched off to keep the output readable.
quiet = True
# hist[k] counts the games finished in k rounds (index 0 stays unused). The
# last bucket also absorbs any longer game: play() may run for up to 100
# rounds, and an out-of-range index would otherwise raise IndexError and
# throw away the whole run.
hist = [0] * 20
for i in trange(1000):
    word, rounds = play(random.choice(words))
    hist[min(rounds, len(hist) - 1)] += 1
    if rounds > 6:
        # More than six rounds would be a loss in the real game; report it.
        print(f"{word}: {rounds}")
print(hist[1:])

  0%|          | 0/1000 [00:00<?, ?it/s]

joker: 9
pound: 8
rover: 7
pound: 8
[0, 57, 390, 413, 112, 24, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [37]:
# Turn tracing back on, then recompute the opening guess with no clues applied
# (starting_word() passes empty constraints to next_guess()).
quiet = False
starting_word()

  0%|          | 0/2315 [00:00<?, ?it/s]

{'stare'}
[2087, 4082]
Initial guess: stare out of 2315


('stare', 2315)

In [38]:
# Display the raw histogram list, including the index-0 bucket.
hist

[0, 1, 53, 386, 394, 130, 28, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]