In [1]:
import re
# Exactly five lowercase ASCII letters. Requiring [a-z] in every position (not
# only the first) rejects proper nouns as before and additionally rejects
# entries containing hyphens, apostrophes, digits or capitals, which the old
# "." wildcards let through.
wordle_word = re.compile(r"^[a-z]{5}$")

In [2]:
import urllib.request
# Fetch the BSD "web2" dictionary and keep only the lines that wordle_word accepts.
with urllib.request.urlopen("https://web.mit.edu/freebsd/head/share/dict/web2") as w:
    words = list(filter(wordle_word.match, w.read().decode("utf-8").splitlines()))

In [3]:
import random
# Draw a random secret word and echo it together with its index in `words`.
i = random.randrange(0, len(words))
correct = words[i]
print(f"{i}: {correct}")

2443: evade


In [4]:
# Global verbosity switch read by next_guess and play: while False they print
# per-round details and next_guess shows a tqdm progress bar.
quiet = False

In [5]:
def wordle_round(guess, correct):
    """Score a five-letter guess against the secret word.

    Returns a five-character string with one mark per guess position:
    "+" if the letter equals the secret's letter at that position,
    "o" if the letter occurs anywhere in the secret, "-" otherwise.

    Letter counts are not taken into account: a repeated guess letter is
    marked "o" every time as long as the secret contains it at least once.
    """
    def mark(pos):
        letter = guess[pos]
        if letter == correct[pos]:
            return "+"
        return "o" if letter in correct else "-"

    return "".join(mark(pos) for pos in range(5))

In [6]:
def process_result(guess, result, exact, exactnt, must, mustnt):
    """Fold the feedback for one guess into the solver's accumulated knowledge.

    Args:
        guess: the five-letter word that was played.
        result: five marks for that guess: "+" (right letter, right place),
            "o" (letter is in the answer but not here), anything else is
            treated as a miss.
        exact: list of five entries, updated in place with letters fixed by "+".
        exactnt: list of five sets, updated in place with letters ruled out
            at each position.
        must: set of letters the answer has to contain, updated in place.
        mustnt: set of letters the answer may not contain, updated in place.

    Returns:
        The same four containers (exact, exactnt, must, mustnt) after the update.
    """
    # Letters this very guess proves to be in the answer. Collected up front
    # because a miss mark may precede the "+"/"o" for the same letter.
    confirmed = {guess[i] for i in range(5) if result[i] in ("+", "o")}
    for i in range(5):
        letter = guess[i]
        if result[i] == "+":
            exact[i] = letter
            must.add(letter)
        elif result[i] == "o":
            exactnt[i].add(letter)
            must.add(letter)
        elif letter in confirmed or letter in must:
            # A miss on a letter that is known to be in the answer (a surplus
            # copy of a repeated letter) only rules out this position. Banning
            # the letter outright would filter out the real answer.
            exactnt[i].add(letter)
        else:
            mustnt.add(letter)
    return exact, exactnt, must, mustnt

In [7]:
from tqdm.notebook import trange
def next_guess(exact, exactnt, must, mustnt, guesses):
    """Pick the next word to play among the candidates still consistent with the feedback.

    Args:
        exact: list of five entries; the known letter for each position, or "." if unknown.
        exactnt: list of five sets; letters ruled out at each position.
        must: letters every candidate has to contain.
        mustnt: letters no candidate may contain.
        guesses: words already played; they are never proposed again.

    Returns:
        (guess, remaining): the chosen word and the number of candidates it was
        chosen from. If no candidate is left the result is ("", 0).

    Reads the module-level `words` list and `quiet` flag. Unless `quiet` is
    set, shows a progress bar while scoring and prints the top-scoring
    candidates and the score pair of the chosen word.
    """
    # One regex fragment per position: the known letter, a negated class of the
    # letters excluded there, or "." when nothing is known. Letters are escaped
    # so that characters such as "-", "^" or "]" cannot change the pattern.
    combined = []
    for i in range(5):
        if exact[i] != ".":
            combined.append(re.escape(exact[i]))
        elif exactnt[i]:
            combined.append("[^" + "".join(re.escape(letter) for letter in exactnt[i]) + "]")
        else:
            combined.append(".")
    exactly = re.compile("^" + "".join(combined) + "$")
    filtered_words = [
        word
        for word in words
        if word not in guesses
        and exactly.match(word)
        and all(letter in word for letter in must)
        and not any(letter in word for letter in mustnt)
    ]
    remaining = len(filtered_words)
    if remaining == 0:
        # Nothing fits the constraints; there is no word whose score could be reported.
        return "", 0

    # Letter sets are loop-invariant, so build them once instead of once per pair.
    letter_sets = [set(word) for word in filtered_words]
    # Letters of each word that are not already known to be in the answer.
    fresh_sets = [letters.difference(must) for letters in letter_sets]

    # scores[word] = [number of other candidates sharing a not-yet-required
    # letter with word, total count of distinct shared letters over those candidates]
    scores = {word: [0, 0] for word in filtered_words}
    my_range = range if quiet else trange
    for i in my_range(remaining):
        test = filtered_words[i]
        for j in range(i + 1, remaining):
            if not fresh_sets[i].isdisjoint(letter_sets[j]):
                target = filtered_words[j]
                overlap = len(letter_sets[i] & letter_sets[j])
                scores[test][0] += 1
                scores[test][1] += overlap
                scores[target][0] += 1
                scores[target][1] += overlap

    # Round 1 of scoring: prefer words that get us the most info.
    # We get info from each letter of overlap, or if there is no overlap.
    # Since score[0] is the number of words with overlap,
    # remaining - score[0] is the number of words with no overlap.
    max_score = -1
    matching = []  # kept in candidate order so that ties resolve deterministically
    for word, score in scores.items():
        adjusted_score = score[1] + remaining - score[0]
        if adjusted_score > max_score:
            max_score = adjusted_score
            matching = [word]
        elif adjusted_score == max_score:
            matching.append(word)

    # Round 2 of scoring: among the highest scoring words, prefer the one that
    # overlaps with the fewest other candidates (first such word on a tie).
    # The previous version compared -score[0] against an initial 0 with ">",
    # which can never succeed, so this tie-break was silently skipped.
    guess = min(matching, key=lambda word: scores[word][0])
    if not quiet:
        print(set(matching))
        print(f"{scores[guess]}")
    return guess, remaining

In [8]:
def starting_word():
    """Ask next_guess for a word with no feedback collected yet.

    Run this after changing next_guess to re-compute the initial guess.
    Prints the chosen word and the number of candidates, and returns both
    as (guess, outof).
    """
    no_fixed_letters = ["."] * 5
    no_excluded_positions = [set() for _ in range(5)]
    guess, outof = next_guess(no_fixed_letters, no_excluded_positions, set(), set(), [])
    print(f"Initial guess: {guess} out of {outof}")
    return guess, outof

In [9]:
def play(correct, first_guess="irate", max_rounds=100):
    """Let the solver play one game against the secret word `correct`.

    Args:
        correct: the five-letter secret word.
        first_guess: the opening word. Defaults to "irate"; "ourie", "rutic"
            and "arose" are other openers that were tried by hand.
        max_rounds: the loop stops once the round counter reaches this value.

    Returns:
        (guess, r): the last word chosen and the round counter. When the game
        is solved, guess equals `correct` and r is the round it was played in.

    Raises:
        ValueError: if no candidate word is left, e.g. because `correct` is
            not in the word list.

    Unless the module-level `quiet` flag is set, every round is traced on
    standard output.
    """
    guess = first_guess
    exact = ["."] * 5
    exactnt = [set() for _ in range(5)]  # five distinct sets; [set()] * 5 would alias a single set
    must = set()
    mustnt = set()
    guesses = []
    r = 1
    while r < max_rounds:
        if not quiet:
            print(f"round {r}:")
            print(f"{guess}")
        result = wordle_round(guess, correct)
        if not quiet:
            print(f"{result}")
        if result == "+++++":
            break
        r += 1
        guesses.append(guess)
        exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
        if not quiet:
            print("required   letters: " + "".join(must))
            print("prohibited letters: " + "".join(mustnt))
            print("required   pattern: " + "".join(exact))
            print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
        guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
        if not guess:
            # An empty guess would otherwise surface as an IndexError inside
            # wordle_round on the next iteration.
            raise ValueError(f"no candidate words left for {correct!r}")
        if not quiet:
            print(f"{outof} words remaining")
            print()
    return guess, r

In [10]:
# Example game: let the solver work out the secret word "water".
play("water")

round 1:
irate
-oooo
required   letters: arte
prohibited letters: i
required   pattern: .....
prohibited pattern: , r, a, t, e


  0%|          | 0/49 [00:00<?, ?it/s]

{'telar', 'alter', 'later', 'ratel', 'alert', 'taler'}
[5, 25]
49 words remaining

round 2:
alert
o-ooo
required   letters: arte
prohibited letters: li
required   pattern: .....
prohibited pattern: a, r, ae, rt, te


  0%|          | 0/28 [00:00<?, ?it/s]

{'terma', 'rated', 'dater', 'pater', 'mater', 'taper', 'terap', 'detar', 'tamer'}
[2, 10]
28 words remaining

round 3:
dater
-++++
required   letters: arte
prohibited letters: dli
required   pattern: .ater
prohibited pattern: a, r, ae, rt, te


  0%|          | 0/10 [00:00<?, ?it/s]

{'cater', 'pater', 'gater', 'eater', 'mater', 'tater', 'hater', 'water', 'rater', 'bater'}
[0, 0]
10 words remaining

round 4:
bater
-++++
required   letters: arte
prohibited letters: dlbi
required   pattern: .ater
prohibited pattern: a, r, ae, rt, te


  0%|          | 0/9 [00:00<?, ?it/s]

{'cater', 'pater', 'mater', 'gater', 'eater', 'tater', 'hater', 'water', 'rater'}
[0, 0]
9 words remaining

round 5:
cater
-++++
required   letters: arte
prohibited letters: cbdli
required   pattern: .ater
prohibited pattern: a, r, ae, rt, te


  0%|          | 0/8 [00:00<?, ?it/s]

{'pater', 'mater', 'gater', 'eater', 'tater', 'hater', 'water', 'rater'}
[0, 0]
8 words remaining

round 6:
eater
o++++
required   letters: arte
prohibited letters: cbdli
required   pattern: .ater
prohibited pattern: ae, r, ae, rt, te


  0%|          | 0/7 [00:00<?, ?it/s]

{'pater', 'mater', 'gater', 'tater', 'hater', 'water', 'rater'}
[0, 0]
7 words remaining

round 7:
gater
-++++
required   letters: arte
prohibited letters: cbdgli
required   pattern: .ater
prohibited pattern: ae, r, ae, rt, te


  0%|          | 0/6 [00:00<?, ?it/s]

{'pater', 'mater', 'tater', 'hater', 'water', 'rater'}
[0, 0]
6 words remaining

round 8:
hater
-++++
required   letters: arte
prohibited letters: cbhdgli
required   pattern: .ater
prohibited pattern: ae, r, ae, rt, te


  0%|          | 0/5 [00:00<?, ?it/s]

{'pater', 'mater', 'tater', 'water', 'rater'}
[0, 0]
5 words remaining

round 9:
mater
-++++
required   letters: arte
prohibited letters: cbhdgmli
required   pattern: .ater
prohibited pattern: ae, r, ae, rt, te


  0%|          | 0/4 [00:00<?, ?it/s]

{'rater', 'tater', 'pater', 'water'}
[0, 0]
4 words remaining

round 10:
pater
-++++
required   letters: arte
prohibited letters: cpbhdgmli
required   pattern: .ater
prohibited pattern: ae, r, ae, rt, te


  0%|          | 0/3 [00:00<?, ?it/s]

{'rater', 'tater', 'water'}
[0, 0]
3 words remaining

round 11:
rater
o++++
required   letters: arte
prohibited letters: cpbhdgmli
required   pattern: .ater
prohibited pattern: are, r, ae, rt, te


  0%|          | 0/2 [00:00<?, ?it/s]

{'tater', 'water'}
[0, 0]
2 words remaining

round 12:
tater
o++++
required   letters: arte
prohibited letters: cpbhdgmli
required   pattern: .ater
prohibited pattern: arte, r, ae, rt, te


  0%|          | 0/1 [00:00<?, ?it/s]

{'water'}
[0, 0]
1 words remaining

round 13:
water
+++++


('water', 13)

In [None]:
# Manual solving session, first step: start from the opener, enter the feedback
# for it by hand and let next_guess propose the following word.
guess = "irate"
exact = ["."] * 5
exactnt = [set(), set(), set(), set(), set()] # five separate sets; [set()] * 5 would repeat one shared set
must = set()
mustnt = set()
# Words in this list are never proposed by next_guess.
# NOTE(review): why "ental" is excluded up front is not visible here.
guesses = [ "ental" ]
# NOTE(review): r is not used anywhere in this cell.
r = 1
# Feedback for `guess`, typed in by hand ("+", "o", "-" per letter, as process_result expects).
result = "--ooo"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
# `guess` now holds the suggested next word; the following cells reuse it.
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")


required   letters: aet
prohibited letters: ir
required   pattern: .....
prohibited pattern: , , a, t, e
laten
97 words remaining


In [None]:
# Manual solving session, next step: relies on guess, guesses, exact, exactnt,
# must and mustnt left behind by the previous manual cell.
# Feedback for the current `guess`, typed in by hand.
result = "oooo-"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
# Replace `guess` with the next suggestion.
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")


required   letters: alet
prohibited letters: nir
required   pattern: .....
prohibited pattern: l, a, at, et, e
pleat
10 words remaining


In [None]:
# Manual solving session, following step: same procedure as the previous cell,
# again building on the state (guess, guesses, exact, exactnt, must, mustnt)
# accumulated by the earlier manual cells.
# Feedback for the current `guess`, typed in by hand.
result = "++++-"
guesses.append(guess)
exact, exactnt, must, mustnt = process_result(guess, result, exact, exactnt, must, mustnt)
print("required   letters: " + "".join(must))
print("prohibited letters: " + "".join(mustnt))
print("required   pattern: " + "".join(exact))
print("prohibited pattern: " + ", ".join("".join(letters) for letters in exactnt))
# Replace `guess` with the next suggestion.
guess, outof = next_guess(exact, exactnt, must, mustnt, guesses)
print(guess)
print(f"{outof} words remaining")

required   letters: hars
prohibited letters: tepiouyk
required   pattern: shar.
prohibited pattern: , a, r, , r
shard
2 words remaining


In [11]:
# Benchmark: play 1000 games against randomly drawn secret words with tracing
# switched off, and tally the number of rounds each game takes.
# hist[k] is the number of games that finished with a round count of k.
quiet = True
hist = [0] * 20
for i in trange(1000):
    word, rounds = play(words[random.randrange(len(words))])
    # play() may report more rounds than the histogram initially has slots
    # for; grow the list instead of failing with an IndexError.
    if rounds >= len(hist):
        hist.extend([0] * (rounds - len(hist) + 1))
    hist[rounds] += 1
    if rounds > 6:  # would have lost a real game, which allows six guesses
        print(f"{word}: {rounds}")
print(hist[1:])

  0%|          | 0/1000 [00:00<?, ?it/s]

kabel: 7
mater: 9
boozy: 7
ember: 7
quave: 8
barry: 7
scran: 8
unwet: 7
peery: 7
jutty: 8
tizzy: 8
wirer: 9
hubby: 8
punky: 7
rider: 7
stray: 8
corer: 9
jural: 7
reree: 8
spore: 7
eaver: 7
gurry: 7
borer: 7
pansy: 7
rider: 7
tower: 7
jinny: 7
sider: 8
vinny: 10
jotty: 7
vulva: 7
wordy: 7
gager: 7
[0, 9, 225, 392, 270, 71, 21, 8, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Switch tracing back on and compute the opening guess over the unconstrained word list.
quiet = False
starting_word()

  0%|          | 0/8498 [00:00<?, ?it/s]

{'irate', 'tarie', 'arite', 'retia'}
[7894, 15009]
Initial guess: irate out of 8498


('irate', 8498)