In [8]:
import numpy as np

In [104]:
def file_to_list(f):
    """Return every line of ``f`` with leading/trailing whitespace removed.

    Args:
        f: Any iterable of strings, typically an open text file.

    Returns:
        A list with one stripped string per item yielded by ``f``; blank
        lines are kept as empty strings.
    """
    stripped_lines = []
    for raw_line in f:
        stripped_lines.append(raw_line.strip())
    return stripped_lines

In [105]:
def list_to_array(l):
    """Convert a list of words into an array of character code points.

    Args:
        l: Iterable of strings. For a rectangular 2-D result all strings
            must have the same length.

    Returns:
        ``np.array`` built from one row per word, where each entry is
        ``ord`` of the corresponding character.
    """
    code_rows = []
    for word in l:
        code_rows.append(list(map(ord, word)))
    return np.array(code_rows)

In [106]:
# Load the word list, one word per line. The path is relative, so the file
# must be in the current working directory.
with open('full_five_letters_words.txt') as f:
    words = file_to_list(f)
# Number of words loaded.
len(words)

3450

In [107]:
# Look at two entries of the list; the same pair is used to spot-check `green[0, 2]`.
words[0], words[2]

('inter', 'rower')

In [108]:
# Character code points of every word, one row per word.
words_a = list_to_array(words)

In [109]:
# green[i, j, k] is True when words i and j have the same letter at position k.
# Broadcasting compares every word (axis 0) against every word (axis 1).
green = words_a[:, np.newaxis, :] == words_a[np.newaxis, :, :]
green.shape

(3450, 3450, 5)

In [110]:
# Position-by-position matches between words[0] and words[2].
green[0, 2]

array([False, False, False,  True,  True])

In [111]:
# yellow[i, j, k] is True when the letter at position k of word i occurs at
# ANY position of word j (so every green match is also a yellow match).
# A letter repeated in word i is flagged at each of its positions, regardless
# of how many times it occurs in word j.
# `.any` reduces the pairwise comparison straight to booleans, without first
# building an integer count array that is then cast back to bool.
yellow = (words_a[:, np.newaxis, :, np.newaxis] == words_a[np.newaxis, :, np.newaxis, :]).any(axis=3)

In [112]:
# Expect the same (word, word, position) layout as `green`.
yellow.shape

(3450, 3450, 5)

In [114]:
# Look at another pair of entries; the same pair is used to spot-check `yellow[4, 6]`.
words[4], words[6]

('enrol', 'sleep')

In [115]:
# For each letter of words[4]: does it occur anywhere in words[6]?
yellow[4,6]

array([ True, False, False, False,  True])

In [156]:
def encode(a, base):
    """Collapse the last axis of ``a`` into a single number per entry.

    The values along the last axis are treated as digits, least significant
    first: element ``k`` is weighted by ``base ** k`` and the weighted values
    are summed.

    Args:
        a: Array with at least one dimension; the last axis holds the digits.
        base: Radix used for the positional weights.

    Returns:
        Array with the shape of ``a`` minus its last axis.
    """
    n_digits = a.shape[-1]
    # Weights are shaped (1, ..., 1, n_digits) so they align with the last axis of ``a``.
    weight_shape = (1,) * (a.ndim - 1) + (n_digits,)
    weights = (base ** np.arange(n_digits)).reshape(weight_shape)
    weighted = a * weights
    return weighted.sum(axis=-1)

In [161]:
# Pack the five per-position results into one base-4 integer per (guess, target) pair.
# Each base-4 digit equals 2*green + yellow. A green match is always a yellow
# match too, so a digit is 0 (no match), 1 (yellow only) or 3 (green) and
# never overflows into the next digit.
encoded = encode(green, 4) * 2 + encode(yellow, 4)
encoded.shape

(3450, 3450)

In [163]:
# Display the code matrix. A word compared with itself is green at all five
# positions, i.e. 3 in every base-4 digit = 4**5 - 1 = 1023 on the diagonal.
encoded

array([[1023,  273,  960, ...,    1,    0,   16],
       [  84, 1023,    4, ...,   16,    0,   64],
       [ 961,  257, 1023, ...,    4,    0,    0],
       ...,
       [   4,    4,   64, ..., 1023,   16,   16],
       [   0,    0,    0, ...,    1, 1023,  117],
       [ 256,  256,    0, ...,    4,  117, 1023]])

In [237]:
def choose(encoded, active):
    """Pick the candidate whose feedback best splits the remaining words.

    For each candidate guess ``i`` in ``active`` the feedback codes
    ``encoded[i, active]`` are grouped into buckets. With bucket sizes ``c``
    the score is ``sum(c * (c - 1)) / sum(c)``: the expected number of OTHER
    candidates that would share the target's feedback if the target were
    drawn uniformly from ``active``. Lower is better.

    Args:
        encoded: 2-D array of non-negative integer feedback codes, indexed
            as ``encoded[guess, target]``.
        active: 1-D integer array of the candidate indices still possible.

    Returns:
        ``(best_index, best_score)``. Ties go to the earliest candidate in
        ``active``. If ``active`` is empty the result is ``(0, np.inf)``.
    """
    winner = 0
    lowest = np.inf
    for candidate in active:
        bucket_sizes = np.bincount(encoded[candidate, active])
        # Ordered pairs of distinct candidates that land in the same bucket.
        colliding_pairs = (bucket_sizes * (bucket_sizes - 1)).sum()
        expected_others = colliding_pairs / bucket_sizes.sum()
        if expected_others < lowest:
            winner = candidate
            lowest = expected_others
    return winner, lowest

In [238]:
# Evaluate every word as the opening guess against the full word list;
# the result is (index into `words`, score).
choose(encoded, np.arange(words_a.shape[0]))

(1477, 81.94898550724638)

In [248]:
def play(word=None):
    """Simulate one game in which the solver guesses a hidden word.

    Uses the notebook-level ``words``, ``green``, ``yellow`` and ``encoded``
    objects together with ``choose``. Each round prints the guess and its
    score, the per-letter feedback (``G`` same letter at the same position,
    ``Y`` letter occurs elsewhere in the hidden word, ``-`` no match) and
    the candidates that are still consistent with the feedback.

    Args:
        word: Index into ``words`` of the hidden word. With the default
            ``None`` an index is drawn using ``np.random.randint``; pass an
            explicit index to replay a specific game.

    Raises:
        IndexError: If ``word`` is given but is not a valid index.
    """
    if word is None:
        word = np.random.randint(len(words))
    elif not 0 <= word < len(words):
        raise IndexError(f"word index {word} out of range for {len(words)} words")
    active = np.arange(len(words))
    while True:
        guess, guess_score = choose(encoded, active)
        print(f"Guess: {words[guess].upper()} (score {guess_score:.2f})")
        matches = green[guess, word]
        response = ['G' if g else 'Y' if y else '-'
                    for g, y in zip(matches, yellow[guess, word])]
        print(''.join(response))
        # Stop on an all-green response rather than on index equality: if the
        # word list contains the same spelling twice, the guess can spell the
        # hidden word while having a different index. The candidate set would
        # then stop shrinking and the same guess would repeat forever.
        if matches.all():
            print("Correct!")
            break
        # Keep only candidates that would have produced this exact feedback.
        active = active[encoded[guess, active] == encoded[guess, word]]
        candidates = ' '.join(words[candidate].upper() for candidate in active[:5])
        if active.size > 5:
            candidates = candidates + " ..."
        print(f"Remaining possible words: {active.size} ({candidates})")
        # The hidden word always matches its own feedback, so it must survive.
        assert word in active, (word, active)

In [257]:
# Run one simulated game with a randomly drawn hidden word.
play()

Guess: ALOES (score 81.95)
Y--Y-
Remaining possible words: 126 (MAYBE GREAT GAFFE TWEAK MATTE ...)
Guess: CRATE (score 4.63)
-YYYY
Remaining possible words: 2 (EXTRA TERRA)
Guess: EXTRA (score 0.00)
Y-YGG
Remaining possible words: 1 (TERRA)
Guess: TERRA (score 0.00)
GGGGG
Correct!
