# Wordle best first guess

Find the 5-character word that has the smallest average Hamming
distance to all other 5-character words in the word list.

In [1]:
import statistics

Read the word list (en-GB-ise Hunspell from http://wordlist.aspell.net/dicts/)

In [2]:
# Read one entry per line. The encoding is passed explicitly so the result does
# not depend on the platform's default locale (assumes words.txt is UTF-8).
with open("words.txt", encoding="utf-8") as f:
    words = f.read().strip().splitlines()

Exclude words ending with `'s`

In [3]:
# Drop possessive forms such as "cat's".
words = [
    entry
    for entry in words
    if not entry.endswith("'s")
]

Include only words with 5 characters

In [4]:
# Keep only entries that are exactly five characters long.
words = [
    entry
    for entry in words
    if len(entry) == 5
]

Exclude words that contain an uppercase character

In [5]:
# Keep an entry only if none of its characters is uppercase.
words = [
    entry
    for entry in words
    if not any(char.isupper() for char in entry)
]

In [6]:
# Number of candidate words remaining after the filters above.
len(words)

5167

In [7]:
def hamming(a, b):
    """Return the number of positions at which strings a and b differ.

    Raises:
        ValueError: if a and b do not have the same length.
    """
    if len(a) != len(b):
        raise ValueError("a and b must be of equal length")
    # Walk both strings in lockstep and count the mismatching positions.
    return sum(1 for left, right in zip(a, b) if left != right)


Calculate average Hamming distances (takes about 30 seconds)

In [8]:
# For every word, average its Hamming distance to each entry in the list that
# differs from it, and record the result as an (average, word) pair.
average_distances = []
for word in words:
    distances = [
        hamming(word, candidate)
        for candidate in words
        if candidate != word
    ]
    average_distances.append((statistics.mean(distances), word))

Sort by average Hamming distance (smallest first) and show the top five

In [9]:
# Rank the (average, word) pairs by their average and display the five lowest.
sorted(
    average_distances,
    key=lambda pair: pair[0],
)[:5]

[(4.0991095625241964, 'sores'),
 (4.102206736353078, 'sales'),
 (4.116337591947348, 'soles'),
 (4.118660472319009, 'sates'),
 (4.1339527680991095, 'sames')]