In [1]:
from collections import Counter

In [2]:
# using the file from https://gist.github.com/cfreshman/dec102adb5e60a8299857cbf78f6cf57
with open('wordle-answers-alphabetical.txt') as f:
   answer_words = f.readlines()

with open('allowed.txt') as f:
   allowed_words = f.readlines()

In [5]:
answer_words = [word[:5] for word in answer_words]
allowed_words = [word[:5] for word in allowed_words]

In [7]:
print(answer_words[:10])

['aback', 'abase', 'abate', 'abbey', 'abbot', 'abhor', 'abide', 'abled', 'abode', 'abort']


In [9]:
# Create a score for each letter in each position, based on the frequency of that letter appearing in the answers
positions = [Counter(),Counter(),Counter(),Counter(),Counter()]
for word in answer_words:
    for index,letter in enumerate(word):
        positions[index][letter] += 1
        
print(positions[0])

Counter({'s': 366, 'c': 198, 'b': 173, 't': 149, 'p': 142, 'a': 141, 'f': 136, 'g': 115, 'd': 111, 'm': 107, 'r': 105, 'l': 88, 'w': 83, 'e': 72, 'h': 69, 'v': 43, 'o': 41, 'n': 37, 'i': 34, 'u': 33, 'q': 23, 'j': 20, 'k': 20, 'y': 6, 'z': 3})


In [13]:
# now let's score the corpus, including the allowed words that will never be answers 
# (this doesn't actuallly change the answer: the best words are also possible answers)

scored_words = []
for word in answer_words + allowed_words:
    score = 0
    for index,letter in enumerate(word):
        score += positions[index][letter]
    scored_words.append((word, score))
scored_words.sort(key = lambda x:x[1], reverse = True)
print(scored_words[:100])

[('slate', 1437), ('sauce', 1411), ('slice', 1409), ('shale', 1403), ('saute', 1398), ('share', 1393), ('sooty', 1392), ('shine', 1382), ('suite', 1381), ('crane', 1378), ('saint', 1371), ('soapy', 1366), ('shone', 1360), ('shire', 1352), ('saucy', 1351), ('slave', 1344), ('cease', 1342), ('sense', 1342), ('saner', 1339), ('snare', 1336), ('stale', 1336), ('crate', 1335), ('spree', 1332), ('shore', 1330), ('suave', 1329), ('slide', 1326), ('stare', 1326), ('slime', 1325), ('sorry', 1324), ('brace', 1323), ('shiny', 1322), ('gooey', 1320), ('seize', 1318), ('crone', 1315), ('sleet', 1315), ('state', 1313), ('brine', 1312), ('scree', 1311), ('shade', 1310), ('space', 1310), ('spare', 1310), ('shame', 1309), ('slant', 1309), ('sally', 1308), ('scale', 1299), ('spine', 1299), ('trace', 1299), ('shake', 1296), ('stone', 1293), ('tease', 1293), ('shape', 1291), ('scare', 1289), ('shave', 1287), ('salty', 1285), ('sassy', 1285), ('slope', 1285), ('since', 1283), ('poise', 1282), ('swine', 128

In [11]:
best_word = scored_words[0][0]
print(f"Best first word: {best_word}")

Best first word: slate


In [16]:
#now let's find the best companion word. It's the highest-scoring word
#that doesn't share any letters with the first word

for scored_word in scored_words:
    if not any([letter in best_word for letter in scored_word[0]]):  
        break

print(f"Best second word: {scored_word}")

Best second word: ('crony', 1255)


In [24]:
# Ok, but is slate-crony really the best combo? It's possible there's a lower-scoring
# word than "slate" that has a higher-scoring companion than "crony", for a higher total
# score. Let's take a look at that. Since we know the answer has to beat slate-crony, we 
# can limit the counting pretty heavily

max_score = 0
for first_scored_word in scored_words[:100]:
    score = first_scored_word[1]
    for second_scored_word in scored_words[:100]:
        if not any([letter in best_word for letter in second_scored_word[0]]):
            score += second_scored_word[1]
            if score > max_score:
                max_score = score
                best_combo = (first_scored_word, second_scored_word)
            break

print(f"Best combo: {best_combo}")

Best combo: (('slate', 1437), ('crony', 1255))


In [None]:
# Ok, we can't best our slate of cronies!