In [1]:
# Make the notebook's `.container` element span the full browser width.
# `display` and `HTML` come from the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width: 100% !important; }</style>"))

In [2]:
'''
https://www.nytimes.com/games/wordle/index.html

read data. there's a list of legal words, and a list of more common words that can be the secret word
 this is in plaintext in the wordle source...
'''
import csv

def _readWordList(path):
    '''Return the first CSV record of the file at `path` as a list of strings.

    The file is parsed as comma-separated, double-quote-quoted text and only
    its first record is read. Raises StopIteration if the file has no record.
    '''
    # newline='' is the way the csv module documentation says to open files
    # that are handed to csv.reader.
    with open(path, 'r', newline='') as wordData:
        return next(csv.reader(wordData, delimiter=',', quotechar='"'))


wordleAnswersPath = './wordleSecretWords.txt'
wordleWordsPath = './wordleWords.txt'

# Both lists are kept sorted alphabetically.
wordleAnswers = sorted(_readWordList(wordleAnswersPath))
wordleWords = sorted(_readWordList(wordleWordsPath))

nAnswers = len(wordleAnswers)
nWords = len(wordleWords)

In [4]:
'''
basic functions for comparing guess to word
 wordleCompare3 is best & used later. No significant performance differences
'''

def wordleCompare1(guess, word):
    '''Score `guess` against the secret `word` as a list of ints.

    One entry per compared position:
      2 = the guess letter equals the word letter at that position,
      1 = the guess letter occurs among the word's not-yet-accounted-for
          unmatched letters,
      0 = otherwise.

    Every unmatched letter of `word` can justify only a single 1, so a letter
    repeated in `guess` is never reported as present more often than `word`
    contains it. Positions are paired with zip, so the comparison stops at the
    shorter of the two strings.
    '''
    matches = [g == w for g, w in zip(guess, word)]
    # Letters of `word` that were not hit exactly; consumed as they are used.
    unmatched = [w for m, w in zip(matches, word) if not m]
    scores = []
    for m, g in zip(matches, guess):
        if m:
            scores.append(2)
        elif g in unmatched:
            unmatched.remove(g)
            scores.append(1)
        else:
            scores.append(0)
    return scores


def wordleCompare2(guess, word):
    '''Score `guess` against the secret `word` as a list of characters.

    One entry per compared position:
      upper-cased guess letter = letter equals the word letter at that position,
      guess letter unchanged   = letter occurs among the word's
                                 not-yet-accounted-for unmatched letters,
      '_'                      = otherwise.

    Every unmatched letter of `word` can justify only a single "present" mark,
    so a letter repeated in `guess` is never reported as present more often
    than `word` contains it. Positions are paired with zip, so the comparison
    stops at the shorter of the two strings.
    '''
    matches = [g == w for g, w in zip(guess, word)]
    # Letters of `word` that were not hit exactly; consumed as they are used.
    unmatched = [w for m, w in zip(matches, word) if not m]
    marks = []
    for m, g in zip(matches, guess):
        if m:
            marks.append(g.upper())
        elif g in unmatched:
            unmatched.remove(g)
            marks.append(g)
        else:
            marks.append('_')
    return marks

# returns a string the same length as guess where '_' = guess letter not present in secret word. upper case = letter in correct place. lower case = letter in incorrect place
def wordleCompare3(guess, word):
    '''Return the feedback pattern for `guess` against the secret `word`.

    The result is a string with one character per compared position:
      upper-cased guess letter = letter equals the word letter at that position,
      guess letter unchanged   = letter occurs among the word's
                                 not-yet-accounted-for unmatched letters,
      '_'                      = otherwise.

    Every unmatched letter of `word` can justify only a single "present" mark,
    so a letter repeated in `guess` is never reported as present more often
    than `word` contains it (e.g. 'eerie' against 'foyer' gives 'e_r__').
    Positions are paired with zip, so the comparison stops at the shorter of
    the two strings.
    '''
    matches = [g == w for g, w in zip(guess, word)]
    # Letters of `word` that were not hit exactly; consumed as they are used.
    unmatched = [w for m, w in zip(matches, word) if not m]
    marks = []
    for m, g in zip(matches, guess):
        if m:
            marks.append(g.upper())
        elif g in unmatched:
            unmatched.remove(g)
            marks.append(g)
        else:
            marks.append('_')
    return ''.join(marks)

def wordleCompare3b(guess, word):
    '''String form of wordleCompare2: its per-letter marks joined together.'''
    marks = wordleCompare2(guess, word)
    return ''.join(marks)

In [5]:
''' demo basic functions '''
testWord, testGuess = "foyer", "roate"

# Score the same guess with each variant of the comparison function.
res1 = wordleCompare1(testGuess, testWord)
res2 = wordleCompare2(testGuess, testWord)
res3 = wordleCompare3(testGuess, testWord)

# One result per line, in the order the variants are defined.
print(res1, res2, res3, sep='\n')

[1, 2, 0, 0, 1]
['r', 'O', '_', '_', 'e']
rO__e


In [433]:
''' functions for evaluating the quality of a guess word '''

def evaluateOneGuess(guess, remainingWords):
    '''Group `remainingWords` by the pattern `guess` produces against each.

    The guess itself is skipped when it appears in `remainingWords`.

    Returns a pair of dicts keyed by pattern string (as produced by
    wordleCompare3), in first-seen order:
      countsByPattern -- number of words in the group, as a float
      wordsByPattern  -- the list of words in the group, in input order
    '''
    wordsByPattern = {}
    for candidate in remainingWords:
        if candidate != guess:
            wordsByPattern.setdefault(wordleCompare3(guess, candidate), []).append(candidate)

    # Counts are kept as floats, one per group, in the same key order.
    countsByPattern = {pattern: float(len(group)) for pattern, group in wordsByPattern.items()}
    return countsByPattern, wordsByPattern


def scoreOneEvaluation(countsByPattern):
    '''Return the count-weighted average group size for a pattern partition.

    `countsByPattern` maps each pattern to the number of words that produce it.
    The result is sum(count**2) / sum(count): the average size of the group a
    word falls into when every counted word is equally likely. Lower is better.

    Returns 0.0 when there is nothing to count (empty dict or all-zero counts)
    instead of dividing by zero.
    '''
    total = sum(countsByPattern.values())
    if not total:
        return 0.0
    return sum(v * v for v in countsByPattern.values()) / total

def scoreOneGuess(guess, remainingWords):
    '''Return the average number of words left after playing `guess`.

    Every word in `remainingWords` other than `guess` is grouped by the pattern
    wordleCompare3 gives for it; the score is sum(groupSize**2) divided by
    len(remainingWords). The guess itself is not counted in any group but still
    counts in the denominator. Lower is better.

    Returns 0.0 for an empty `remainingWords` instead of dividing by zero.
    '''
    nTotal = len(remainingWords)
    if nTotal == 0:
        return 0.0

    countsByPattern = {}
    for word in remainingWords:
        if word == guess:
            continue
        pattern = wordleCompare3(guess, word)
        countsByPattern[pattern] = countsByPattern.get(pattern, 0.0) + 1.0

    return sum(v * v for v in countsByPattern.values()) / nTotal
    
def sortDictByValue(d):
    '''Return a new dict with the items of `d` ordered by ascending value.

    Items with equal values keep their original relative order.
    '''
    orderedItems = sorted(d.items(), key=lambda pair: pair[1])
    return dict(orderedItems)

# Group the full legal-word list by the pattern that guessing 'raise' produces.
countDict,wordDict = evaluateOneGuess('raise', wordleWords)

# Score 'adieu' as a guess against the answer list (lower is better); as the
# cell's last expression the value is displayed.
scoreOneGuess('adieu', wordleAnswers)

123.4365526201819

In [411]:
''' evaluate one first-guess-word. '''
guess = 'stare'

# Group every other answer by the pattern `guess` produces against it, using
# the shared helper rather than an inline copy of the same loop.
round1ComboCounts, round1ComboWords = evaluateOneGuess(guess, wordleAnswers)

# Largest groups first.
round1ComboCounts = dict(sorted(round1ComboCounts.items(), key=lambda item: -item[1]))

# Sum of groupSize**2 / nAnswers over all patterns.
avgRemainingWords = sum(v * v / nAnswers for v in round1ComboCounts.values())

print("average remaining words: ", avgRemainingWords)
print('')
print('# unique patterns: ', len(round1ComboCounts))
print('')
print('pattern : words left')
for key, val in round1ComboCounts.items():
    print(key, "  : ", val)

average remaining words:  71.04634040710256

# unique patterns:  132

pattern : words left
_____   :  226.0
__a__   :  126.0
___re   :  118.0
____e   :  115.0
___r_   :  100.0
_t___   :  98.0
____E   :  87.0
_ta__   :  74.0
S____   :  67.0
__ar_   :  64.0
_t__e   :  54.0
__a_e   :  51.0
___rE   :  49.0
__are   :  47.0
_t_r_   :  42.0
__a_E   :  41.0
s____   :  38.0
__A__   :  37.0
_t_re   :  30.0
__Ar_   :  30.0
S_A__   :  27.0
S___E   :  27.0
__A_E   :  24.0
ST___   :  22.0
_ta_e   :  21.0
s___E   :  21.0
S___e   :  21.0
___R_   :  20.0
_t__E   :  20.0
S_a__   :  18.0
St___   :  18.0
st___   :  17.0
s_a__   :  16.0
__aR_   :  15.0
_tare   :  14.0
s_A__   :  14.0
st__e   :  14.0
__ArE   :  14.0
s__re   :  14.0
S__re   :  14.0
_tar_   :  13.0
_t_rE   :  13.0
st_r_   :  13.0
__AR_   :  12.0
s__r_   :  12.0
_tAr_   :  12.0
_tA_e   :  11.0
S_A_E   :  11.0
sta__   :  10.0
__A_e   :  10.0
___Re   :  10.0
s__rE   :  10.0
St__e   :  10.0
S__R_   :  10.0
STA__   :  10.0
s_a_E   :  9.0
_tA__   :

In [413]:
# Words grouped under the all-'_' pattern, i.e. those for which the first
# guess produced no letter marks at all.
remainingWords = round1ComboWords['_____']
print(len(remainingWords))
remainingWords

226


['biddy',
 'billy',
 'bingo',
 'blimp',
 'blind',
 'blink',
 'block',
 'blond',
 'blood',
 'bloom',
 'blown',
 'bluff',
 'bobby',
 'bongo',
 'booby',
 'boozy',
 'bough',
 'bound',
 'buddy',
 'buggy',
 'build',
 'bulky',
 'bully',
 'bunch',
 'bunny',
 'buxom',
 'chick',
 'child',
 'chili',
 'chill',
 'chock',
 'chuck',
 'chump',
 'chunk',
 'cinch',
 'civic',
 'civil',
 'click',
 'cliff',
 'climb',
 'cling',
 'clink',
 'clock',
 'cloud',
 'clown',
 'cluck',
 'clump',
 'clung',
 'colon',
 'comfy',
 'comic',
 'conch',
 'condo',
 'conic',
 'couch',
 'cough',
 'could',
 'coyly',
 'cubic',
 'cumin',
 'cynic',
 'dilly',
 'dimly',
 'dingo',
 'dingy',
 'dizzy',
 'dodgy',
 'doing',
 'dolly',
 'dough',
 'dowdy',
 'downy',
 'duchy',
 'dully',
 'dummy',
 'dumpy',
 'dying',
 'filly',
 'filmy',
 'finch',
 'fizzy',
 'flick',
 'fling',
 'flock',
 'flood',
 'flown',
 'fluff',
 'fluid',
 'flung',
 'flunk',
 'foggy',
 'folio',
 'folly',
 'found',
 'fully',
 'fungi',
 'funky',
 'funny',
 'fuzzy',
 'ghoul',


In [414]:
# Candidates left when the first guess came back as all '_'.
remainingWords = round1ComboWords['_____']

# For every legal word: keep its partition of the remaining words and score it.
guess2Data, guess2Qualities = {}, {}
for guess in wordleWords:
    patternCounts, guess2Data[guess] = evaluateOneGuess(guess, remainingWords)
    guess2Qualities[guess] = scoreOneEvaluation(patternCounts)

# Best (lowest) score first.
guess2Qualities = sortDictByValue(guess2Qualities)
guess2Qualities

{'colin': 6.20353982300885,
 'doily': 6.469026548672566,
 'could': 6.511111111111111,
 'noily': 6.716814159292035,
 'dolci': 6.79646017699115,
 'nould': 6.831858407079646,
 'lound': 7.0265486725663715,
 'cloud': 7.195555555555556,
 'nicol': 7.654867256637168,
 'lindy': 7.716814159292035,
 'piony': 7.911504424778761,
 'guild': 7.915555555555556,
 'ludic': 8.026548672566372,
 'unlid': 8.079646017699115,
 'build': 8.093333333333334,
 'lucid': 8.164444444444445,
 'cibol': 8.230088495575222,
 'logic': 8.306666666666667,
 'poind': 8.309734513274336,
 'mould': 8.380530973451327,
 'fonly': 8.415929203539823,
 'lupin': 8.469026548672566,
 'linum': 8.52212389380531,
 'folic': 8.548672566371682,
 'login': 8.573333333333334,
 'dimly': 8.582222222222223,
 'choli': 8.5929203539823,
 'cling': 8.617777777777778,
 'choil': 8.63716814159292,
 'oundy': 8.63716814159292,
 'poilu': 8.663716814159292,
 'lingy': 8.672566371681416,
 'bliny': 8.725663716814159,
 'child': 8.786666666666667,
 'pound': 8.78666666

In [416]:
# Show the remaining words for which guessing 'cling' yields the pattern 'C_in_'.
guess2Data['cling']['C_in_']

['cinch', 'conic', 'cumin', 'cynic']

In [406]:
# evaluate one word for the third guess
guess = 'mawks'

# Group the current remainingWords by the pattern `guess` produces, using the
# shared helper rather than an inline copy of the same loop.
round2ComboCounts, round2ComboWords = evaluateOneGuess(guess, remainingWords)

In [409]:
# Narrow the candidates to those matching the observed result of the last guess.
guess2Result = '_____'
remainingWords = round2ComboWords[guess2Result]

# For every legal word: keep its partition of the remaining words and score it.
guess2Data, guess2Qualities = {}, {}
for guess in wordleWords:
    patternCounts, guess2Data[guess] = evaluateOneGuess(guess, remainingWords)
    guess2Qualities[guess] = scoreOneEvaluation(patternCounts)

# Best (lowest) score first.
guess2Qualities = sortDictByValue(guess2Qualities)
guess2Qualities

{'chevy': 1.2857142857142858,
 'chivy': 1.2857142857142858,
 'lynch': 1.2857142857142858,
 'vichy': 1.2857142857142858,
 'belch': 1.5714285714285714,
 'bench': 1.5714285714285714,
 'blech': 1.5714285714285714,
 'bunch': 1.5714285714285714,
 'calyx': 1.5714285714285714,
 'carvy': 1.5714285714285714,
 'chyle': 1.5714285714285714,
 'chynd': 1.5714285714285714,
 'civvy': 1.5714285714285714,
 'colby': 1.5714285714285714,
 'convo': 1.5714285714285714,
 'coven': 1.5714285714285714,
 'covey': 1.5714285714285714,
 'covin': 1.5714285714285714,
 'curvy': 1.5714285714285714,
 'cylix': 1.5714285714285714,
 'felch': 1.5714285714285714,
 'filch': 1.5714285714285714,
 'finch': 1.5714285714285714,
 'gulch': 1.5714285714285714,
 'hylic': 1.5714285714285714,
 'lanch': 1.5714285714285714,
 'linch': 1.5714285714285714,
 'lunch': 1.5714285714285714,
 'synch': 1.5714285714285714,
 'vinca': 1.5714285714285714,
 'vinic': 1.5714285714285714,
 'vinyl': 1.5714285714285714,
 'vocab': 1.5714285714285714,
 'vughy': 

In [410]:
# Show how guessing 'chevy' splits the remaining words, pattern by pattern.
guess2Data['chevy']

{'__e__': ['boxer', 'goner'],
 'C_e__': ['corer'],
 'C_ev_': ['cover'],
 '__e_y': ['foyer'],
 '_hev_': ['hover'],
 '__ev_': ['lover']}

In [327]:
# evaluate one word for the second guess
guess = 'lingo'

# Group the current remainingWords by the pattern `guess` produces, using the
# shared helper rather than an inline copy of the same loop.
round2ComboCounts, round2ComboWords = evaluateOneGuess(guess, remainingWords)

# Largest groups first.
round2ComboCounts = dict(sorted(round2ComboCounts.items(), key=lambda item: -item[1]))

# Sum of groupSize**2 / len(remainingWords) over all patterns.
avgRemainingWords = sum(v * v / len(remainingWords) for v in round2ComboCounts.values())

print("average remaining words: ", avgRemainingWords)
print('')
print('# unique patterns: ', len(round2ComboCounts))
print('')
print('pattern : words left')
for key, val in round2ComboCounts.items():
    print(key, "  : ", val)

average remaining words:  3.39080459770115

# unique patterns:  40

pattern : words left
l____   :  8.0
__N__   :  6.0
l___o   :  5.0
___G_   :  5.0
li___   :  4.0
____o   :  4.0
_i___   :  4.0
_____   :  4.0
_i__o   :  3.0
_I___   :  3.0
_ING_   :  2.0
_I__o   :  2.0
l_n_o   :  2.0
___Go   :  2.0
li_g_   :  2.0
l__go   :  2.0
_in__   :  2.0
_IN__   :  2.0
_In__   :  2.0
_in_o   :  2.0
__n_o   :  2.0
lI___   :  1.0
lI_G_   :  1.0
l__g_   :  1.0
l__G_   :  1.0
___g_   :  1.0
_iNg_   :  1.0
__ngo   :  1.0
_i_g_   :  1.0
L__G_   :  1.0
LI_G_   :  1.0
L__Go   :  1.0
L_NG_   :  1.0
_I_G_   :  1.0
__nG_   :  1.0
li__o   :  1.0
__N_o   :  1.0
l_n__   :  1.0
__n__   :  1.0
___go   :  1.0


In [329]:
# Show the words grouped under the pattern 'li__o'.
round2ComboWords['li__o']

['olive']

In [283]:
# Show the (counts, words) partition that guessing 'clapt' gives over the
# current remainingWords.
evaluateOneGuess('clapt', remainingWords)

({'__a__': 3.0,
  'c_ap_': 1.0,
  'c_a_T': 1.0,
  '_la__': 1.0,
  '__apt': 1.0,
  'c_a__': 2.0,
  'Cla__': 1.0,
  'C_a__': 1.0,
  '__ap_': 1.0,
  'C_a_t': 1.0,
  '__a_T': 1.0},
 {'__a__': ['maxim', 'mania', 'mafia'],
  'c_ap_': ['panic'],
  'c_a_T': ['tacit'],
  '_la__': ['valid'],
  '__apt': ['patio'],
  'c_a__': ['magic', 'manic'],
  'Cla__': ['cavil'],
  'C_a__': ['cabin'],
  '__ap_': ['vapid'],
  'C_a_t': ['cacti'],
  '__a_T': ['habit']})

In [282]:
# NOTE(review): guessCounts is not assigned anywhere in the visible notebook;
# presumably it is left over from a cell that was removed or renamed -- confirm.
guessCounts

{'clapt': 1.5714285714285714,
 'clept': 1.5714285714285714,
 'clint': 1.5714285714285714,
 'clipt': 1.5714285714285714,
 'celom': 1.7142857142857142,
 'cloam': 1.7142857142857142,
 'clomb': 1.7142857142857142,
 'clomp': 1.7142857142857142,
 'compt': 1.7142857142857142,
 'locum': 1.7142857142857142,
 'macon': 1.7142857142857142,
 'monic': 1.7142857142857142,
 'mulct': 1.7142857142857142,
 'nomic': 1.7142857142857142,
 'comet': 1.8571428571428572,
 'claim': 1.8571428571428572,
 'venom': 1.8571428571428572,
 'climb': 1.8571428571428572,
 'clamp': 1.8571428571428572,
 'cleft': 1.8571428571428572,
 'coven': 1.8571428571428572,
 'clump': 1.8571428571428572,
 'clift': 1.8571428571428572,
 'combi': 1.8571428571428572,
 'comte': 1.8571428571428572,
 'convo': 1.8571428571428572,
 'covin': 1.8571428571428572,
 'manat': 1.8571428571428572,
 'metic': 1.8571428571428572,
 'nempt': 1.8571428571428572,
 'tacan': 1.8571428571428572,
 'panic': 1.9285714285714286,
 'chant': 2.0,
 'month': 2.0,
 'count': 

In [None]:
# evaluate the second guess for all possible results from a single first guess word. Find the best 2nd guess for each result
# round2GuessQualities maps each first-round pattern to (best second guess, its
# scoreOneGuess value over the words left for that pattern). Ties go to the
# word that comes first in wordleWords.
round2GuessQualities = {}
for pattern1, remainingWords in round1ComboWords.items():
    bestGuess, bestScore = None, None
    for guess in wordleWords:
        score = scoreOneGuess(guess, remainingWords)
        if bestScore is None or score < bestScore:
            bestGuess, bestScore = guess, score
    round2GuessQualities[pattern1] = (bestGuess, bestScore)
        

In [253]:
# evaluate all possible first guesses. evaluate how well each guess narrows the field after the first round
# Score every legal word as a first guess against the answer list.
# scoreOneGuess does the same grouping the inline loop did (skipping the guess
# itself and dividing by the size of the answer list), but divides once after
# summing instead of once per term.
guess1Qualities = {}
for guess in wordleWords:
    guess1Qualities[guess] = scoreOneGuess(guess, wordleAnswers)

# Best (lowest) score first.
guess1Qualities = {k: v for k, v in sorted(guess1Qualities.items(), key=lambda item: item[1])}

In [163]:
# Score every word in `words` by how it splits `round2Words` into patterns.
# NOTE(review): `words` and `round2Words` are not assigned anywhere in the
# visible notebook; presumably they come from cells that were removed or
# renamed -- confirm before re-running.
guessQualities = {}
for guess in words:
    # pattern -> number of words in round2Words that produce it for this guess
    comboCounts = {}
    for word in round2Words:
        strPattern = wordleCompare3(guess, word)
        if not strPattern in comboCounts:
            comboCounts[strPattern] = 0

        comboCounts[strPattern] += 1

    #comboCounts = {k: v for k, v in sorted(comboCounts.items(), key=lambda item: -item[1])}

    # could change the weight or 'refinement' here, like go another level
    # NOTE(review): the divisor is nAnswers although the counts are taken over
    # round2Words -- confirm that this normalisation is intended.
    avgRemainingWords = 0
    for v in comboCounts.values():
        avgRemainingWords += v * v  / nAnswers
    
    guessQualities[guess] = avgRemainingWords

In [165]:
# Same scores, ordered from lowest to highest.
guessQualities2 = sortDictByValue(guessQualities)
guessQualities2

{'tondi': 6.793418690436101e-05,
 'tonic': 6.892037623764684e-05,
 'toing': 7.088087310502239e-05,
 'monie': 7.312653315310702e-05,
 'biont': 7.375626851050648e-05,
 'bonie': 7.462363985183018e-05,
 'genie': 7.533654780360315e-05,
 'diene': 7.705940868705427e-05,
 'teind': 7.780796203641593e-05,
 'tigon': 7.948329572308228e-05,
 'tinge': 7.953082291986713e-05,
 'point': 7.975657710459516e-05,
 'penie': 8.084970263064706e-05,
 'timon': 8.095663882341303e-05,
 'monte': 8.197847355428752e-05,
 'pinot': 8.23111639317815e-05,
 'conte': 8.284584489561121e-05,
 'tenge': 8.329735326506736e-05,
 'pinto': 8.373697983532734e-05,
 'meint': 8.550736791556342e-05,
 'ontic': 8.554301331315203e-05,
 'hoing': 8.563806770672182e-05,
 'yonic': 8.598263988341205e-05,
 'boite': 8.652920264643782e-05,
 'potin': 8.661237524081149e-05,
 'opine': 8.685001122473575e-05,
 'donut': 8.720646520062218e-05,
 'thine': 8.7693618967667e-05,
 'untie': 8.805007294355357e-05,
 'monic': 8.898873508005444e-05,
 'tenue': 8.9

In [344]:
# Score every legal word as an opening guess against the answer list.
guess1Qualities = {}
for guess in wordleWords:
    guess1Qualities[guess] = scoreOneGuess(guess, wordleAnswers)

# Best (lowest) score first.
guess1Qualities = sortDictByValue(guess1Qualities)

In [354]:
# The first 50 keys of guess1Qualities, in the dict's current order.
promisingFirstGuesses = list(guess1Qualities)[:50]

In [380]:
# Accumulators keyed by first-guess word.
# Re-running this cell discards everything stored so far.
round2TotalQualities = {}
round2FullData = {}

In [385]:
# NOTE(review): this name has a lower-case 'd'; it is a different variable from
# round2FullData and nothing in the visible notebook reads it. Possibly meant
# to reset round2FullData -- confirm.
round2Fulldata = {}

In [431]:
# determine best first guess by outcome after second guess. deeper...
# add a word (round1Guess) to the ranks. It takes a while 
round1Guess = 'tares'
round1ComboCounts, round1ComboWords = evaluateOneGuess(round1Guess, wordleAnswers)

# For each first-round pattern, find the second guess with the lowest score
# over the words left for that pattern.
round2Qualities = {}
for pattern, remainingWords in round1ComboWords.items():
    temp = {guess: scoreOneGuess(guess, remainingWords) for guess in wordleWords}
    # min() returns the first lowest-scoring word in wordleWords order, the
    # same word a stable ascending sort would put first, without sorting.
    best = min(temp, key=temp.get)
    # (score weighted by the share of answers with this pattern, raw score, word)
    round2Qualities[pattern] = (temp[best] * round1ComboCounts[pattern] / nAnswers, temp[best], best)

# Overall quality of round1Guess: the sum of the weighted per-pattern scores.
totalQuality = sum(v[0] for v in round2Qualities.values())

round2TotalQualities[round1Guess] = totalQuality
round2FullData[round1Guess] = round2Qualities

In [358]:
# Reorder the first-round pattern counts from smallest to largest.
round1ComboCounts = sortDictByValue(round1ComboCounts)

In [378]:
# Add up the first element of every round2Qualities entry.
totalQuality = sum(entry[0] for entry in round2Qualities.values())

In [379]:
# Display the current value of totalQuality.
totalQuality

3.1346903421394536

In [434]:
# First-guess words ranked by their total quality after two guesses (lowest
# first). ROATE is still the best, so use ROATE (a BS alternate spelling of
# rote, btw).
round2TotalQualitiesSorted = sortDictByValue(round2TotalQualities)
round2TotalQualitiesSorted

{'roate': 3.1346903421394536,
 'raile': 3.1641403204850587,
 'irate': 3.2152446946730207,
 'orate': 3.228237332178433,
 'crate': 3.2594196621914246,
 'arose': 3.269813772195756,
 'soare': 3.281940233867475,
 'raine': 3.2854049372022525,
 'ariel': 3.2962321351234314,
 'later': 3.3425725422260717,
 'raise': 3.349935036812474,
 'tares': 3.370723256821134,
 'stare': 3.3884798614118683,
 'oater': 3.403204850584668,
 'crane': 3.4291901255954973,
 'lares': 3.4382849718492867,
 'arise': 3.4486790818536153,
 'adieu': 5.112602858380252,
 'audio': 6.310090948462544}

In [383]:
# The 50 words that give the lowest average number of remaining words after
# the first guess.
promisingFirstGuesses

['roate',
 'raise',
 'raile',
 'soare',
 'arise',
 'irate',
 'orate',
 'ariel',
 'arose',
 'raine',
 'artel',
 'taler',
 'ratel',
 'aesir',
 'arles',
 'alter',
 'realo',
 'saner',
 'later',
 'oater',
 'taser',
 'salet',
 'snare',
 'stare',
 'tares',
 'slate',
 'reais',
 'reast',
 'alert',
 'strae',
 'lares',
 'laser',
 'saine',
 'rales',
 'urate',
 'serai',
 'toile',
 'crate',
 'seral',
 'rates',
 'carte',
 'antre',
 'slane',
 'trace',
 'coate',
 'stoae',
 'carle',
 'carse',
 'reals',
 'terai']