In [42]:
import pandas as pd
import itertools
import os
from tqdm import tqdm
from collections import defaultdict
import pickle
import random
from scipy.stats import entropy

In [43]:
# Let grey is represented by 0, yellow by 1 and green by 2
# what could be possible input info of 5 letter word in wordle
allPatterns = list(itertools.product([0,1,2], repeat=5))
print(len(allPatterns))
# Total 3 power 5 that is 243 possible combination

243


In [44]:
#  Extracting all and curated word list from data folder
with open(r'words.txt', encoding='utf-8') as f:
    rawwordlelist = f.read()
rawwordlelist = rawwordlelist.split(",")
allWordList = []
for word in rawwordlelist:
    word = word.strip('"')
    allWordList.append(word)
print(len(allWordList))

12972


In [45]:
# First 2315 words of our list are most frequent wordle answers. so lets extract it too.
curatedWordList = allWordList[:2315]
print(len(curatedWordList))

2315


In [46]:
# Sanity check whether all words are of 5 length or not
errorMsg = 'All words are not of 5 letter each'
assert len({len(x) for x in allWordList}) == 1 and len(allWordList[0]) == 5, errorMsg

In [47]:
def convertWordToPattern(guessWord, realAnswer):
    """
    lets suppose if ans is midst and we guessed digit then we return (1, 2, 0, 1, 2)
    """
    pattern = [0, 0, 0 ,0 ,0]
    for i, x in enumerate(guessWord):
        pattern[i] = int(x in realAnswer)
    for i, (x1, x2) in enumerate(zip(guessWord, realAnswer)):
        if x1 == x2:
            pattern[i] = 2
    
    return tuple(pattern)

# lets test it
print(convertWordToPattern('digit', 'midst'))
    

(1, 2, 0, 1, 2)


In [48]:
def createPatternMap(wordList):
    """
    For every word in the wordlist with every possible pattern information we get from wordle game, 
    lets create a list of possible set of words.
    """
    wordPatternMap = defaultdict(lambda: defaultdict(set))
    for word in tqdm(wordList):
        for realAnswer in wordList:
            pattern = convertWordToPattern(word, wordList)
            wordPatternMap[word][pattern].add(realAnswer)
    return dict(wordPatternMap)


In [49]:
if 'wordPatternMap.p' not in os.listdir('.'):
#     Lets cache this patterns map so that we dont need to create it everytime
    wordPatternMap = createPatternMap(curatedWordList)
    pickle.dump(wordPatternMap, open('wordPatternMap.p', 'wb+'))
else:
    # loading patterns map from the cache file
    wordPatternMap = pickle.load(open('wordPatternMap.p', 'rb'))

In [50]:
def calculateEntropies(possibleWords, wordPatternMap):
#     wordList,
    """
    Calculating the entropy for every word in our words list , taking into account the remaining possible words
    """
    entropiesMap = {}
    for word in tqdm(possibleWords):
        counts = []
        for pattern in allPatterns:
            matches = wordPatternMap[word][tuple(pattern)]
            matches = matches.intersection(possibleWords)
            counts.append(len(matches))
        entropiesMap[word] = entropy(counts)
    return entropiesMap

In [52]:
# Lets begin a trial
realAnswer = random.choice(curatedWordList)
# print("====RealAnswer to reach is ", realAnswer)
possibleWords = set(curatedWordList)

for _ in range(10):
#     print("====possiblewordss", possibleWords)
#     curatedWordList
    entropies = calculateEntropies( possibleWords, wordPatternMap)
    print("set of possible words includes like:")
    print([sample_word[0] for sample_word in sorted(entropies.items(), key=lambda sample_word: -sample_word[1])[:10]])
    
#     print("==entropy[0]", entropies[0])
#   Pick max entropy word
    guessWord = random.choice([x[0] for x in sorted(entropies.items(), key=lambda x: -x[1])[:10]])
    print('Our suggestion for guess based on Entropy: ', guessWord)
    
    guessWordPatternOutput = [0, 0, 0, 0, 0]
    for i, l1 in enumerate(guessWord):
        guessWordPatternOutput[i] = int(l1 in realAnswer)

    for i, (l1, l2) in enumerate(zip(guessWord, realAnswer)):
        if l1 == l2:
            guessWordPatternOutput[i] = 2

    print('guessWordPatternOutput:', guessWordPatternOutput)
    if guessWord == realAnswer:
        print('WIN!')
        print()
        print()
        print()
        break
    print("wordPatternMap", wordPatternMap[guessWord])
    words = wordPatternMap[guessWord][tuple(guessWordPatternOutput)]
    print("====words", words)
    possibleWords = possibleWords.intersection(words)
    

====RealAnswer to reach is  bongo
====possiblewordss {'array', 'bagel', 'dealt', 'flare', 'facet', 'mucky', 'hazel', 'expel', 'feral', 'visit', 'vapor', 'shark', 'irony', 'smack', 'fishy', 'dandy', 'recut', 'tepid', 'dunce', 'could', 'alley', 'stoke', 'aloof', 'quack', 'swung', 'finch', 'pried', 'truce', 'raspy', 'glade', 'butte', 'lapse', 'baler', 'known', 'circa', 'bliss', 'panel', 'bleed', 'rhyme', 'creak', 'quick', 'mango', 'chill', 'fable', 'islet', 'erupt', 'quail', 'glean', 'zebra', 'pupil', 'rarer', 'mourn', 'award', 'shaft', 'spray', 'lemur', 'began', 'frail', 'stout', 'amber', 'aware', 'vowel', 'hutch', 'retch', 'miser', 'borne', 'nylon', 'learn', 'steal', 'fluid', 'piece', 'abyss', 'bring', 'chide', 'inept', 'begun', 'crank', 'acute', 'phone', 'fetal', 'knoll', 'extra', 'mummy', 'large', 'refer', 'onion', 'pupal', 'flick', 'chunk', 'idiom', 'afoul', 'sonar', 'maxim', 'spill', 'twang', 'sneak', 'wrote', 'pudgy', 'harsh', 'spool', 'octet', 'swamp', 'spelt', 'clock', 'older', '

100%|█████████████████████████████████████| 2315/2315 [00:00<00:00, 4383.40it/s]


set of possible words includes like:
['raise', 'slate', 'crate', 'irate', 'trace', 'arise', 'stare', 'snare', 'arose', 'least']
Our suggestion for guess based on Entropy:  raise
guessWordPatternOutput: [0, 0, 0, 0, 0]
wordPatternMap {(0, 0, 0, 0, 0): {'vouch', 'block', 'thump', 'flung', 'mucky', 'humph', 'hobby', 'couch', 'gummy', 'howdy', 'muddy', 'tough', 'thumb', 'lynch', 'flown', 'tooth', 'blond', 'toddy', 'could', 'ought', 'booth', 'plumb', 'doubt', 'funky', 'flunk', 'blunt', 'pulpy', 'touch', 'booty', 'dummy', 'would', 'bongo', 'mount', 'hunky', 'found', 'nutty', 'blood', 'notch', 'lunch', 'known', 'clump', 'golly', 'plump', 'month', 'pooch', 'coyly', 'hotly', 'putty', 'lobby', 'chock', 'pouch', 'junto', 'fluff', 'gulch', 'bully', 'bound', 'dutch', 'dodgy', 'folly', 'moody', 'motto', 'glyph', 'wooly', 'mogul', 'dolly', 'wound', 'jumbo', 'polyp', 'dough', 'bunny', 'goody', 'puppy', 'phony', 'quoth', 'boozy', 'cluck', 'knock', 'youth', 'moult', 'ghoul', 'holly', 'flock', 'nymph', '

100%|███████████████████████████████████████| 168/168 [00:00<00:00, 7190.90it/s]


set of possible words includes like:
['mulch', 'lunch', 'cloth', 'could', 'clung', 'clump', 'blunt', 'gulch', 'bunch', 'blond']
Our suggestion for guess based on Entropy:  gulch
guessWordPatternOutput: [1, 0, 0, 0, 0]
wordPatternMap {(0, 0, 0, 0, 0): {'tabby', 'array', 'skier', 'sorry', 'spoof', 'raven', 'baker', 'print', 'spook', 'taint', 'broom', 'snaky', 'smite', 'stove', 'visit', 'vapor', 'envoy', 'irony', 'ember', 'marry', 'snide', 'risky', 'proof', 'dandy', 'rainy', 'diner', 'tepid', 'snake', 'adept', 'tweet', 'retry', 'stoke', 'beefy', 'booty', 'fjord', 'front', 'pried', 'raspy', 'boxer', 'known', 'friar', 'brown', 'fairy', 'steer', 'media', 'fever', 'steep', 'bitty', 'rebar', 'arson', 'dairy', 'smoky', 'drown', 'timid', 'skimp', 'ratio', 'mayor', 'wordy', 'taste', 'tread', 'dirty', 'teddy', 'probe', 'ebony', 'motto', 'admit', 'swine', 'zebra', 'rarer', 'drive', 'karma', 'award', 'jazzy', 'kiosk', 'spray', 'abide', 'tepee', 'safer', 'video', 'abase', 'spade', 'debit', 'froze', '

100%|███████████████████████████████████████████| 4/4 [00:00<00:00, 7843.49it/s]


set of possible words includes like:
['pygmy', 'dodgy', 'foggy', 'bongo']
Our suggestion for guess based on Entropy:  dodgy
guessWordPatternOutput: [0, 2, 0, 2, 0]
wordPatternMap {(0, 0, 0, 0, 0): {'skier', 'raven', 'scamp', 'level', 'baker', 'bible', 'until', 'taint', 'facet', 'print', 'hazel', 'metal', 'tunic', 'expel', 'swath', 'humph', 'smite', 'cheek', 'utter', 'visit', 'feral', 'thrum', 'shark', 'think', 'crave', 'smack', 'ember', 'truck', 'ranch', 'snake', 'recut', 'spell', 'winch', 'tweet', 'chime', 'quack', 'evict', 'unzip', 'finch', 'antic', 'hater', 'truce', 'butte', 'chute', 'lapse', 'quest', 'baler', 'knack', 'friar', 'circa', 'fluke', 'steer', 'bliss', 'ample', 'panel', 'fever', 'aisle', 'steep', 'caulk', 'ankle', 'rebar', 'creak', 'quick', 'chill', 'fable', 'skimp', 'chair', 'islet', 'erupt', 'cumin', 'bench', 'quail', 'taste', 'clamp', 'chart', 'wreck', 'blush', 'slate', 'swine', 'revel', 'zebra', 'pupil', 'rarer', 'stuck', 'march', 'plume', 'karma', 'shaft', 'lemur', '

100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 6168.09it/s]

set of possible words includes like:
['bongo']
Our suggestion for guess based on Entropy:  bongo
guessWordPatternOutput: [2, 2, 2, 2, 2]
WIN!






