In [18]:
import pandas as pd
import itertools
import os
from tqdm import tqdm
from collections import defaultdict
import pickle

In [9]:
# Let grey is represented by 0, yellow by 1 and green by 2
# what could be possible input info of 5 letter word in wordle
allPatterns = list(itertools.product([0,1,2], repeat=5))
print(len(allPatterns))
# Total 3 power 5 that is 243 possible combination

243


In [10]:
#  Extracting all and curated word list from data folder
with open(r'words.txt', encoding='utf-8') as f:
    rawwordlelist = f.read()
rawwordlelist = rawwordlelist.split(",")
allWordList = []
for word in rawwordlelist:
    word = word.strip('"')
    allWordList.append(word)
print(len(allWordList))

12972


In [11]:
# First 2315 words of our list are most frequent wordle answers. so lets extract it too.
curatedWordList = allWordList[:2315]
print(len(curatedWordList))

2315


In [12]:
# Sanity check whether all words are of 5 length or not
errorMsg = 'All words are not of 5 letter each'
assert len({len(x) for x in allWordList}) == 1 and len(allWordList[0]) == 5, errorMsg

In [13]:
def convertWordToPattern(guessWord, realAnswer):
    """
    lets suppose if ans is midst and we guessed digit then we return (1, 2, 0, 1, 2)
    """
    pattern = [0, 0, 0 ,0 ,0]
    for i, x in enumerate(guessWord):
        pattern[i] = int(x in realAnswer)
    for i, (x1, x2) in enumerate(zip(guessWord, realAnswer)):
        if x1 == x2:
            pattern[i] = 2
    
    return tuple(pattern)

# lets test it
print(convertWordToPattern('digit', 'midst'))
    

(1, 2, 0, 1, 2)


In [14]:
def createPatternMap(wordList):
    """
    For every word in the wordlist with every possible pattern information we get from wordle game, 
    lets create a list of possible set of words.
    """
    wordPatternMap = defaultdict(lambda: defaultdict(set))
    for word in tqdm(wordList):
        for realAnswer in wordList:
            pattern = convertWordToPattern(word, wordList)
            wordPatternMap[word][pattern].add(realAnswer)
    return dict(wordPatternMap)


100%|███████████████████████████████████████| 2315/2315 [09:20<00:00,  4.13it/s]

2315





In [20]:
if 'wordPatternMap.p' not in os.listdir('.'):
#     Lets cache this patterns map so that we dont need to create it everytime
    wordPatternMap = createPatternMap(curatedWordList)
    pickle.dump(wordPatternMap, open('wordPatternMap.p', 'wb+'))
else:
    # loading patterns map from the cache file
    wordPatternMap = pickle.load(open('wordPatternMap.p', 'rb'))

2315
