In [1]:
# task: 
# read wordle_all_guess_words.txt
# for a given solution word, create one of the following scenes: 
"""

⬛⬛🟩🟩🟩
⬛🟩🟩🟨🟨
⬛🟩🟩🟩🟩
⬛⬛🟩🟩🟩
⬛⬛🟩⬛🟩
🟨🟨🟨🟨🟨

⬛⬛🟨🟨🟨
⬛🟨🟨🟩🟩
⬛🟨🟨🟨🟨
⬛⬛🟨🟨🟨
⬛⬛🟨⬛🟨
🟩🟩🟩🟩🟩

⬛🟩🟩🟩⬛
🟩🟩🟨🟨⬛
🟩🟩🟩🟩⬛
⬛🟩🟩🟩⬛
⬛🟩⬛🟩⬛
🟨🟨🟨🟨🟨

⬛🟨🟨🟨⬛
🟨🟨🟩🟩⬛
🟨🟨🟨🟨⬛
⬛🟨🟨🟨⬛
⬛🟨⬛🟨⬛
🟩🟩🟩🟩🟩

"""
''


''

In [2]:
import pandas as pd

### Earlier word list, assembled with Unix command-line tools.
# Passing header=0 together with names= makes pandas treat the file's first line
# as a header row and replace it with the single column label 'word'.
odf = pd.read_csv(
    'wordle_all_guess_words.txt',
    names=['word'],
    header=0,
)

odf.describe()

In [3]:
# 2022-03-06 wordlist from https://www.nytimes.com/games/wordle/main.3d28ac0c.js

from urllib.request import urlopen

# Download the Wordle JavaScript bundle. The timeout keeps the cell from hanging
# indefinitely when the connection stalls.
with urlopen("https://www.nytimes.com/games/wordle/main.3d28ac0c.js", timeout=30) as response:
    rawcode = response.read()

# very structure-dependent (i.e. fragile) way to extract all valid answers and guess words into a dataframe:
# the two word arrays are expected between 'Ma=[' and '],Ra', separated by '],Oa=['.
jstext = rawcode.decode('UTF-8').replace('"', '')

# Fail loudly when the bundle layout has changed. Without this check a missing end
# marker would silently turn the rest of the script into "words".
if 'Ma=[' not in jstext or '],Ra' not in jstext.split('Ma=[')[1]:
    raise ValueError("word-list markers 'Ma=[' / '],Ra' not found; the bundle layout has changed")

allwords = jstext.split('Ma=[')[1].split('],Ra')[0].replace('],Oa=[', ',')
wordlist = sorted(set(allwords.split(',')))
df=pd.DataFrame(wordlist, columns=['word'])

assert len(df) > 12900  # len is 12972 as of 2022-03-06
# Every extracted token should be a five-letter word; anything else means the parse went wrong.
assert all(len(w) == 5 and w.isalpha() for w in wordlist), "extracted tokens are not all 5-letter words"

df.describe()

Unnamed: 0,word
count,12972
unique,12972
top,idols
freq,1


In [4]:
# Regex selection demo: draw 5 random words from the dataframe that begin with "p".
df.loc[df['word'].str.match('^p.*').eq(True)].sample(5)

Unnamed: 0,word
7895,padle
8073,pedes
8518,prang
8324,pleas
8445,poove


In [5]:
# Demo: narrow the word list with two regex passes for one feedback pattern.
pattern="yyggn"
realword="comma"
# Pass 1 fixes the green letters and keeps letters of realword out of the last slot.
r1='[^c][^o]mm[^comma]'
# Pass 2 requires the first two slots to hold letters that occur in realword.
r2='[comma][comma]...'
df1 = df.loc[df['word'].str.match(r1).eq(True)]
df2 = df1.loc[df1['word'].str.match(r2).eq(True)]
df2.head()

Unnamed: 0,word
6677,mammy


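However, "mammy" does not actually produce this pattern: it contains three "m" characters, while "comma" has only two, and both of those are already used up by the green matches in positions 3 and 4. That leaves no spare "m" to turn the leading "m" yellow.
So we need a final check that the real word has enough copies of each letter to support every yellow letter in the guess.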

In [6]:
def _fits_pattern(guess, realword, pattern):
    """Return True when `guess` reproduces `pattern` against `realword` (see matchwords for the rules)."""
    # Non-string cells (e.g. NaN) and words of the wrong length can never fit.
    if not isinstance(guess, str) or len(guess) != len(realword):
        return False

    # Green is greediest: check the exact-position letters first, and collect the
    # solution letters that are still available to back a yellow.
    unmatched = []
    for guess_char, real_char, mark in zip(guess, realword, pattern):
        if mark == 'g':
            if guess_char != real_char:
                return False
        else:
            unmatched.append(real_char)

    for guess_char, real_char, mark in zip(guess, realword, pattern):
        if mark == 'n':
            # neutral: this letter cannot be anywhere in realword
            if guess_char in realword:
                return False
        elif mark == 'y':
            # yellow: not the letter at this position, but an unused copy must exist elsewhere
            if guess_char == real_char or guess_char not in unmatched:
                return False
            # Each yellow uses up one copy, so duplicate letters are counted properly
            # (this is what rules out "mammy" for "comma" with "yyggn").
            unmatched.remove(guess_char)
    return True


def matchwords(df, realword, pattern):
    """Select the rows of `df` whose word yields the feedback `pattern` for `realword`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'word' column holding the candidate guess words.
    realword : str
        The solution word.
    pattern : str
        One character per letter of `realword`:
        'g' (green)   - the guess has the same letter as `realword` at this position;
        'y' (yellow)  - the guess letter differs from `realword` here, but an unused copy
                        of it exists at a non-green position of `realword`;
        'n' (neutral) - the guess letter does not occur anywhere in `realword`.

    Returns
    -------
    pandas.DataFrame
        The matching rows of `df`, with the original index and columns. `df` is not modified.

    Raises
    ------
    ValueError
        If `pattern` and `realword` differ in length, or `pattern` contains characters
        other than 'g', 'y' and 'n'.

    Notes
    -----
    The neutral rule is deliberately strict: a guess that repeats a solution letter more
    often than the solution does is rejected, rather than treating the surplus copy as gray.

    Each word is checked once with plain string comparisons. This replaces the earlier
    approach of building regexes from `realword` and then re-filtering the whole frame
    with a regex for every rejected word, which was quadratic and matched on prefixes.
    """
    if len(pattern) != len(realword):
        raise ValueError("pattern %r and realword %r must have the same length" % (pattern, realword))
    if set(pattern) - set('gyn'):
        raise ValueError("pattern %r may only contain 'g', 'y' and 'n'" % (pattern,))

    # astype(bool) keeps the mask usable for boolean indexing even when df is empty.
    keep = df['word'].map(lambda guess: _fits_pattern(guess, realword, pattern)).astype(bool)
    return df[keep]

In [7]:
# Demo: sample five guesses that show exactly two yellow squares (positions 2 and 4)
# when the solution is "comma".
matchwords(df, realword="comma", pattern="nynyn").sample(n=5)

Unnamed: 0,word
11074,talon
5350,impot
8887,ranch
8961,razor
2633,datos


### Okay, I'm ready to make all my guys: 

In [10]:
# Solution word to draw the pictures for. Only the last assignment takes effect;
# the earlier ones are alternative solutions kept around for quick switching.
realword = 'tires'
realword = 'scare'
realword = 'stair'
realword = 'story'

# Each picture is a list of six 5-character feedback rows, one row per guess.
# Row alphabet (same as matchwords): 'g' = green, 'y' = yellow, 'n' = neutral/gray.
pictures = [
    # which looks better, the slightly rounded version? 
    ["nnggn", 
     "nngyy", 
     "ngggg",
     "ngggg",
     "nngng",
     "yyyyy"],
    # or the /r/place version?
    ["nnggg", 
     "nggyy", 
     "ngggg",
     "nnggg",
     "nngng",
     "yyyyy"],
# same shape as the previous picture with green and yellow swapped
     ["nnyyy", 
     "nyygg", 
     "nyyyy",
     "nnyyy",
     "nnyny",
     "ggggg"],

    # figure shifted one column to the left
    ["ngggn", 
     "ggyyn", 
     "ggggn",
     "ngggn",
     "ngngn",
     "yyyyy"],
# same shape as the previous picture with green and yellow swapped
     ["nyyyn", 
     "yyggn", 
     "yyyyn",
     "nyyyn",
     "nynyn",
     "ggggg"],

#flip: the pictures below are left-right mirror images of the last four pictures above
     ["gggnn", 
     "yyggn", 
     "ggggn",
     "gggnn",
     "gngnn",
     "yyyyy"],
# same shape as the previous picture with green and yellow swapped
     ["yyynn", 
     "ggyyn", 
     "yyyyn",
     "yyynn",
     "ynynn",
     "ggggg"],

    ["ngggn", 
     "nyygg", 
     "ngggg",
     "ngggn",
     "ngngn",
     "yyyyy"],
# same shape as the previous picture with green and yellow swapped
     ["nyyyn", 
     "nggyy", 
     "nyyyy",
     "nyyyn",
     "nynyn",
     "ggggg"],


#endflip
   
    
]

# Render every picture: for each feedback row pick a random real word that produces
# that row against `realword`, then print it with coloured backgrounds.
for picture in pictures:
    for pattern in picture:
        # Look for words that produce this row. When there are none, relax the row by
        # turning its leftmost non-neutral mark into 'n' and try again.
        candidates = matchwords(df, realword, pattern=pattern)
        while len(candidates) == 0 and pattern.strip('n'):
            relax_at = next(i for i, mark in enumerate(pattern) if mark != 'n')
            pattern = pattern[0:relax_at] + 'n' + pattern[relax_at+1: ]
            candidates = matchwords(df, realword, pattern=pattern)

        if len(candidates) > 0:
            guessword = str(candidates.sample().word.values[0])
        else:
            # Even the all-neutral row has no match. Previously this case looped forever
            # because there was nothing left to relax; print a placeholder row instead.
            guessword = '.....'

        # print the guessword with the pattern
        # ANSI escape codes: bold white text on a black / yellow / green background.
        colours = {'n': '\033[1;37;40m', 'y': '\033[1;37;43m', 'g': '\033[1;37;42m'}
        for i in range(0,5):
            if pattern[i] in colours:
                print (colours[pattern[i]] + guessword[i], end=' ')
        print ('\033[m')   # reset colours and end the row
    print ('------------')


[1;37;40mc [1;37;40mh [1;37;42mo [1;37;42mr [1;37;40me [m
[1;37;40mc [1;37;40ml [1;37;42mo [1;37;43my [1;37;43ms [m
[1;37;40mg [1;37;40ml [1;37;42mo [1;37;42mr [1;37;42my [m
[1;37;40mg [1;37;40ml [1;37;42mo [1;37;42mr [1;37;42my [m
[1;37;40mp [1;37;40me [1;37;42mo [1;37;40mn [1;37;42my [m
[1;37;43mr [1;37;43mo [1;37;43my [1;37;43ms [1;37;43mt [m
------------
[1;37;40mi [1;37;40mv [1;37;42mo [1;37;42mr [1;37;42my [m
[1;37;40mp [1;37;40mh [1;37;42mo [1;37;43mt [1;37;43ms [m
[1;37;40mg [1;37;40ml [1;37;42mo [1;37;42mr [1;37;42my [m
[1;37;40mi [1;37;40mv [1;37;42mo [1;37;42mr [1;37;42my [m
[1;37;40mp [1;37;40mh [1;37;42mo [1;37;40mn [1;37;42my [m
[1;37;43mt [1;37;43my [1;37;43mr [1;37;43mo [1;37;43ms [m
------------
[1;37;40mw [1;37;40mu [1;37;43mr [1;37;43ms [1;37;43mt [m
[1;37;40me [1;37;40mn [1;37;43mt [1;37;42mr [1;37;42my [m
[1;37;40mw [1;37;43mo [1;37;43mr [1;37;43ms [1;37;43mt [m
[1;37;40ma [