In [110]:
# TEST #1 FOR GRAY TILES:
import pandas as pd

# Load the list of valid Wordle solutions into a DataFrame with a 'word'
# column. The path is relative (resolved against the kernel's working
# directory) rather than pointing into one user's home directory, and it is
# the same relative path this notebook uses when it re-reads the file for
# the yellow-tile test.
df = pd.read_csv("../data/valid_solutions.csv")

display(df)

Unnamed: 0,word
0,aback
1,abase
2,abate
3,abbey
4,abbot
...,...
2310,young
2311,youth
2312,zebra
2313,zesty


In [111]:
# `.str` on a pandas column is an accessor object: it exposes string
# methods (for example `lower()` or `contains()`) that are applied to each
# value of a column holding strings, such as our 'word' column.

# Show what kind of object the accessor is.
print(type(df['word'].str))

# Round-trip the column through upper-case and back to lower-case using the
# accessor; the column ends up lower-cased.
df['word'] = df['word'].str.upper().str.lower()

display(df)

<class 'pandas.core.strings.accessor.StringMethods'>


Unnamed: 0,word
0,aback
1,abase
2,abate
3,abbey
4,abbot
...,...
2310,young
2311,youth
2312,zebra
2313,zesty


In [112]:
# In pandas, the ~ operator is an element-wise logical NOT: applied to a
# boolean Series (a mask), it inverts every True/False value.

In [113]:
# Grey Filter:
# Every character in `letters` is treated as a grey tile: any word that
# contains it is dropped. `letters` is empty here, so nothing is removed.
letters = ""
for grey in letters:
    contains_grey = df['word'].str.contains(grey)
    df = df[~contains_grey]

display(df)

Unnamed: 0,word
0,aback
1,abase
2,abate
3,abbey
4,abbot
...,...
2310,young
2311,youth
2312,zebra
2313,zesty


In [114]:
# Green Filter:
# `incompleteWord` is a regular expression for the word as known so far:
# GREEN letters (right letter, right position) are written literally and
# every other position is the wildcard `.`.
incompleteWord = '^al..y$'

# Keep only the words that match the pattern, i.e. the words that have
# every green letter in its correct position.
matches_green = df['word'].str.contains(incompleteWord, regex=True)
df = df[matches_green]

display(df)

Unnamed: 0,word
55,allay
56,alley
59,alloy


In [115]:
# Yellow Tiles are a bit trickier...
# The issue comes in two parts:
#     i) A yellow tile means that the character is in the word, but not at that index.
#     ii) However, we do know that it is guaranteed to be in the word at one of the other 4 spots.

In [116]:
# Start again from the full solution list for the yellow-tile test.
df = pd.read_csv("../data/valid_solutions.csv")

userGuessInput = "wafer"
# Tile codes used in this cell: "Y" = yellow, "G" = gray. The "G" branch
# below removes the letter from the candidate words, so "G" is NOT green here.
userTileInput = "YGYGY"
alphabet = "abcdefghijklmnopqrstuvwxyz"

# Letters sitting on yellow tiles, in guess order. Duplicates are kept so
# that `yellow.count(letter)` tells how many yellow tiles a letter has.
yellow = [userGuessInput[i] for i in range(5) if userTileInput[i] == "Y"]

# Build an anchored five-position pattern, one piece per tile.
reg_expression = "^"
for i in range(5):
    letter = userGuessInput[i]
    tile = userTileInput[i]
    # Character class allowing every lowercase letter except the guessed one.
    not_this_letter = '[{}]'.format(alphabet.replace(letter, ''))

    if tile == "Y":
        # Yellow: the letter is in the word, but not at this index.
        reg_expression += not_this_letter
    elif tile == "G":
        if letter in yellow:
            # The guess repeats this letter and another copy is yellow, so
            # the letter IS in the word. Dropping every word that contains
            # it would contradict the yellow tile and leave no candidates;
            # the gray tile only rules the letter out at this index.
            reg_expression += not_this_letter
        else:
            # Plain gray: the letter is nowhere in the word, so any
            # character may sit here and every word containing the letter
            # is removed. regex=False keeps the match literal.
            reg_expression += '.'
            df = df[~df['word'].str.contains(letter, regex=False)]

reg_expression += "$"
print(reg_expression)

^[abcdefghijklmnopqrstuvxyz].[abcdeghijklmnopqrstuvwxyz].[abcdefghijklmnopqstuvwxyz]$


In [117]:
# Apply the positional pattern held in `reg_expression`: keep only the
# words it matches.
fits_pattern = df['word'].str.contains(reg_expression, regex=True)
df = df[fits_pattern]

display(df)

Unnamed: 0,word
193,biddy
194,bigot
196,billy
198,bingo
200,birch
...,...
2218,vomit
2220,vouch
2222,vying
2310,young


In [118]:
# The above code won't filter correctly because we aren't accounting for the
# fact that each yellow-tiled character has to appear somewhere in the word.
#
# This is where the yellow (list) comes into play
# We can add all our yellow characters to a list and then make sure that we have the same count of them.

# Essentially, the problem is that we are not guaranteeing that the yellow letter appears; we are only eliminating the possibility of it at that index.

In [119]:
# Require every yellow letter to actually appear in the word.
#
# A yellow tile guarantees AT LEAST as many copies of the letter as it has
# yellow tiles; the word may contain more. The count is exact only when the
# same guess also shows that letter on a gray ("G") tile, because that gray
# tile means there are no further copies.
gray_letters = {
    userGuessInput[pos]
    for pos in range(len(userTileInput))
    if userTileInput[pos] == "G"
}

# dict.fromkeys de-duplicates while keeping guess order, so each distinct
# yellow letter is checked once.
for yellow_letter in dict.fromkeys(yellow):
    required = yellow.count(yellow_letter)
    occurrences = df['word'].str.count(yellow_letter)
    if yellow_letter in gray_letters:
        df = df[occurrences == required]
    else:
        df = df[occurrences >= required]
df

Unnamed: 0,word
818,frown


In [109]:
# With this simple for loop, our code filters correctly for the yellow filter.

In [97]:
# We can now combine all our functions into a simple terminal based application.
#
# You can access the full source code here: https://github.com/shawnpradeep/wordle_solver
