In [211]:
import string
import re
import pandas as pd

df = pd.read_csv('../Data-Preprocessed/word_freq.csv', index_col='Unnamed: 0')
df.head()

Unnamed: 0,word,wordFreq
667,AALII,0
668,AARON,0
669,ABACA,0
670,ABACK,0
671,ABAFF,0


In [196]:
wordle_word = 'HELLO'
'HELLO' in list(df['word'])

True

0 means not correct, 1 means correct letter + incorrect location, 2  means correct word + correct location

In [183]:
def compare(guess: str, actual: str):
    if len(guess) != len(actual):
        print('Lengths do not match!')
    comparison = [0] * len(actual)
    for i in range(len(actual)):
        if guess[i] == actual[i]:
            comparison[i] = 2
        elif guess[i] in actual:
            comparison[i] = 1
    return comparison

In [184]:
df.sample(n=10)

Unnamed: 0,word,wordFreq
4458,HECTE,0
8754,STOON,0
7399,REDIP,0
285,SCRUB,2093
2773,CROZE,0
2095,BUNDA,0
4315,GULFY,0
2928,DAVIT,0
5130,KILEH,0
6360,NOGAL,0


In [185]:
compare(
    guess = 'WHEEL'
    ,actual = wordle_word
)

[0, 1, 1, 1, 1]

In [186]:
guesses = {'ABACA':[0,0,0,0,0], 'HOMEY':[2,1,0,1,0], 'TRUSH':[0,0,0,0,1], 'WHEEL': [0,1,1,1,1]}

In [214]:
def add_known_info(guess: str, result: list, knowns: dict) -> dict:
    knowns_local = knowns.copy()
    for i in range(len(guess)):
        if guess[i] not in knowns_local['exact']:
            if result[i] == 2:
                knowns_local['exact'][i] = guess[i]
            elif result[i] == 1:
                knowns_local['inexact'][guess[i]][i] = guess[i]
            elif result[i] == 0:
                knowns_local['exclude'].add(guess[i])
    return knowns_local

In [215]:
knowns = {'exact':['.']*5, 'inexact':{}, 'exclude': set({})}
alphabet = list(string.ascii_uppercase)
for letter in alphabet:
    knowns['inexact'][letter] = ['.']*5

for guess, result in guesses.items():
    knowns = add_known_info(guess=guess, result=result, knowns=knowns)
knowns

{'exact': ['H', '.', '.', '.', '.'],
 'inexact': {'A': ['.', '.', '.', '.', '.'],
  'B': ['.', '.', '.', '.', '.'],
  'C': ['.', '.', '.', '.', '.'],
  'D': ['.', '.', '.', '.', '.'],
  'E': ['.', '.', 'E', 'E', '.'],
  'F': ['.', '.', '.', '.', '.'],
  'G': ['.', '.', '.', '.', '.'],
  'H': ['.', '.', '.', '.', '.'],
  'I': ['.', '.', '.', '.', '.'],
  'J': ['.', '.', '.', '.', '.'],
  'K': ['.', '.', '.', '.', '.'],
  'L': ['.', '.', '.', '.', 'L'],
  'M': ['.', '.', '.', '.', '.'],
  'N': ['.', '.', '.', '.', '.'],
  'O': ['.', 'O', '.', '.', '.'],
  'P': ['.', '.', '.', '.', '.'],
  'Q': ['.', '.', '.', '.', '.'],
  'R': ['.', '.', '.', '.', '.'],
  'S': ['.', '.', '.', '.', '.'],
  'T': ['.', '.', '.', '.', '.'],
  'U': ['.', '.', '.', '.', '.'],
  'V': ['.', '.', '.', '.', '.'],
  'W': ['.', '.', '.', '.', '.'],
  'X': ['.', '.', '.', '.', '.'],
  'Y': ['.', '.', '.', '.', '.'],
  'Z': ['.', '.', '.', '.', '.']},
 'exclude': {'A', 'B', 'C', 'M', 'R', 'S', 'T', 'U', 'W', 'Y'}}

In [219]:
def filter(knowns: dict, words: pd.DataFrame) -> pd.DataFrame:
    # Filter by exact matches
    regex = ''.join(knowns['exact'])
    filtered_data = words[words['word'].str.match(regex)]
    # Filter by exclude - must contain none of these
    mask = [False] * len(filtered_data)
    for exclusion in knowns['exclude']:
        mask = filtered_data['word'].str.contains(exclusion) | mask
    filtered_data = filtered_data[~mask]
    # Filter by inexact matches - must contain all
    mask = [True] * len(filtered_data)
    for inexact_match in knowns['inexact']:
        if re.search('[A-Z]', ''.join(knowns['inexact'][inexact_match])):
            mask = filtered_data['word'].str.contains(inexact_match) & mask
    filtered_data = filtered_data[mask]
    # Filter by inexact matches - must not contain at specific locations
    mask = [False] * len(filtered_data)
    for inexact_match in knowns['inexact']:
        if re.search('[A-Z]', ''.join(knowns['inexact'][inexact_match])):
            regex = ''.join(knowns['inexact'][inexact_match])
            mask = filtered_data['word'].str.match(regex) | mask
    filtered_data = filtered_data[~mask]
    return filtered_data

In [220]:
filtered_data = filter(knowns=knowns, words=df)
filtered_data

Unnamed: 0,word,wordFreq
4472,HELIO,0
4473,HELLO,0
4475,HELOE,0
