In [86]:
import pandas as pd
import numpy as np

from tqdm import tqdm

In [87]:
df = pd.read_csv('dataset\\wordle.csv')
df[['1', '2', '3', '4', '5']] = df['word'].apply(lambda x: pd.Series(list(x)))

In [None]:
class MetaData:
    
    def __init__(self):
        self.correct_letters = {
            '1': None,
            '2': None,
            '3': None,
            '4': None,
            '5': None
        }
        self.wrong_spots = {
            '1': [],
            '2': [],
            '3': [],
            '4': [],
            '5': []
        }
        self.wrong_letters = []

In [93]:
def get_template():
    correct_letters = {
        '1': None,
        '2': None,
        '3': None,
        '4': None,
        '5': None
    }
    wrong_spots = {
        '1': [],
        '2': [],
        '3': [],
        '4': [],
        '5': []
    }
    wrong_letters = []
    return correct_letters, wrong_spots, wrong_letters

def get_possible_words(df, correct_letters, wrong_spots, wrong_letters):
    partial_letters = (list(correct_letters.values()) + [x[i] for x in wrong_spots.values() for i in range(len(x))])
    while None in partial_letters:
        partial_letters.remove(None)
    temp = df[df.apply(lambda row: 
        (
            (
                (correct_letters['1'] is None or row['1'] == correct_letters['1']) and
                (correct_letters['2'] is None or row['2'] == correct_letters['2']) and
                (correct_letters['3'] is None or row['3'] == correct_letters['3']) and
                (correct_letters['4'] is None or row['4'] == correct_letters['4']) and
                (correct_letters['5'] is None or row['5'] == correct_letters['5']) and
                all(letter in row['word'] for letter in partial_letters)           and 
                not any(letter in row['word'] for letter in wrong_letters)         and
                all(letter not in row['1'] for letter in wrong_spots['1'])         and
                all(letter not in row['2'] for letter in wrong_spots['2'])         and
                all(letter not in row['3'] for letter in wrong_spots['3'])         and
                all(letter not in row['4'] for letter in wrong_spots['4'])         and
                all(letter not in row['5'] for letter in wrong_spots['5'])
            )
        )
        ,axis=1
    )]
    return temp['word'].values

def validate_guess(guess_word, comp_word, correct_letters, wrong_spots, wrong_letters):
    _score = 0
    for idx, letter in enumerate(guess_word):
        if letter in comp_word:
            if guess_word[idx] == comp_word[idx]:
                correct_letters[f'{idx+1}'] = letter
                _score += 10
            else:
                wrong_spots[f'{idx+1}'].append(letter)
                _score += 5
        else:
            wrong_letters.append(letter)
            _score -= 1
    return _score, correct_letters, wrong_spots, wrong_letters

In [154]:
possible_words = df['word'].values
correct_word = np.random.choice(possible_words)
guess_word = 'salet'
_, correct_letters, wrong_spots, wrong_letters = validate_guess(guess_word, correct_word, *get_template())
filtered_words = get_possible_words(df, correct_letters, wrong_spots, wrong_letters)

while(guess_word != correct_word):
    best_guess = None
    best_score = np.NINF
    print(f'Guess: {guess_word} | Filter Length: {len(filtered_words)}')
    for candidate_guess in filtered_words:
        for comp_word in possible_words:
            score, _, _, _ = validate_guess(candidate_guess, comp_word, *get_template())
        if score > best_score:
            best_score = score
            best_guess = candidate_guess
    
    guess_word = best_guess
    _, correct_letters, wrong_spots, wrong_letters = validate_guess(guess_word, correct_word, correct_letters, wrong_spots, wrong_letters)
    filtered_words = get_possible_words(df, correct_letters, wrong_spots, wrong_letters)

print(f'Correct Word: {correct_word}')

Guess: salet | Filter Length: 85
Guess: antis | Filter Length: 37
Guess: kyats | Filter Length: 17
Guess: chats | Filter Length: 12
Guess: moats | Filter Length: 8
Guess: brats | Filter Length: 4
Guess: drats | Filter Length: 3
Correct Word: frats


In [148]:
guess_word = 'semen'
_, correct_letters, wrong_spots, wrong_letters = validate_guess(guess_word, correct_word, correct_letters, wrong_spots, wrong_letters)
filtered_words = get_possible_words(df, correct_letters, wrong_spots, wrong_letters)

In [150]:
best_guess

'semes'

In [151]:
correct_word

'semes'