# CSP Solver for the Wordle Game

### Useful links:
- [Medium article](https://medium.com/better-programming/beating-wordle-constraint-programming-ef0b0b6897fe#:~:text=Beating%20Wordle%3A%20Constraint%20Programming,Wordle%20solver%20do%20its%20thing)
- [Sample dataset (GitHub)](https://github.com/dwyl/english-words)


In [4]:
import pandas as pd
from collections import Counter
import itertools

# Load the full English word list into a single-column frame named "words"
# NOTE(review): read_fwf infers column widths from the leading rows of the file;
# confirm that suits a one-word-per-line list (read_csv may be a safer fit).
total_words = pd.read_fwf("words_alpha.txt", names=["words"])

# Restrict the frame to entries that are exactly five characters long
words = total_words[total_words["words"].str.len() == 5]

# Encode each word as a list of letter indices (a -> 0, b -> 1, ..., z -> 25)
words_data = [[ord(c) - ord("a") for c in word] for word in words["words"]]

words_data



Counter({0: 8393,
         4: 7803,
         18: 6537,
         14: 5219,
         17: 5145,
         8: 5067,
         11: 4247,
         19: 4189,
         13: 4044,
         20: 3361,
         3: 2813,
         2: 2745,
         24: 2523,
         12: 2494,
         15: 2299,
         7: 2284,
         1: 2091,
         6: 1971,
         10: 1743,
         5: 1238,
         22: 1171,
         21: 878,
         25: 474,
         9: 376,
         23: 361,
         16: 139})

In [3]:
from collections import defaultdict

# Knowledge accumulated from feedback, one container per colour:
#   green  -> position mapped to the letter that must sit there
#   yellow -> letter mapped to the positions where it must NOT sit
#   gray   -> letters that are banned from the word altogether
green_constraints = dict()
yellow_constraints = defaultdict(set)  # missing letters start with an empty position set
gray_letters = set()



In [17]:
from ortools.sat.python import cp_model

model = cp_model.CpModel()
solver = cp_model.CpSolver()

# One integer variable per board position; its value is a letter index (0-25)
position_vars = [model.NewIntVar(0, 25, f'pos_{i}') for i in range(5)]

# Cache of reified indicators so every (position, letter) pair is channelled once
# and can be shared between the yellow constraints and the objective.
letter_indicators = {}


def letter_indicator(pos, char):
    """Return a BoolVar that is true iff position `pos` holds letter `char`.

    A comparison such as `position_vars[pos] == char` is a constraint expression,
    not a 0/1 value, so it cannot be summed or weighted directly. The indicator
    is tied to the comparison in both directions so it can be used arithmetically.
    """
    key = (pos, char)
    if key not in letter_indicators:
        is_char = model.NewBoolVar(f'pos_{pos}_is_{char}')
        model.Add(position_vars[pos] == char).OnlyEnforceIf(is_char)
        model.Add(position_vars[pos] != char).OnlyEnforceIf(is_char.Not())
        letter_indicators[key] = is_char
    return letter_indicators[key]


# Green constraints: the letter is fixed at its position
for pos, char in green_constraints.items():
    model.Add(position_vars[pos] == char)

# Yellow constraints: the letter is in the word, but not at the flagged positions
for char, invalid_positions in yellow_constraints.items():
    # At least one position must hold the letter (sum of 0/1 indicators >= 1)
    model.Add(sum(letter_indicator(pos, char) for pos in range(5)) >= 1)
    # Exclude the positions where the letter was already tried
    for pos in invalid_positions:
        model.Add(position_vars[pos] != char)

# Gray letters: excluded from every position
for char in gray_letters:
    for pos in range(5):
        model.Add(position_vars[pos] != char)


# Objective (heuristic): favour words made of globally frequent letters.
# `letter_frequency` must already exist when this cell runs; it is computed
# in the statistics cell further down the notebook.
objective = []
for pos in range(5):
    for char, freq in letter_frequency.items():
        objective.append(letter_indicator(pos, char) * freq)

model.Maximize(sum(objective))




TypeError: CpModel.new_bool_var() takes 2 positional arguments but 3 were given

In [98]:
import random

# Word statistics used by the scoring heuristic:
#   positional_freq[p][c] -> how many words have letter c at position p
#   letter_frequency[c]   -> how many times letter c occurs across all words
positional_freq = [defaultdict(int) for _ in range(5)]

for encoded_word in words_data:
    for slot, tally in enumerate(positional_freq):
        tally[encoded_word[slot]] += 1

letter_frequency = Counter(itertools.chain.from_iterable(words_data))
# print(f"positional_freq: {positional_freq}")
# print(f"letter_frequency: {letter_frequency}")


def get_feedback(guess, target):
    """Score `guess` against `target` using the standard Wordle rules.

    Args:
        guess: Sequence of letters (characters or letter indices).
        target: Sequence of letters of the hidden word, same encoding as `guess`.

    Returns:
        A list with one mark per compared position:
        'G' - the letter is correct at this position,
        'Y' - the letter occurs elsewhere in the target and a copy of it is
              still unaccounted for,
        'B' - no unaccounted copy of the letter remains in the target.

    Repeated letters are handled by counting: a letter can earn at most as many
    'G'/'Y' marks as it has occurrences in the target, greens taking priority.
    """
    pairs = list(zip(guess, target))
    feedback = ['B'] * len(pairs)

    # Pool of target letters still available for a yellow mark
    unmatched = Counter(target)

    # First pass: exact matches consume their target letter
    for pos, (g_char, t_char) in enumerate(pairs):
        if g_char == t_char:
            feedback[pos] = 'G'
            unmatched[g_char] -= 1

    # Second pass: misplaced letters, left to right, while copies remain
    for pos, (g_char, _) in enumerate(pairs):
        if feedback[pos] != 'G' and unmatched[g_char] > 0:
            feedback[pos] = 'Y'
            unmatched[g_char] -= 1

    return feedback


def update_model(model, position_vars, guess, feedback, gray_letters):
    """Add to `model` the constraints implied by one guess and its feedback.

    Args:
        model: CP-SAT model to constrain (modified in place).
        position_vars: One integer variable per word position.
        guess: Letter indices of the guessed word.
        feedback: Marks ('G', 'Y' or 'B') as produced by `get_feedback`.
        gray_letters: Set of letters known to be absent from the target;
            updated in place with the letters ruled out by this guess.

    The constraints follow the standard Wordle counting rules:
    - 'G' fixes the letter at its position.
    - 'Y' and 'B' both forbid the letter at that position.
    - A letter with k 'G'/'Y' marks occurs at least k times; if the same guess
      also has a 'B' for that letter, it occurs exactly k times.
    - A letter that only received 'B' marks is absent from the word.
    """
    n_positions = len(position_vars)
    confirmed = defaultdict(int)  # letter -> number of 'G'/'Y' marks in this guess
    capped = set()                # letters that received at least one 'B'

    for pos in range(n_positions):
        char = guess[pos]
        fb = feedback[pos]
        if fb == 'G':
            model.Add(position_vars[pos] == char)
            confirmed[char] += 1
        elif fb == 'Y':
            model.Add(position_vars[pos] != char)
            confirmed[char] += 1
        elif fb == 'B':
            model.Add(position_vars[pos] != char)
            capped.add(char)

    # A 'B' letter is fully absent only when no copy of it was marked 'G'/'Y'
    for char in capped:
        if char not in confirmed:
            gray_letters.add(char)

    # Enforce gray letters (exclude them entirely)
    for char in gray_letters:
        for p in range(n_positions):
            model.Add(position_vars[p] != char)

    # Enforce occurrence counts for the letters known to be in the word
    for char, count in confirmed.items():
        # occurs[p] is true iff position p holds `char`, so the sum is its count
        occurs = [model.NewBoolVar(f'occurs_{p}_{char}') for p in range(n_positions)]
        for p in range(n_positions):
            model.Add(position_vars[p] == char).OnlyEnforceIf(occurs[p])
            model.Add(position_vars[p] != char).OnlyEnforceIf(occurs[p].Not())
        if char in capped:
            # The extra copy in the guess was rejected: the count is exact
            model.Add(sum(occurs) == count)
        else:
            model.Add(sum(occurs) >= count)


# Function that returns the word in the dataset that has the best score according to the heuristic function
def get_best_word(previous_feedback=None, previous_guess=None):
    """Return the highest-scoring word that agrees with the previous feedback.

    Args:
        previous_feedback: Marks ('G', 'Y', anything else is treated as gray)
            for the previous guess, or None/empty when there is none.
        previous_guess: Letter indices of the previous guess, or None/empty.

    Returns:
        The best word (a list of letter indices) or None when no word
        qualifies. The returned word is also removed from the global
        `words_data`, so a later call cannot return it again.

    The filter is only applied when both arguments are non-empty. A word's
    score is the sum over its letters of
    10 * (positional frequency + 0.1 * global frequency).
    """
    use_filter = bool(previous_feedback) and bool(previous_guess)
    hints = list(zip(previous_guess, previous_feedback)) if use_filter else []
    # Letters proven to be in the target by at least one green/yellow mark
    present = {char for char, fb in hints if fb in ('G', 'Y')}

    def is_consistent(word):
        """Tell whether `word` could still be the target given the hints."""
        for pos, (char, fb) in enumerate(hints):
            if fb == 'G':
                if word[pos] != char:
                    return False
            elif fb == 'Y':
                if word[pos] == char or char not in word:
                    return False
            else:
                # Gray: never at this position; absent entirely unless another
                # copy of the same letter was marked green/yellow.
                if word[pos] == char:
                    return False
                if char not in present and char in word:
                    return False
        return True

    best_score = -1
    best_word = None
    for word in words_data:
        # Skip (rather than merely stop checking) words that contradict the hints
        if use_filter and not is_consistent(word):
            continue

        score = sum(
            10 * (positional_freq[pos][char] + 0.1 * letter_frequency[char])
            for pos, char in enumerate(word)
        )
        if score > best_score:
            best_score = score
            best_word = word

    if best_word is not None:
        words_data.remove(best_word)
    return best_word


def solve_wordle(target_word, max_attempts=6):
    """Play Wordle against `target_word` with a CP-SAT model, printing each attempt.

    Args:
        target_word: The hidden word, as a string of lowercase letters.
        max_attempts: Maximum number of guesses before giving up.

    Returns:
        None. Progress and the final outcome are reported with `print`.

    Every attempt solves the current model to obtain a guess, scores it with
    `get_feedback`, and tightens the model with `update_model`. No objective is
    set, so the solver returns any word that satisfies the constraints.
    """
    target_as_int = [ord(letter) - ord('a') for letter in target_word]

    status_dict = {
        cp_model.OPTIMAL: "OPTIMAL",
        cp_model.FEASIBLE: "FEASIBLE",
        cp_model.INFEASIBLE: "INFEASIBLE",
        cp_model.MODEL_INVALID: "MODEL_INVALID",
        cp_model.UNKNOWN: "UNKNOWN",
    }

    model = cp_model.CpModel()
    position_vars = [model.NewIntVar(0, 25, f'pos_{i}') for i in range(5)]
    # Only rows of the encoded word table are admissible assignments
    model.AddAllowedAssignments(position_vars, list(words_data))

    gray_letters = set()

    for attempt in range(max_attempts):
        # A fresh solver per attempt; the model keeps accumulating constraints
        solver = cp_model.CpSolver()
        status = solver.Solve(model)
        print(f"status = {status_dict.get(status, 'UNKNOWN')}")

        if status not in (cp_model.OPTIMAL, cp_model.FEASIBLE):
            print("No valid guesses left!")
            return

        guess = [solver.Value(var) for var in position_vars]
        guess_str = ''.join(chr(code + ord('a')) for code in guess)
        feedback = get_feedback(guess, target_as_int)
        print(f"Attempt {attempt+1}: {guess_str} → {feedback}")

        if feedback == ['G'] * 5:
            print(f"Solved {target_word} in {attempt+1} attempts!")
            return

        update_model(model, position_vars, guess, feedback, gray_letters)

    print(f"Failed to solve {target_word} in {max_attempts} attempts.")

# Demo run: draw a random five-letter word and let the solver play against it
random_word = random.choice(words["words"].to_list())
print(f"Random word: {random_word}")
solve_wordle(random_word, max_attempts=100)


Random word: kenaf
status = OPTIMAL
Attempt 1: afley → ['Y', 'Y', 'B', 'Y', 'B']
status = OPTIMAL
Attempt 2: chafe → ['B', 'B', 'Y', 'Y', 'Y']
status = OPTIMAL
Attempt 3: freak → ['Y', 'B', 'Y', 'G', 'Y']
status = OPTIMAL
Attempt 4: kenaf → ['G', 'G', 'G', 'G', 'G']
Solved kenaf in 4 attempts!
