# Information Entropy

## 1. Understanding Information Entropy
**Information entropy**, introduced by Claude Shannon, _is a measure of the uncertainty or randomness in a set of possible outcomes_. In the context of Wordle, entropy quantifies the expected information you would gain from making a particular guess, based on how it partitions the remaining possible words.

### Why Use Entropy in Wordle?

In Wordle, each guess provides feedback that narrows down the list of possible answers. A guess that maximizes the expected information gain (entropy) splits the remaining candidates into many evenly sized feedback groups, so on average it eliminates the largest share of potential words and leads you to the solution more efficiently.

**Key Concepts:**
- **Probability Distribution:** The likelihood of each possible outcome.
- **Expected Information Gain:** The average amount of information you expect to gain from a guess.

## 2. Applying Entropy to Wordle

**How to Calculate Entropy for a Guess**
1. Possible Outcomes: For each guess, consider all possible feedback patterns (e.g., positions of green, yellow, and gray letters).

2. Partitioning the Word List: Each feedback pattern partitions the remaining possible words into subsets. Words that would produce the same feedback form a group.

3. Calculating Probabilities: For each feedback pattern, calculate the probability that it will occur, based on the current list of possible answers.

4. Entropy Formula:
$$\large
\text{Entropy} = -\sum_i p_i \log_2 p_i
$$
where $p_i$ is the probability of the $i$-th feedback pattern.


In [20]:
import math
import numpy as np
from collections import defaultdict, Counter


class Wordle:
    """Filter Wordle candidate words and rank guesses by information entropy.

    Typical use: call ``find_words`` with the clues gathered so far (this
    stores the surviving candidates on ``self.words``), then call
    ``compute_entropy_words`` to rank those candidates as next guesses.
    """

    def __init__(self):
        self.ANSWERS_PATH = "../wordle/wordle-answers.txt"
        self.POSSIBLE_WORDS_PATH = "../wordle/wordle-possible-words.txt"
        # Current candidate words and their count; both are overwritten by
        # find_words(). Initialised here so the entropy methods can be called
        # on a fresh instance without raising AttributeError.
        self.words = []
        self.LEN_WORDS = 0

    def load_words(self, file_path: str) -> list:
        """
        Read a word list stored as one word per line.
        Args:
            :param file_path: path of the text file to read
        Returns:
            :return: list of words, without surrounding whitespace and
                     without blank lines
        """
        with open(file_path, "r", encoding="utf-8") as file:
            lines = file.read().splitlines()
        # Blank lines (e.g. after the last word) must not become candidate
        # "words": an empty string would break the positional checks later.
        return [line.strip() for line in lines if line.strip()]

    def get_letter_positions(self, word_played: str, color_letters) -> np.ndarray:
        """
        Return the indices of the letters of word_played found in color_letters.
        Args:
            :param word_played: the guessed word
            :param color_letters: container of letters to look for
        Returns:
            :return: numpy array of 0-based indices (note that find_words
                     expects 1-based positions)
        """
        return np.array(
            [
                index
                for index, letter in enumerate(word_played)
                if letter in color_letters
            ]
        )

    # Function to find words in list of possible words in Wordle
    def find_words(
        self,
        green_letters: str,
        green_letter_positions: list,
        yellow_letters: str,
        yellow_letter_positions: list,
        gray_letters: str,
        answer_word_list: bool = True,
    ) -> list:
        """
        Function that takes the green and yellow letters and their positions and returns a list of possible words.
        The result is also stored in self.words (and its length in self.LEN_WORDS).
        Letters and positions are paired in order; a letter without a matching
        position (or a position without a matching letter) adds no positional
        constraint.
        Args:
            :param green_letters: string of green letters
            :param green_letter_positions: list of 1-based positions of green letters
            :param yellow_letters: string of yellow letters
            :param yellow_letter_positions: list of 1-based positions of yellow letters
            :param gray_letters: string of bad letters
            :param answer_word_list: boolean to check if the possible words are the answer words
        Returns:
            :return: list of possible words
        """
        # Convert the 1-based positions given by the caller to 0-based indices
        green_positions = [int(pos) - 1 for pos in green_letter_positions]
        yellow_positions = [int(pos) - 1 for pos in yellow_letter_positions]

        # Make all letters lower case
        green_letters = green_letters.lower()
        yellow_letters = yellow_letters.lower()
        gray_letters = gray_letters.lower()

        # Get list of words
        if answer_word_list:
            words = self.load_words(file_path=self.ANSWERS_PATH)
        else:
            words = self.load_words(file_path=self.POSSIBLE_WORDS_PATH)

        # Convert letters to sets for efficient checking
        required_letters = set(yellow_letters)
        # A letter can be gray in one square and green/yellow in another when
        # a guess repeats it; such a letter IS in the answer, so it must not
        # be used to reject words.
        excluded_letters = set(gray_letters) - required_letters - set(green_letters)

        final_words = []
        for word in words:
            word_letters = set(word)
            # Exclude words with bad letters
            if excluded_letters & word_letters:
                continue
            # Every yellow letter must appear somewhere in the word...
            if not required_letters <= word_letters:
                continue
            # ...but not at the position where it was marked yellow
            if any(
                word[pos] == letter
                for letter, pos in zip(yellow_letters, yellow_positions)
            ):
                continue
            # Green letters must sit exactly at their positions
            if any(
                word[pos] != letter
                for letter, pos in zip(green_letters, green_positions)
            ):
                continue
            final_words.append(word)

        self.words = final_words
        self.LEN_WORDS = len(self.words)
        return final_words

    @staticmethod
    def _score_guess(guess: str, answer: str) -> list:
        """Return the Wordle colors ("green"/"yellow"/"gray") of guess against answer."""
        colors = ["gray"] * len(guess)
        # Answer letters not consumed by a green match; each one can justify
        # at most one yellow, which is what makes repeated letters come out right.
        unmatched = Counter()
        for index, letter in enumerate(answer):
            if index < len(guess) and guess[index] == letter:
                colors[index] = "green"
            else:
                unmatched[letter] += 1
        for index, letter in enumerate(guess):
            if colors[index] != "green" and unmatched[letter] > 0:
                colors[index] = "yellow"
                unmatched[letter] -= 1
        return colors

    def simulate_feedback_pattern(self, word_played: str) -> dict:
        """
        Simulate the feedback word_played would get against every candidate answer.
        Args:
            :param word_played: the guess to evaluate
        Returns:
            :return: mapping of each word in self.words to its list of
                     "green"/"yellow"/"gray" colors, one per letter of word_played
        """
        # hash map of the feedback pattern
        feedback_pattern = defaultdict(list)
        for word in self.words:
            feedback_pattern[word] = self._score_guess(word_played, word)
        return feedback_pattern

    # calculate probabilities of feedback pattern
    def calculate_probabilities(self, feedback_patern) -> dict:
        """
        Turn per-word feedback patterns into a probability per distinct pattern.
        Args:
            :param feedback_patern: mapping of word to list of colors
        Returns:
            :return: mapping of pattern (as a tuple) to its relative frequency
        """
        # count the number of each feedback pattern
        pattern_counts = Counter(tuple(lst) for lst in feedback_patern.values())
        # Normalise by the number of patterns actually counted so the
        # probabilities always sum to 1, independent of self.LEN_WORDS.
        total = sum(pattern_counts.values())
        return {pattern: count / total for pattern, count in pattern_counts.items()}

    # calculate the entropy of the probabilities
    def compute_entropy(self, probabilities: dict) -> float:
        """
        Compute the Shannon entropy, in bits, of a probability distribution.
        Args:
            :param probabilities: mapping of outcome to probability
        Returns:
            :return: -sum(p * log2(p)); zero probabilities contribute nothing
        """
        return sum(
            -prob * math.log2(prob) for prob in probabilities.values() if prob > 0
        )

    # compute entropy for the guess
    def compute_entropy_words(self, top_n: int = 10) -> dict:
        """
        Rank every word in self.words by the entropy of its feedback patterns.
        Args:
            :param top_n: how many of the best words to return
        Returns:
            :return: dict of word to entropy, ordered by entropy in descending
                     order and limited to top_n entries (empty when there are
                     no candidate words)
        """
        words_entropy = {}
        for word in self.words:
            feedback_pattern = self.simulate_feedback_pattern(word)
            probabilities = self.calculate_probabilities(feedback_pattern)
            words_entropy[word] = self.compute_entropy(probabilities)
        # Sort once, after all entropies are known; sorted() is stable, so
        # ties keep the order in which the words appear in self.words.
        ranked = sorted(words_entropy.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked[:top_n])

In [23]:
import pprint

# Build the solver
wordle = Wordle()

# Clues collected from the guesses played so far.
# Positions are 1-based: find_words subtracts 1 before indexing.
green_letters, green_letter_positions = "", []
yellow_letters, yellow_letter_positions = "elewels", [5, 2, 4, 1, 2, 3, 4]
gray_letters = "crantrh"

# Keep only the words that are still consistent with the clues
words = wordle.find_words(
    green_letters=green_letters,
    green_letter_positions=green_letter_positions,
    yellow_letters=yellow_letters,
    yellow_letter_positions=yellow_letter_positions,
    gray_letters=gray_letters,
)

# Rank the remaining candidates by entropy
words_entropy = wordle.compute_entropy_words()

# Show the ranking, keeping the order returned by compute_entropy_words
pprint.pprint(words_entropy, sort_dicts=False)

{'swell': 0.0}
