# Codenames AI: Spymaster & Guesser — SOLUTION

Embedding-based approach using `sentence-transformers`, with a greedy, vectorized Spymaster.

In [None]:
import nltk
import random
import numpy as np
import tkinter as tk
from enum import Enum
from dataclasses import dataclass, field
from collections import Counter

nltk.download("words", quiet=True)
nltk.download("wordnet", quiet=True)
nltk.download("omw-1.4", quiet=True)

from nltk.corpus import words, wordnet

## Part 1: Board Generation

In [None]:
from functools import cache
import random
from random import randrange, shuffle


class CardRole(Enum):
    """Hidden identity assigned to each card on the board."""

    RED = "red"
    BLUE = "blue"
    BYSTANDER = "bystander"
    # Guessing this card ends the game at once and the other team wins
    # (see GameState.make_guess).
    ASSASSIN = "assassin"


@dataclass
class Card:
    """One board card: its word, its hidden role, and whether it has been guessed."""

    word: str
    role: CardRole
    # Flipped to True by GameState.make_guess when the card is guessed.
    revealed: bool = False


@cache
def get_candidate_words() -> list[str]:
    """
    Build the sorted vocabulary used for both board words and clue words.

    A word from the NLTK `words` corpus is kept (lower-cased, de-duplicated)
    when it is purely alphabetic, 4 to 8 characters long, and has more than
    five WordNet synsets.

    The result is memoised with `@cache`, so every caller receives the *same*
    list object; callers must copy it before mutating.
    """
    accepted: set[str] = set()
    for raw in words.words():
        token = raw.lower()
        if token in accepted:
            continue
        if not (token.isalpha() and token.islower()):
            continue
        if not 4 <= len(token) <= 8:
            continue
        # The WordNet lookup is the most expensive test, so it runs last.
        if len(wordnet.synsets(token)) <= 5:
            continue
        accepted.add(token)
    return sorted(accepted)


def generate_board(seed=42) -> list[Card]:
    """
    Deal a 25-card board: 9 red, 8 blue, 7 bystander and 1 assassin card.

    Args:
        seed: Seed for the module-level `random` generator; the same seed
            always produces the same board.

    Returns:
        25 unrevealed cards with distinct words and shuffled roles.
    """
    random.seed(seed)
    stack = (
        [CardRole.RED] * 9
        + [CardRole.BLUE] * 8
        + [CardRole.BYSTANDER] * 7
        + [CardRole.ASSASSIN]
    )
    shuffle(stack)
    # get_candidate_words() is @cache'd and hands back the same list object on
    # every call. Popping from it directly would permanently remove 25 words
    # from the shared pool per board, so later calls with the same seed would
    # deal different boards. Draw from a private copy instead.
    word_list = list(get_candidate_words())
    cards = []
    while stack:
        word = word_list.pop(randrange(len(word_list)))
        cards.append(Card(word=word, role=stack.pop(), revealed=False))
    return cards

In [None]:
# Test Part 1
# Vocabulary checks: pool size, character filter, and WordNet coverage
# (only the first 50 words are looked up, to keep the cell quick).
candidates = get_candidate_words()
print(f"Candidate pool: {len(candidates)} words")
assert len(candidates) > 1000, "Should have a large candidate pool"
assert all(w.isalpha() and w.islower() for w in candidates)
assert all(len(wordnet.synsets(w)) > 0 for w in candidates[:50])
print(f"Sample: {candidates[:10]}")

# Board checks: 25 distinct, unrevealed cards with the 9/8/7/1 role split.
board = generate_board(seed=42)
assert len(board) == 25
role_counts = Counter(c.role for c in board)
assert role_counts[CardRole.RED] == 9
assert role_counts[CardRole.BLUE] == 8
assert role_counts[CardRole.BYSTANDER] == 7
assert role_counts[CardRole.ASSASSIN] == 1
assert len(set(c.word for c in board)) == 25
assert all(not c.revealed for c in board)
print(f"\nBoard words: {[c.word for c in board]}")
print(f"Role distribution: {dict(role_counts)}")
print("✓ Part 1 passed")

## Part 2: Embedding Engine

In [None]:
from sentence_transformers import SentenceTransformer


class EmbeddingEngine:
    """
    Sentence-transformer embeddings plus cached cosine similarities.

    Built once per board. Attributes created by the constructor:
    - board_embeddings: one L2-normalised row per board word
    - vocab_embeddings: one L2-normalised row per candidate clue word
    - vocab_to_board_sims: clue-word x board-word cosine similarities
    - board_to_board_sims: board-word x board-word cosine similarities
    """

    def __init__(self, board_words: list[str], vocab_words: list[str], model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)
        self.board_words = board_words
        self.vocab_words = vocab_words

        # Word -> row position lookups for both word lists.
        self.board_word_to_idx: dict[str, int] = dict(zip(board_words, range(len(board_words))))
        self.vocab_word_to_idx: dict[str, int] = dict(zip(vocab_words, range(len(vocab_words))))

        self.board_embeddings = self.model.encode(board_words, normalize_embeddings=True)
        self.vocab_embeddings = self.model.encode(vocab_words, normalize_embeddings=True, batch_size=256)

        # Rows are unit length, so a plain dot product is the cosine similarity.
        board_t = self.board_embeddings.T
        self.vocab_to_board_sims: np.ndarray = self.vocab_embeddings @ board_t
        self.board_to_board_sims: np.ndarray = self.board_embeddings @ board_t

    def similarity(self, word1: str, word2: str) -> float:
        """Return the cosine similarity of two board words (KeyError if either is not on the board)."""
        row = self.board_word_to_idx[word1]
        col = self.board_word_to_idx[word2]
        return float(self.board_to_board_sims[row, col])

    def clue_to_board_sims(self, clue_word: str) -> dict[str, float]:
        """
        Map every board word to its similarity with `clue_word`.

        Vocabulary words are read from the precomputed matrix; any other word
        is embedded on demand.
        """
        vocab_pos = self.vocab_word_to_idx.get(clue_word)
        if vocab_pos is None:
            clue_vec = self.model.encode([clue_word], normalize_embeddings=True)[0]
            row = clue_vec @ self.board_embeddings.T
        else:
            row = self.vocab_to_board_sims[vocab_pos, :]
        return dict(zip(self.board_words, map(float, row)))

    def get_vocab_sims_to_board_indices(self, board_indices: list[int]) -> np.ndarray:
        """Return the similarity columns for the given board positions, one row per vocab word."""
        return self.vocab_to_board_sims[:, board_indices]

In [None]:
# Test Part 2
board = generate_board(seed=42)
board_words = [c.word for c in board]
# Build the exclusion set once instead of once per candidate word.
board_word_set = set(board_words)
vocab_words = [w for w in get_candidate_words() if w not in board_word_set]

engine = EmbeddingEngine(board_words, vocab_words)

# Shapes of the precomputed matrices (384 is the embedding width of the default model).
assert engine.board_embeddings.shape == (25, 384), f"Expected (25, 384), got {engine.board_embeddings.shape}"
assert engine.vocab_to_board_sims.shape == (len(vocab_words), 25)
assert engine.board_to_board_sims.shape == (25, 25)

# Normalised embeddings give a self-similarity of ~1.
for w in board_words[:3]:
    assert engine.similarity(w, w) > 0.99, f"Self-similarity of '{w}' should be ~1.0"

# Similarity is symmetric.
w1, w2 = board_words[0], board_words[1]
assert abs(engine.similarity(w1, w2) - engine.similarity(w2, w1)) < 1e-6

# Precomputed path (clue is a vocab word).
sims = engine.clue_to_board_sims(vocab_words[0])
assert isinstance(sims, dict) and len(sims) == 25

# On-the-fly path, taken whenever the clue is not a vocab word.
sims_novel = engine.clue_to_board_sims("elephant")
assert len(sims_novel) == 25

print(f"Board: {board_words[:5]}...")
print(f"Vocab size: {len(vocab_words)}")
print(f"Embedding dim: {engine.board_embeddings.shape[1]}")
print(f"Sample similarity ({w1}, {w2}): {engine.similarity(w1, w2):.3f}")
print("✓ Part 2 passed")

## Part 3: Game State

In [None]:
class Team(Enum):
    """The two competing teams; values match the corresponding CardRole values."""

    RED = "red"
    BLUE = "blue"


@dataclass
class GuessResult:
    """Outcome of a single guess, as returned by GameState.make_guess."""

    word: str
    role: CardRole  # role of the card that was revealed
    correct: bool  # True when the card belongs to the guessing team
    game_over: bool  # True when this guess ended the game
    assassin: bool  # True when the revealed card was the assassin


@dataclass
class Clue:
    """A spymaster clue: one word plus how many board words it targets."""

    word: str
    count: int  # upper bound on guesses the Guesser makes for this clue


class GameState:
    """
    Mutable state of a single Codenames game.

    Holds the board, the team whose turn it is, whether the game has ended,
    the winner, and a per-turn history that callers append to.
    """

    def __init__(self, board: list[Card], starting_team: Team = Team.RED):
        self.board = board
        self.current_team = starting_team
        self.game_over = False
        self.winner: Team | None = None
        # One entry per turn: (team, clue, guess results). Appended by the caller.
        self.turn_history: list[tuple[Team, Clue, list[GuessResult]]] = []

    def get_unrevealed(self, role: CardRole | None = None) -> list[Card]:
        """Return the unrevealed cards, optionally restricted to one role."""
        return [c for c in self.board if not c.revealed and (role is None or c.role == role)]

    def get_team_role(self, team: Team) -> CardRole:
        """Return the card role owned by `team`."""
        return CardRole.RED if team == Team.RED else CardRole.BLUE

    def get_opponent_role(self, team: Team) -> CardRole:
        """Return the card role owned by the team opposing `team`."""
        return CardRole.BLUE if team == Team.RED else CardRole.RED

    def _check_win(self) -> Team | None:
        """Return the team whose cards are all revealed, or None if neither is done."""
        if not self.get_unrevealed(CardRole.RED):
            return Team.RED
        if not self.get_unrevealed(CardRole.BLUE):
            return Team.BLUE
        return None

    def make_guess(self, word: str) -> GuessResult:
        """
        Reveal the card matching `word` (case-insensitive) for the current team.

        Revealing the assassin ends the game in favour of the other team.
        Otherwise the game ends as soon as either team has no unrevealed
        cards left. This method does not switch turns; call `end_turn`.

        Returns:
            A GuessResult whose `word` is the board's own spelling of the card.

        Raises:
            ValueError: if the word is not on the board or is already revealed.
        """
        wanted = word.lower()
        card = next((c for c in self.board if c.word.lower() == wanted), None)
        if card is None:
            raise ValueError(f"Word {word} is not found on the board")
        if card.revealed:
            raise ValueError(f"Word {word} is already revealed")

        card.revealed = True

        if card.role == CardRole.ASSASSIN:
            self.game_over = True
            self.winner = Team.BLUE if self.current_team == Team.RED else Team.RED
            return GuessResult(word=card.word, role=card.role, correct=False, game_over=True, assassin=True)

        correct = card.role == self.get_team_role(self.current_team)
        winner = self._check_win()
        game_over = winner is not None
        if game_over:
            self.game_over = True
            self.winner = winner

        # Report the board's spelling (as the assassin branch does) rather than
        # echoing the caller's casing, since the lookup is case-insensitive.
        return GuessResult(word=card.word, role=card.role, correct=correct, game_over=game_over, assassin=False)

    def end_turn(self):
        """Hand the turn to the other team."""
        self.current_team = Team.BLUE if self.current_team == Team.RED else Team.RED

    def remaining_count(self, team: Team) -> int:
        """Return how many of `team`'s cards are still unrevealed."""
        return len(self.get_unrevealed(self.get_team_role(team)))

In [None]:
# Test Part 3
board = generate_board(seed=42)
game = GameState(board)

# Fresh game: red to move, full card counts, nothing revealed.
assert not game.game_over
assert game.current_team == Team.RED
assert game.remaining_count(Team.RED) == 9
assert game.remaining_count(Team.BLUE) == 8
assert len(game.get_unrevealed()) == 25

# A correct guess by red reduces red's remaining count.
red_cards = game.get_unrevealed(CardRole.RED)
result = game.make_guess(red_cards[0].word)
assert result.correct and not result.game_over and not result.assassin
assert game.remaining_count(Team.RED) == 8

# A bystander is a wrong guess but does not end the game.
bystander_cards = game.get_unrevealed(CardRole.BYSTANDER)
result = game.make_guess(bystander_cards[0].word)
assert not result.correct and not result.game_over

game.end_turn()
assert game.current_team == Team.BLUE

# Blue hits the assassin, so red wins.
assassin_card = game.get_unrevealed(CardRole.ASSASSIN)
if assassin_card:
    result = game.make_guess(assassin_card[0].word)
    assert result.assassin and result.game_over
    assert game.game_over and game.winner == Team.RED

# Guessing an already-revealed card must raise ValueError.
try:
    game.make_guess(red_cards[0].word)
    assert False, "Should have raised ValueError"
except ValueError:
    pass

print("✓ Part 3 passed")

## Part 4: Guesser AI

In [None]:
class Guesser:
    """
    Clue interpreter: scores board words against a clue through the
    embedding engine and guesses the closest ones.
    """

    def __init__(self, engine: EmbeddingEngine, min_threshold: float = 0.15):
        self.min_threshold = min_threshold
        self.engine = engine

    def rank_words(self, clue_word: str, unrevealed_words: list[str]) -> list[tuple[str, float]]:
        """Return (word, similarity) pairs for the unrevealed words, most similar first."""
        board_sims = self.engine.clue_to_board_sims(clue_word)
        scored = []
        for candidate in unrevealed_words:
            # Words the engine has no score for are dropped.
            if candidate in board_sims:
                scored.append((candidate, board_sims[candidate]))
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored

    def make_guesses(self, clue: Clue, unrevealed_words: list[str]) -> list[str]:
        """Return at most clue.count words, stopping at the first one below min_threshold."""
        picks = []
        for candidate, score in self.rank_words(clue.word, unrevealed_words):
            if len(picks) >= clue.count or score < self.min_threshold:
                break
            picks.append(candidate)
        return picks

In [None]:
# Test Part 4
# NOTE: reuses the `engine` built in the Part 2 test cell.
guesser = Guesser(engine)

# Ranking covers every board word, as (word, similarity) tuples, sorted descending.
unrevealed = list(engine.board_words)
ranking = guesser.rank_words("danger", unrevealed)
assert len(ranking) == len(unrevealed)
assert all(isinstance(r, tuple) and len(r) == 2 for r in ranking)
sims = [s for _, s in ranking]
assert sims == sorted(sims, reverse=True), "Should be sorted descending"
print("Rankings for clue 'danger':")
for word, sim in ranking[:5]:
    print(f"  {word}: {sim:.3f}")

# Guesses never exceed the clue count and are always board words.
guesses = guesser.make_guesses(Clue("danger", 2), unrevealed)
assert len(guesses) <= 2
assert all(g in unrevealed for g in guesses)
print(f"\nGuesses for 'danger 2': {guesses}")

# Not asserted: the similarity threshold may cut this list short.
guesses_obscure = guesser.make_guesses(Clue("quasar", 3), unrevealed)
print(f"Guesses for 'quasar 3': {guesses_obscure} (may be fewer due to threshold)")

print("✓ Part 4 passed")

## Part 5: Spymaster AI

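Greedy vectorized approach: for each vocabulary word, sort the team's unrevealed words by similarity and consider targeting the top 1, 2, or 3. Each option is scored with a margin formula: the similarity of the weakest target, minus weighted penalties for the most similar opponent word, the assassin, and the most similar bystander, plus a small bonus for each extra target.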

In [None]:
@dataclass
class ScoredClue:
    """A clue chosen by a Spymaster, together with the numbers behind the choice."""

    word: str
    count: int  # number of team words the clue targets
    score: float  # margin score of the winning (word, count) option
    targets: list[str]  # targeted team words, most similar first
    target_sims: list[float]  # similarity of the clue to each entry of `targets`
    max_opponent_sim: float  # highest similarity to an unrevealed opponent word
    assassin_sim: float  # similarity of the clue to the assassin word


class Spymaster:
    """
    AI Spymaster that generates clues using embedding similarity.

    Greedy approach: for each vocab word, sort team words by similarity,
    try targeting the top 1/2/3, score each option with a margin formula,
    and pick the best.
    """

    def __init__(
        self,
        engine: EmbeddingEngine,
        opponent_penalty: float = 1.5,
        assassin_penalty: float = 3.0,
        bystander_penalty: float = 0.5,
        count_bonus: float = 0.05,
    ):
        self.engine = engine
        self.opponent_penalty = opponent_penalty
        self.assassin_penalty = assassin_penalty
        self.bystander_penalty = bystander_penalty
        self.count_bonus = count_bonus

    def score_all_vocab(
        self,
        team_indices: list[int],
        opponent_indices: list[int],
        assassin_index: int | None,
        bystander_indices: list[int],
        count: int,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Score every vocab word as a clue for `count` targets. Fully vectorized.

        For each vocab word the targets are its `count` most similar team
        words, and the "weakest link" is the count-th highest similarity.

            score = weakest_link
                    - opponent_penalty  * max similarity to an opponent word
                    - assassin_penalty  * similarity to the assassin
                    - bystander_penalty * max similarity to a bystander
                    + count_bonus * (count - 1)

        Args:
            team_indices: board positions of the team's unrevealed words.
            opponent_indices: board positions of unrevealed opponent words.
            assassin_index: board position of the assassin, or None when
                there is no assassin to avoid (no penalty is applied).
            bystander_indices: board positions of unrevealed bystanders.
            count: number of team words to target.

        Returns:
            (scores, weakest_link_sims), one entry per vocab word.

        Raises:
            ValueError: if `count` is smaller than 1.
            IndexError: if `count` exceeds the number of team words.
        """
        # count == 0 would index column -1, i.e. silently use the *least*
        # similar team word as the weakest link, so reject it explicitly.
        if count < 1:
            raise ValueError(f"count must be at least 1, got {count}")
        if count > len(team_indices):
            raise IndexError(f"count {count} exceeds the {len(team_indices)} available team words")

        sims = self.engine.vocab_to_board_sims
        no_penalty = np.zeros(len(self.engine.vocab_words))

        # Per vocab word, team similarities in descending order.
        team_sims_sorted = np.sort(sims[:, team_indices], axis=1)[:, ::-1]
        weakest_link = team_sims_sorted[:, count - 1]

        # Danger terms: the single closest word of each unwanted kind.
        max_opp_sim = sims[:, opponent_indices].max(axis=1) if opponent_indices else no_penalty
        assassin_sim = sims[:, assassin_index] if assassin_index is not None else no_penalty
        max_by_sim = sims[:, bystander_indices].max(axis=1) if bystander_indices else no_penalty

        scores = (
            weakest_link
            - self.opponent_penalty * max_opp_sim
            - self.assassin_penalty * assassin_sim
            - self.bystander_penalty * max_by_sim
            + self.count_bonus * (count - 1)
        )

        return scores, weakest_link

    def generate_clue(self, game: GameState, team: Team) -> ScoredClue:
        """
        Generate the best clue for the given team.

        Tries count = 3, 2, 1 (capped at the number of unrevealed team words),
        scores all vocab words for each, and keeps the best (word, count) pair.
        On equal scores the larger count wins because it is tried first.

        If the team has no unrevealed words, no option is scored and the
        result carries a score of -inf and an empty target list.
        """
        word_to_idx = self.engine.board_word_to_idx
        sims = self.engine.vocab_to_board_sims

        team_cards = game.get_unrevealed(game.get_team_role(team))
        opp_cards = game.get_unrevealed(game.get_opponent_role(team))
        assassin_cards = game.get_unrevealed(CardRole.ASSASSIN)
        bystander_cards = game.get_unrevealed(CardRole.BYSTANDER)

        team_indices = [word_to_idx[c.word] for c in team_cards]
        opp_indices = [word_to_idx[c.word] for c in opp_cards]
        bystander_indices = [word_to_idx[c.word] for c in bystander_cards]
        # None (not a placeholder index) when the assassin is already revealed,
        # so an unrelated board word is never penalised as if it were the assassin.
        assassin_index = word_to_idx[assassin_cards[0].word] if assassin_cards else None

        best_score = float("-inf")
        best_vocab_idx = 0
        best_count = 1

        max_count = min(3, len(team_indices))
        for count in range(max_count, 0, -1):
            scores, _ = self.score_all_vocab(
                team_indices, opp_indices, assassin_index, bystander_indices, count
            )
            top_idx = int(np.argmax(scores))
            if scores[top_idx] > best_score:
                # Store a plain float so ScoredClue.score matches its annotation.
                best_score = float(scores[top_idx])
                best_vocab_idx = top_idx
                best_count = count

        clue_word = self.engine.vocab_words[best_vocab_idx]
        clue_sims_to_team = sims[best_vocab_idx, team_indices]

        # The targets are the `best_count` team words most similar to the clue.
        top_team_order = np.argsort(clue_sims_to_team)[::-1][:best_count]
        targets = [team_cards[i].word for i in top_team_order]
        target_sims = [float(clue_sims_to_team[i]) for i in top_team_order]

        max_opp = float(sims[best_vocab_idx, opp_indices].max()) if opp_indices else 0.0
        assassin_sim = float(sims[best_vocab_idx, assassin_index]) if assassin_index is not None else 0.0

        return ScoredClue(
            word=clue_word,
            count=best_count,
            score=best_score,
            targets=targets,
            target_sims=target_sims,
            max_opponent_sim=max_opp,
            assassin_sim=assassin_sim,
        )

In [None]:
# Test Part 5
# NOTE: reuses the `engine` built in the Part 2 test cell. Board words are
# looked up in engine.board_word_to_idx below, so the board regenerated here
# must contain the same words as the board that engine was built from.
spymaster = Spymaster(engine)

board = generate_board(seed=42)
game = GameState(board)
team_cards = game.get_unrevealed(CardRole.RED)
team_indices = [engine.board_word_to_idx[c.word] for c in team_cards]
opp_cards = game.get_unrevealed(CardRole.BLUE)
assassin_cards = game.get_unrevealed(CardRole.ASSASSIN)
bystander_cards = game.get_unrevealed(CardRole.BYSTANDER)
opp_indices = [engine.board_word_to_idx[c.word] for c in opp_cards]
assassin_index = engine.board_word_to_idx[assassin_cards[0].word]
bystander_indices = [engine.board_word_to_idx[c.word] for c in bystander_cards]

print(f"Team words: {[c.word for c in team_cards]}")
print(f"Opponent words: {[c.word for c in opp_cards]}")
print(f"Assassin: {assassin_cards[0].word}")

# Test score_all_vocab for count=2
scores, weakest_sims = spymaster.score_all_vocab(
    team_indices, opp_indices, assassin_index, bystander_indices, count=2
)
# One score and one weakest-link value per vocab word.
assert scores.shape == (len(engine.vocab_words),)
assert weakest_sims.shape == scores.shape
best_idx = np.argmax(scores)
print(f"\nBest 2-word clue: '{engine.vocab_words[best_idx]}' (score={scores[best_idx]:.3f}, weakest_link={weakest_sims[best_idx]:.3f})")

# Test full clue generation
clue = spymaster.generate_clue(game, Team.RED)
assert isinstance(clue, ScoredClue)
assert clue.count >= 1
assert clue.count == len(clue.targets)
assert clue.word.lower() not in {c.word for c in board}, "Clue must not be a board word"
print(f"\nGenerated clue: '{clue.word}' for {clue.count}")
print(f"Targets: {clue.targets}")
print(f"Target similarities: {[f'{s:.3f}' for s in clue.target_sims]}")
print(f"Max opponent sim: {clue.max_opponent_sim:.3f}")
print(f"Assassin sim: {clue.assassin_sim:.3f}")
print(f"Score: {clue.score:.3f}")
print("✓ Part 5 passed")

## Part 6: Game Loop

In [None]:
def play_game(seed: int = 42, verbose: bool = True, max_turns: int = 30) -> GameState:
    """
    Run a full game of Codenames with AI players on both teams.

    Each turn the current team's Spymaster gives a clue and the Guesser
    guesses until it misses, runs out of guesses, or the game ends.

    Args:
        seed: Board seed passed to `generate_board`.
        verbose: Print the board, every clue and guess, and the result.
        max_turns: Turn limit. If it is reached, the team with fewer
            unrevealed cards is declared the winner (red wins ties).

    Returns:
        The finished GameState, with `game_over` set, a winner assigned and
        one `turn_history` entry per turn played.
    """
    board = generate_board(seed=seed)
    board_words = [c.word for c in board]
    # Build the exclusion set once rather than once per candidate word.
    board_word_set = set(board_words)
    vocab_words = [w for w in get_candidate_words() if w not in board_word_set]
    engine = EmbeddingEngine(board_words, vocab_words)
    spy = Spymaster(engine)
    guess = Guesser(engine)
    game = GameState(board)

    if verbose:
        print("=" * 60)
        print("CODENAMES — AI vs AI")
        print("=" * 60)
        print("\nBoard:")
        for i in range(0, 25, 5):
            row = board[i : i + 5]
            print("  ".join(f"{c.word:>10}({c.role.value[0].upper()})" for c in row))
        print()

    turn = 0
    while not game.game_over and turn < max_turns:
        team = game.current_team
        turn += 1

        if verbose:
            print(
                f"--- Turn {turn}: {team.value.upper()} "
                f"(Red: {game.remaining_count(Team.RED)}, "
                f"Blue: {game.remaining_count(Team.BLUE)}) ---"
            )

        # Spymaster generates clue
        clue = spy.generate_clue(game, team)
        if verbose:
            print(
                f"  Spymaster: '{clue.word}' for {clue.count} "
                f"(targets: {clue.targets}, score: {clue.score:.3f})"
            )

        # Guesser makes guesses (possibly none, if nothing clears its threshold)
        unrevealed_words = [c.word for c in game.get_unrevealed()]
        guesses = guess.make_guesses(Clue(clue.word, clue.count), unrevealed_words)

        turn_results = []
        for g in guesses:
            if game.game_over:
                break
            result = game.make_guess(g)
            turn_results.append(result)
            if verbose:
                status = "✓" if result.correct else ("☠" if result.assassin else "✗")
                print(f"  Guess: {g} → {result.role.value} {status}")
            # The first wrong guess ends the turn.
            if not result.correct:
                break

        game.turn_history.append((team, Clue(clue.word, clue.count), turn_results))

        if not game.game_over:
            game.end_turn()

    if verbose:
        print(f"\n{'=' * 60}")
        if game.winner:
            print(f"WINNER: {game.winner.value.upper()}!")
        else:
            print("Game ended (max turns reached)")
        print(f"Turns played: {len(game.turn_history)}")

    # If max_turns hit without a winner, declare based on remaining
    if not game.game_over:
        game.game_over = True
        red_rem = game.remaining_count(Team.RED)
        blue_rem = game.remaining_count(Team.BLUE)
        game.winner = Team.RED if red_rem <= blue_rem else Team.BLUE

    return game

In [None]:
# Test Part 6
# Plays one full verbose game and checks that it finishes with a winner.
game = play_game(seed=42, verbose=True)
assert game.game_over, "Game should have ended"
assert game.winner is not None, "There should be a winner"
assert len(game.turn_history) > 0, "Should have at least one turn"
print(f"\nGame ended after {len(game.turn_history)} turns")
print(f"Winner: {game.winner.value}")
print("✓ Part 6 passed")

## Part 7: tkinter GUI

In [None]:
# Button colours per card role. "bg"/"fg" are used once a card is revealed;
# "unrevealed" is the background shown for hidden cards in spymaster view.
COLORS = {
    CardRole.RED: {"bg": "#e74c3c", "fg": "white", "unrevealed": "#f5b7b1"},
    CardRole.BLUE: {"bg": "#3498db", "fg": "white", "unrevealed": "#aed6f1"},
    CardRole.BYSTANDER: {"bg": "#f5deb3", "fg": "black", "unrevealed": "#fef9e7"},
    CardRole.ASSASSIN: {"bg": "#2c3e50", "fg": "white", "unrevealed": "#d5d8dc"},
}
# Colours for hidden cards in guesser view, where the role must not show.
HIDDEN_COLOR = {"bg": "#ecf0f1", "fg": "black"}


class CodenamesGUI:
    """
    Tkinter front end for one Codenames game.

    The AI Spymaster supplies clues ("New Clue"); guesses come either from
    clicking a card or from the AI Guesser ("AI Guess"). "Toggle View"
    switches between the guesser view and the spymaster view, which tints
    hidden cards by role.
    """

    def __init__(self, game: GameState, spymaster: Spymaster, guesser: Guesser, spymaster_view: bool = False):
        self.game = game
        self.spymaster = spymaster
        self.guesser = guesser
        self.spymaster_view = spymaster_view
        # Clue for the turn in progress; None whenever no turn is active.
        self.current_clue: ScoredClue | None = None
        self.guesses_remaining = 0

        self.root = tk.Tk()
        self.root.title("Codenames AI")
        self.buttons: list[tk.Button] = []
        self._build_ui()

    def _build_ui(self):
        """Create the status labels, the 5-column card grid and the control buttons."""
        self.status_var = tk.StringVar(value="Generating clue...")
        tk.Label(self.root, textvariable=self.status_var, font=("Helvetica", 14), pady=10).pack()

        self.score_var = tk.StringVar()
        tk.Label(self.root, textvariable=self.score_var, font=("Helvetica", 12), pady=5).pack()

        grid_frame = tk.Frame(self.root, padx=10, pady=10)
        grid_frame.pack()

        for i, card in enumerate(self.game.board):
            row, col = divmod(i, 5)
            btn = tk.Button(
                grid_frame, text=card.word, width=12, height=3, font=("Helvetica", 11),
                # idx=i binds the current index; a bare closure would see the last i.
                command=lambda idx=i: self._on_card_click(idx),
            )
            btn.grid(row=row, column=col, padx=3, pady=3)
            self.buttons.append(btn)

        ctrl_frame = tk.Frame(self.root, pady=10)
        ctrl_frame.pack()
        tk.Button(ctrl_frame, text="New Clue", command=self._generate_clue).pack(side=tk.LEFT, padx=5)
        tk.Button(ctrl_frame, text="AI Guess", command=self._ai_guess).pack(side=tk.LEFT, padx=5)
        tk.Button(ctrl_frame, text="Toggle View", command=self._toggle_view).pack(side=tk.LEFT, padx=5)

        self._update_board()

    def _get_card_colors(self, card: Card) -> dict:
        """Return the {"bg", "fg"} colours for a card in the current view."""
        if card.revealed:
            c = COLORS[card.role]
            return {"bg": c["bg"], "fg": c["fg"]}
        elif self.spymaster_view:
            c = COLORS[card.role]
            return {"bg": c["unrevealed"], "fg": "black"}
        else:
            return HIDDEN_COLOR

    def _update_board(self):
        """Recolour every card button, disable revealed cards, and refresh the score line."""
        for i, card in enumerate(self.game.board):
            colors = self._get_card_colors(card)
            self.buttons[i].configure(bg=colors["bg"], fg=colors["fg"], state=tk.DISABLED if card.revealed else tk.NORMAL)
        view = "SPYMASTER" if self.spymaster_view else "GUESSER"
        self.score_var.set(f"Red: {self.game.remaining_count(Team.RED)} | Blue: {self.game.remaining_count(Team.BLUE)} | View: {view}")

    def _end_turn(self):
        """Pass the turn and drop the finished turn's clue."""
        self.game.end_turn()
        # Clearing the clue stops the next team from guessing (by click or via
        # "AI Guess") with the previous team's clue.
        self.current_clue = None
        self.guesses_remaining = 0

    def _on_card_click(self, index: int):
        """Apply a guess on the card at `index` and update the status line."""
        if self.game.game_over:
            return
        card = self.game.board[index]
        if card.revealed:
            return
        if self.current_clue is None:
            # No turn in progress: a clue has to be generated before guessing.
            self.status_var.set(f"{self.game.current_team.value.upper()}'s turn. Click 'New Clue'.")
            return

        result = self.game.make_guess(card.word)
        self._update_board()

        if result.game_over:
            winner = self.game.winner.value.upper() if self.game.winner else "??"
            reason = "ASSASSIN!" if result.assassin else "All words found!"
            self.status_var.set(f"GAME OVER — {winner} wins! ({reason})")
        elif not result.correct:
            self._end_turn()
            self.status_var.set(f"Wrong! ({result.role.value}) — {self.game.current_team.value.upper()}'s turn. Click 'New Clue'.")
        else:
            self.guesses_remaining -= 1
            if self.guesses_remaining <= 0:
                self._end_turn()
                self.status_var.set(f"All guesses used — {self.game.current_team.value.upper()}'s turn. Click 'New Clue'.")
            else:
                self.status_var.set(f"Correct! Clue: '{self.current_clue.word}' — {self.guesses_remaining} guesses left")

    def _generate_clue(self):
        """Ask the Spymaster for a clue for the current team and start its turn."""
        if self.game.game_over:
            return
        self.status_var.set("Thinking...")
        # Force a redraw so "Thinking..." shows while the clue is computed.
        self.root.update()
        clue = self.spymaster.generate_clue(self.game, self.game.current_team)
        self.current_clue = clue
        self.guesses_remaining = clue.count
        self.status_var.set(f"{self.game.current_team.value.upper()} Spymaster: '{clue.word}' for {clue.count}")

    def _ai_guess(self):
        """Let the AI Guesser make a single guess for the active clue."""
        if self.game.game_over or self.current_clue is None:
            return
        unrevealed_words = [c.word for c in self.game.get_unrevealed()]
        # Ask for one guess at a time; each press of the button is one guess.
        guesses = self.guesser.make_guesses(Clue(self.current_clue.word, 1), unrevealed_words)
        if guesses:
            for i, card in enumerate(self.game.board):
                if card.word == guesses[0]:
                    self._on_card_click(i)
                    break

    def _toggle_view(self):
        """Switch between guesser view and spymaster view."""
        self.spymaster_view = not self.spymaster_view
        self._update_board()

    def run(self):
        """Generate the first clue and enter the Tk main loop."""
        self._generate_clue()
        self.root.mainloop()

In [None]:
# Launch the GUI
board = generate_board(seed=123)
board_words = [c.word for c in board]
# Build the exclusion set once instead of once per candidate word.
board_word_set = set(board_words)
vocab_words = [w for w in get_candidate_words() if w not in board_word_set]
# Separate engine/AI instances, since this board differs from the seed-42 test board.
engine_gui = EmbeddingEngine(board_words, vocab_words)
spymaster_gui = Spymaster(engine_gui)
guesser_gui = Guesser(engine_gui)
game_gui = GameState(board)

gui = CodenamesGUI(game_gui, spymaster_gui, guesser_gui, spymaster_view=True)
gui.run()

## Part 8 (Bonus): Evaluate Your AI

In [None]:
@dataclass
class GameStats:
    """Summary of one finished AI-vs-AI game, as collected by evaluate_ai."""

    winner: Team
    num_turns: int  # number of entries in the game's turn history
    assassin_hit: bool  # True if any guess in the game revealed the assassin
    red_remaining: int  # red cards still unrevealed at the end
    blue_remaining: int  # blue cards still unrevealed at the end


def evaluate_ai(n_games: int = 20) -> list[GameStats]:
    """
    Play `n_games` silent AI-vs-AI games and collect one GameStats per game.

    Game number g (counting from 0) uses board seed g * 17 + 7. A one-line
    summary is printed after each game.
    """
    collected = []
    for game_no in range(1, n_games + 1):
        finished = play_game(seed=(game_no - 1) * 17 + 7, verbose=False)
        turns_played = len(finished.turn_history)
        hit_assassin = any(
            guess.assassin
            for _team, _clue, guesses in finished.turn_history
            for guess in guesses
        )
        collected.append(
            GameStats(
                winner=finished.winner,
                num_turns=turns_played,
                assassin_hit=hit_assassin,
                red_remaining=finished.remaining_count(Team.RED),
                blue_remaining=finished.remaining_count(Team.BLUE),
            )
        )
        suffix = " (ASSASSIN)" if hit_assassin else ""
        print(f"  Game {game_no}/{n_games}: {finished.winner.value} wins in {turns_played} turns{suffix}")
    return collected


def print_evaluation_report(stats: list[GameStats]):
    """
    Print win rates, assassin rate and averages over a batch of games.

    Args:
        stats: One GameStats per finished game. An empty list prints the
            header and a short notice instead of dividing by zero.
    """
    n = len(stats)

    print(f"\n{'=' * 40}")
    print(f"EVALUATION REPORT ({n} games)")
    print(f"{'=' * 40}")

    if not stats:
        # Every figure below is a ratio over n, so there is nothing to report.
        print("No games to report.")
        return

    red_wins = sum(1 for s in stats if s.winner == Team.RED)
    blue_wins = n - red_wins
    assassin_hits = sum(1 for s in stats if s.assassin_hit)
    avg_turns = sum(s.num_turns for s in stats) / n

    print(f"Red win rate:   {red_wins / n:.0%} ({red_wins}/{n})")
    print(f"Blue win rate:  {blue_wins / n:.0%} ({blue_wins}/{n})")
    print(f"Assassin rate:  {assassin_hits / n:.0%} ({assassin_hits}/{n})")
    print(f"Avg turns/game: {avg_turns:.1f}")
    print(f"Avg red remaining:  {sum(s.red_remaining for s in stats) / n:.1f}")
    print(f"Avg blue remaining: {sum(s.blue_remaining for s in stats) / n:.1f}")

In [None]:
# Test Part 8
# Plays 10 silent games, then prints the aggregate report.
stats = evaluate_ai(n_games=10)
assert len(stats) == 10
assert all(isinstance(s, GameStats) for s in stats)
print_evaluation_report(stats)
print("✓ Part 8 passed")

## Part 9 (Bonus): Agglomerative Clustering Spymaster

In [None]:
from scipy.cluster.hierarchy import linkage, fcluster
from collections import defaultdict


class ClusteringSpymaster(Spymaster):
    """
    Extended Spymaster that uses agglomerative clustering to find
    natural groups among team words before searching for clues.
    """

    def cluster_team_words(self, team_indices: list[int], max_k: int = 5) -> dict[int, list[list[int]]]:
        """
        Cluster team words with average-linkage clustering on cosine distance.

        Args:
            team_indices: board positions of the team's unrevealed words.
            max_k: largest number of clusters to request (capped at the
                number of team words).

        Returns:
            Dict mapping each requested k (2..max_k) to a list of clusters,
            each cluster being a list of board indices. With zero or one
            team word the result is {1: [team_indices]}.
        """
        if len(team_indices) <= 1:
            return {1: [team_indices]}

        embeddings = self.engine.board_embeddings[team_indices]
        Z = linkage(embeddings, method="average", metric="cosine")

        max_k = min(max_k, len(team_indices))
        clusterings = {}
        for k in range(2, max_k + 1):
            # "maxclust" cuts the tree into at most k flat clusters.
            labels = fcluster(Z, t=k, criterion="maxclust")
            groups = defaultdict(list)
            for idx, label in zip(team_indices, labels):
                groups[label].append(idx)
            clusterings[k] = list(groups.values())

        return clusterings

    def generate_clue(self, game: GameState, team: Team) -> ScoredClue:
        """
        Generate the best clue using a clustering + greedy hybrid.

        Starts from the greedy clue of the parent class, then scores every
        distinct cluster of 2 to 4 team words as an explicit target set and
        returns whichever clue scores strictly highest.
        """
        word_to_idx = self.engine.board_word_to_idx
        sims = self.engine.vocab_to_board_sims

        team_cards = game.get_unrevealed(game.get_team_role(team))
        opp_cards = game.get_unrevealed(game.get_opponent_role(team))
        assassin_cards = game.get_unrevealed(CardRole.ASSASSIN)
        bystander_cards = game.get_unrevealed(CardRole.BYSTANDER)

        team_indices = [word_to_idx[c.word] for c in team_cards]
        opp_indices = [word_to_idx[c.word] for c in opp_cards]
        # NOTE: with no unrevealed assassin this falls back to board index 0,
        # so that word is then scored as if it were the assassin.
        assassin_index = word_to_idx[assassin_cards[0].word] if assassin_cards else 0
        bystander_indices = [word_to_idx[c.word] for c in bystander_cards]

        # Start with greedy baseline
        best = super().generate_clue(game, team)

        # The same cluster usually reappears for several values of k. Scoring
        # it again would give an identical score, which can never beat `best`
        # (strict comparison), so each distinct cluster is scored only once.
        seen_clusters: set[tuple[int, ...]] = set()
        for clusters in self.cluster_team_words(team_indices).values():
            for cluster in clusters:
                count = len(cluster)
                if count < 2 or count > 4:
                    continue
                cluster_key = tuple(sorted(cluster))
                if cluster_key in seen_clusters:
                    continue
                seen_clusters.add(cluster_key)

                # count == len(cluster), so the weakest link is the cluster's
                # least similar member.
                scores, _ = self.score_all_vocab(cluster, opp_indices, assassin_index, bystander_indices, count)
                top_idx = int(np.argmax(scores))
                if not scores[top_idx] > best.score:
                    continue

                cluster_sims = sims[top_idx, cluster]
                order = np.argsort(cluster_sims)[::-1]
                max_opp = float(sims[top_idx, opp_indices].max()) if opp_indices else 0.0

                best = ScoredClue(
                    word=self.engine.vocab_words[top_idx],
                    count=count,
                    score=float(scores[top_idx]),
                    targets=[self.engine.board_words[cluster[i]] for i in order],
                    target_sims=[float(cluster_sims[i]) for i in order],
                    max_opponent_sim=max_opp,
                    assassin_sim=float(sims[top_idx, assassin_index]),
                )

        return best

In [None]:
# Test Part 9
board = generate_board(seed=42)
game = GameState(board)
board_words = [c.word for c in board]
# Build the exclusion set once instead of once per candidate word.
board_word_set = set(board_words)
vocab_words = [w for w in get_candidate_words() if w not in board_word_set]
engine9 = EmbeddingEngine(board_words, vocab_words)
clustering_spy = ClusteringSpymaster(engine9)

team_cards = game.get_unrevealed(CardRole.RED)
team_indices = [engine9.board_word_to_idx[c.word] for c in team_cards]

# Every clustering must partition exactly the team's board indices.
clusterings = clustering_spy.cluster_team_words(team_indices)
assert isinstance(clusterings, dict)
for k, clusters in clusterings.items():
    all_indices = [idx for cluster in clusters for idx in cluster]
    assert sorted(all_indices) == sorted(team_indices), f"k={k}: missing indices"
    print(f"k={k}: {[[engine9.board_words[i] for i in c] for c in clusters]}")

clue = clustering_spy.generate_clue(game, Team.RED)
assert isinstance(clue, ScoredClue)
print(f"\nClustering clue: '{clue.word}' for {clue.count}")
print(f"Targets: {clue.targets}")
print(f"Score: {clue.score:.3f}")
print("✓ Part 9 passed")