In [14]:


import numpy as np
import random

# Toy vocabulary standing in for a Codenames board (simplified).
codenames_words = [
    'apple',
    'banana',
    'cherry',
    'date',
    'elephant',
    'flamingo',
    'giraffe',
    'hippo',
]

# State/action model (deliberately simplified):
#   * a state is the number of words still left to guess
#   * an action is "give a clue for word i", i.e. an index into codenames_words
# Both dimensions are sized by the vocabulary.
# NOTE: valid state indices are therefore 0 .. len(codenames_words) - 1, so a
# "words left" count equal to the full list length has no row in the table.
num_states = num_actions = len(codenames_words)

# Q-table: one row per state, one column per action, all values start at 0.0.
q_table = np.zeros(shape=(num_states, num_actions))

# Q-learning hyper-parameters:
#   alpha   - learning rate
#   gamma   - discount factor
#   epsilon - exploration rate
alpha, gamma, epsilon = 0.1, 0.6, 0.1

# Simulated reward for giving a clue in a given state (highly simplified)
def get_reward(state, action):
    """Return a randomly simulated reward for one clue.

    Args:
        state: Number of words left to guess; only used to scale the
            success reward.
        action: Index of the clue that was given. It is accepted for
            interface symmetry but does not influence the result.

    Returns:
        ``10 - state`` when the simulated guess succeeds (so the payoff
        grows as ``state`` shrinks), otherwise ``-10``.
    """
    # One draw decides the outcome: the team is assumed to guess
    # correctly 80% of the time.
    guessed_correctly = random.random() < 0.8
    if not guessed_correctly:
        return -10  # Penalty for an incorrect guess
    return 10 - state

# Q-learning training loop (simplified)
# Each episode starts from a random "words left" count and steps down to 0.
# State 0 is terminal: the loop breaks as soon as it is reached, so row 0 of
# the Q-table is never written here.
for episode in range(1000):  # Number of games to simulate
    state = random.randint(1, num_states - 1)  # Random initial state

    for _ in range(100):  # Limit the number of steps in each game
        # Epsilon-greedy action selection
        if random.uniform(0, 1) < epsilon:  # Explore action space
            action = random.randint(0, num_actions - 1)
        else:  # Exploit learned values
            action = np.argmax(q_table[state])

        # Get the reward for taking the action
        reward = get_reward(state, action)

        # Simplified transition: every clue is assumed to remove one word,
        # regardless of whether the simulated guess was rewarded or penalised.
        next_state = state - 1

        # Update Q-table using the Q-learning equation:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        # The bootstrap term must come from the successor state s', not from
        # the state the action was taken in. When s' is the terminal state 0
        # its row has never been updated by this loop, so it adds nothing
        # beyond the table's initial values.
        next_max = np.max(q_table[next_state])  # Estimate of optimal future value
        q_value = q_table[state, action]
        new_q_value = q_value + alpha * (reward + gamma * next_max - q_value)
        q_table[state, action] = new_q_value

        state = next_state
        if state == 0:  # All words guessed
            break

# Greedy clue lookup against the trained Q-table
def select_clue(state):
    """Return the word whose action has the highest Q-value in ``state``.

    Args:
        state: Row index into ``q_table`` (the number of words left).

    Returns:
        The entry of ``codenames_words`` at the arg-max column of that row;
        ties resolve to the lowest index.
    """
    q_row = q_table[state]
    return codenames_words[q_row.argmax()]

# Demo: ask the greedy policy for a clue with 5 words still on the board
print("Selected clue:", select_clue(state=5))




Selected clue: apple
