In [1]:
import numpy as np
import random

In [2]:
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The agent keeps a ``(num_states, num_actions)`` table of action-value
    estimates, initialised to zero, and refines it one transition at a time
    through :meth:`update_q_table`.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.1):
        """Store the hyperparameters and allocate a zero-filled Q-table.

        Args:
            num_states: Number of rows in the Q-table.
            num_actions: Number of columns in the Q-table.
            learning_rate: Step size applied to each temporal-difference error.
            discount_factor: Weight given to the best value of the next state.
            exploration_prob: Probability of picking a uniformly random action
                instead of a greedy one in :meth:`choose_action`.
        """
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        """Pick an action for ``state`` using epsilon-greedy selection.

        With probability ``exploration_prob`` a uniformly random action is
        returned. Otherwise the action with the highest Q-value is returned;
        when several actions share that highest value one of them is chosen
        at random, so an untrained (all-zero) table does not always yield
        action 0.

        Returns:
            The chosen action index as a plain Python ``int``.
        """
        if random.uniform(0, 1) < self.exploration_prob:
            return random.randrange(self.num_actions)  # Explore

        # Exploit
        action_values = self.q_table[state, :]
        best_actions = np.flatnonzero(action_values == np.max(action_values))
        if best_actions.size <= 1:
            # Unique maximum (or no match at all, e.g. NaN entries): plain
            # argmax, which also avoids consuming an extra random number.
            return int(np.argmax(action_values))
        return int(random.choice(best_actions.tolist()))

    def update_q_table(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: Index of the state in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Index of the state reached.
            done: When true, ``next_state`` is treated as terminal and its
                Q-values do not contribute to the target. Defaults to
                ``False``, which always bootstraps from ``next_state``.
        """
        predict = self.q_table[state, action]
        # A terminal state has no future return, so skip the bootstrap term.
        future_value = 0.0 if done else np.max(self.q_table[next_state, :])
        target = reward + self.discount_factor * future_value
        self.q_table[state, action] += self.learning_rate * (target - predict)

In [3]:
def play_game_with_agent(agent, num_states, num_actions):
    """Run an interactive game in which the user and the agent alternate moves.

    The state starts at 0 and each action is added to it modulo ``num_states``.
    Whoever moves the state onto ``num_states - 1`` ends the game; that
    transition earns a reward of 1 and every other transition earns 0. Every
    transition, whether made by the user or the agent, is fed to
    ``agent.update_q_table``.

    Args:
        agent: Object providing ``choose_action(state)`` and
            ``update_q_table(state, action, reward, next_state)``.
        num_states: Number of states; must be at least 1.
        num_actions: Number of actions the user may enter; must be at least 1.

    Raises:
        ValueError: If ``num_states`` or ``num_actions`` is smaller than 1.
    """
    if num_states < 1 or num_actions < 1:
        raise ValueError("num_states and num_actions must both be at least 1")

    print("Welcome to the Q-Learning Game!")
    state = 0
    done = False

    while not done:
        print(f"Current state: {state}")
        user_action = _read_user_action(num_actions)
        state, done = _apply_action(
            agent, state, user_action, num_states,
            "Congratulations! You reached the final state.",
        )
        if done:
            # The user already finished the game; the agent must not move
            # (and learn) from the terminal state.
            break

        print("Agent's turn...")
        agent_action = agent.choose_action(state)
        state, done = _apply_action(
            agent, state, agent_action, num_states,
            "The agent reached the final state.",
        )

    print("Game over.")


def _read_user_action(num_actions):
    """Prompt until the user enters an integer in ``range(num_actions)``."""
    choices = " or ".join(str(action) for action in range(num_actions))
    while True:
        raw = input(f"Enter your action ({choices}): ")
        try:
            action = int(raw)
        except ValueError:
            action = -1  # Not a number: treat as out of range and re-prompt.
        if 0 <= action < num_actions:
            return action
        print(f"Invalid action {raw!r}. Please enter one of: {choices}.")


def _apply_action(agent, state, action, num_states, goal_message):
    """Advance the state by ``action``, update the agent, return ``(next_state, done)``."""
    next_state = (state + action) % num_states
    done = next_state == num_states - 1
    if done:
        print(goal_message)
    reward = 1 if done else 0
    agent.update_q_table(state, action, reward, next_state)
    return next_state, done

In [4]:
if __name__ == "__main__":
    # Environment size: ten states arranged in a ring, two actions
    # (0 keeps the current state, 1 steps forward by one).
    num_states, num_actions = 10, 2

    # Build an agent with the default hyperparameters and start the
    # interactive game against it.
    agent = QLearningAgent(num_states=num_states, num_actions=num_actions)
    play_game_with_agent(agent=agent, num_states=num_states, num_actions=num_actions)

Welcome to the Q-Learning Game!
Current state: 0
Enter your action (0 or 1): 0
Agent's turn...
Current state: 0
Enter your action (0 or 1): 1
Agent's turn...
Current state: 1
Enter your action (0 or 1): 1
Agent's turn...
Current state: 2
Enter your action (0 or 1): 1
Agent's turn...
Current state: 3
Enter your action (0 or 1): 1
Agent's turn...
Current state: 4
Enter your action (0 or 1): 1
Agent's turn...
Current state: 5
Enter your action (0 or 1): 1
Agent's turn...
Current state: 6
Enter your action (0 or 1): 1
Agent's turn...
Current state: 7
Enter your action (0 or 1): 1
Agent's turn...
The agent reached the final state.
Game over.
