In [None]:
%reload_ext autoreload
%load_ext autoreload
%autoreload 2

import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from agents.q_agent import QAgent 

# Baseline configuration; it carries exactly the keys that run() reads.
params = dict(
    # --- Environment ---
    env=gym.make("CartPole-v1"),

    # --- Agent ---
    # Keeps observation components 1, 2 and 3; component 0 is dropped.
    state_filter=lambda state: (state[1], state[2], state[3]),
    learning_rate=0.001,
    discount_factor=0.99,
    state_rounding=1,

    # --- Training schedule ---
    training_episodes=500_000,
    initial_epsilon=1.0,
    min_epsilon=0.5,
    decay_percentage=0.5,

    # --- Evaluation ---
    test_episodes=20,
)

def run(params):
    """Train a QAgent, replay it in a rendered CartPole-v1 env, and plot the rewards.

    Args:
        params: Mapping with the following keys:
            "env": environment handed to the QAgent for training.
            "state_filter", "learning_rate", "discount_factor",
            "state_rounding": forwarded unchanged to the QAgent constructor.
            "training_episodes", "initial_epsilon", "min_epsilon",
            "decay_percentage": forwarded unchanged to ``agent.train``.
            "test_episodes": number of rendered evaluation episodes.

    Returns:
        None. Per-episode totals are printed and shown as a matplotlib plot.

    Raises:
        KeyError: if any of the keys listed above is missing from ``params``.
    """
    agent = QAgent(
        env=params["env"],
        state_filter=params["state_filter"],
        learning_rate=params["learning_rate"],
        discount_factor=params["discount_factor"],
        state_rounding=params["state_rounding"]
    )

    # Train the agent for the configured number of episodes.
    agent.train(
        episodes=params["training_episodes"],
        initial_epsilon=params["initial_epsilon"],
        min_epsilon=params["min_epsilon"],
        decay_percentage=params["decay_percentage"]
    )

    reward_over_episodes = []

    # A second environment in "human" render mode is used so the evaluation
    # episodes can be watched; the training env in params is left untouched.
    env = gym.make("CartPole-v1", render_mode="human")
    try:
        for _episode in range(params["test_episodes"]):
            state, _ = env.reset()
            episode_reward = 0
            terminated = False
            truncated = False

            # Gymnasium signals the end of an episode through two flags:
            # `terminated` (the task itself ended) and `truncated` (e.g. the
            # time limit was reached). Both must stop the loop; checking only
            # `terminated` keeps stepping a finished environment whenever the
            # agent survives until the time limit.
            while not (terminated or truncated):
                action = agent.get_best_action(state)
                state, reward, terminated, truncated, _ = env.step(action)
                episode_reward += reward

            reward_over_episodes.append(episode_reward)
            print("Episode terminated, total reward:", episode_reward)
    finally:
        # Always release the render window, even if evaluation fails midway.
        env.close()

    # Plot the reward over episodes
    plt.plot(reward_over_episodes)
    plt.ylabel('Reward')
    plt.xlabel('Episode')
    plt.show()

In [None]:
# Validate QAgent training against the actual Q-table

%reload_ext autoreload
%load_ext autoreload
%autoreload 2

from test_env import TestEnv
from agents.q_agent import QAgent

# Build the test environment and an agent with default settings on top of it.
env = TestEnv()
agent = QAgent(env)

# Reference values come from the environment itself; the agent's values come
# from training.
q_values = env.calculate_q_table_state_action_values()
agent.train(100_000)

agent_q_values = agent.q_table

# Even a short training run should ideally yield a Q-table close to the
# reference one; print both (reference first, learned second) to compare.
print(q_values, agent_q_values, sep="\n")

In [None]:
# Shorter experiment: 10,000 training episodes at learning rate 0.07,
# followed by 20 evaluation episodes.
run(dict(
    # --- Environment ---
    env=gym.make("CartPole-v1"),

    # --- Agent ---
    state_filter=lambda state: (state[1], state[2], state[3]),
    learning_rate=0.07,
    discount_factor=0.99,
    state_rounding=1,

    # --- Training schedule ---
    training_episodes=10_000,
    initial_epsilon=1.0,
    min_epsilon=0.5,
    decay_percentage=0.5,

    # --- Evaluation ---
    test_episodes=20,
))