# CVaR-QRDQN Demo

This notebook demonstrates the training and evaluation of the CVaR-QRDQN model on the CartPole-v1 environment.

In [1]:
# -*- coding: utf-8 -*-
"""
Training script for CVaR-QRDQN
"""

import numpy as np
import torch
from environment import create_environment
from agent import Agent

def train(env_name, num_episodes, max_steps, epsilon_start, epsilon_end, epsilon_decay, lr, gamma, cvar_alpha, num_quantiles):
    """Run an epsilon-greedy training loop and save the agent's model weights.

    Works with both environment API shapes: legacy Gym (``reset() -> obs``,
    ``step() -> (obs, reward, done, info)``) and Gym >= 0.26 / Gymnasium
    (``reset() -> (obs, info)``,
    ``step() -> (obs, reward, terminated, truncated, info)``).

    Args:
        env_name: Identifier handed to ``create_environment``.
        num_episodes: Number of episodes to run.
        max_steps: Upper bound on the number of steps taken in one episode.
        epsilon_start: Exploration rate used for the first episode.
        epsilon_end: Lower bound the exploration rate is clamped to.
        epsilon_decay: Factor the exploration rate is multiplied by after
            every episode.
        lr, gamma, cvar_alpha, num_quantiles: Forwarded unchanged to ``Agent``.

    Side effects:
        Prints the total reward of every episode and, after the last episode,
        writes ``agent.model.state_dict()`` to ``cvar_qrdqn_model.pth`` in the
        current working directory.
    """
    env = create_environment(env_name)
    agent = Agent(env.observation_space.shape[0], env.action_space.n, num_quantiles, lr, gamma, cvar_alpha)

    epsilon = epsilon_start
    try:
        for episode in range(num_episodes):
            reset_result = env.reset()
            # Newer APIs return (observation, info); keep only the observation.
            if isinstance(reset_result, tuple) and len(reset_result) == 2 and isinstance(reset_result[1], dict):
                state = reset_result[0]
            else:
                state = reset_result
            total_reward = 0

            for _ in range(max_steps):
                action = agent.select_action(state, epsilon)
                step_result = env.step(action)
                if len(step_result) == 5:
                    next_state, reward, terminated, truncated, _info = step_result
                    # Collapse to the single legacy flag, which also covered
                    # time-limit truncation, so the agent sees the same signal.
                    done = bool(terminated or truncated)
                else:
                    next_state, reward, done, _info = step_result
                total_reward += reward

                agent.update((state, action, reward, next_state, done))

                state = next_state
                if done:
                    break

            # Decay exploration once per episode, never below the floor.
            epsilon = max(epsilon_end, epsilon * epsilon_decay)
            print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}")
    finally:
        # Release the environment even if training is interrupted or fails.
        close_env = getattr(env, "close", None)
        if callable(close_env):
            close_env()

    # Save the model
    torch.save(agent.model.state_dict(), "cvar_qrdqn_model.pth")

if __name__ == "__main__":
    # Default CartPole run, one setting per line so each value is easy to tweak.
    default_settings = dict(
        env_name="CartPole-v1",
        num_episodes=500,
        max_steps=200,
        epsilon_start=1.0,
        epsilon_end=0.01,
        epsilon_decay=0.995,
        lr=0.001,
        gamma=0.99,
        cvar_alpha=0.1,
        num_quantiles=50,
    )
    train(**default_settings)

AttributeError: module 'numpy' has no attribute 'bool8'

In [1]:
# Import necessary libraries
import numpy as np
import torch
import matplotlib.pyplot as plt
from CVaR_QRDQN.train import train
from CVaR_QRDQN.evaluate import evaluate
from CVaR_QRDQN.environment import create_environment
from CVaR_QRDQN.agent import Agent

## Training the CVaR-QRDQN Model

In [2]:
# Settings for the training run; several of these names are reused by later cells
env_name = "CartPole-v1"
num_episodes, max_steps = 500, 200

# Exploration (epsilon) settings
epsilon_start, epsilon_end, epsilon_decay = 1.0, 0.01, 0.995

# Remaining values handed to train()
lr, gamma = 0.001, 0.99
cvar_alpha, num_quantiles = 0.1, 50

# Announce, then launch the training run
print("Training the CVaR-QRDQN model...")
train(
    env_name,
    num_episodes,
    max_steps,
    epsilon_start,
    epsilon_end,
    epsilon_decay,
    lr,
    gamma,
    cvar_alpha,
    num_quantiles,
)

## Evaluating the CVaR-QRDQN Model

In [3]:
# Score the saved checkpoint over a handful of evaluation episodes
print("Evaluating the CVaR-QRDQN model...")
model_path, num_eval_episodes = "cvar_qrdqn_model.pth", 10
evaluate(
    env_name,
    model_path,
    num_eval_episodes,
    max_steps,
    num_quantiles,
)

## Displaying Intermediate Results

In [4]:
# Create environment and agent, then restore the weights stored at model_path
env = create_environment(env_name)
agent = Agent(env.observation_space.shape[0], env.action_space.n, num_quantiles, lr, gamma, cvar_alpha)
agent.model.load_state_dict(torch.load(model_path))

# Take the initial observation as the sample state.
# Gym >= 0.26 / Gymnasium return (observation, info) from reset(); older Gym
# returns the observation alone, so unwrap only when the tuple form is seen.
reset_result = env.reset()
if isinstance(reset_result, tuple) and len(reset_result) == 2 and isinstance(reset_result[1], dict):
    state = reset_result[0]
else:
    state = reset_result

# Display Q-values for the sample state
state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)  # add a leading batch axis
with torch.no_grad():  # inference only: no autograd graph needed
    quantiles = agent.model(state_tensor)
# Averaging over the last axis collapses each action's quantiles to one value.
q_values = quantiles.mean(dim=-1).numpy()
print("Q-values for the sample state:", q_values)

# Plot each action's quantile values against the quantile index
fig, ax = plt.subplots(figsize=(10, 6))
for action in range(env.action_space.n):
    ax.plot(quantiles[0, action, :].numpy(), label=f'Action {action}')
ax.set_xlabel('Quantile Index')
ax.set_ylabel('Q-value')
ax.set_title('Distribution of Q-values')
ax.legend()
plt.show()