# Applying RLlib's Dueling DQN Algorithm to the Pong Environment

First, we set everything up by installing and importing the libraries that the algorithm needs in order to work correctly.

In [None]:
# Uninstall any existing jax, jaxlib, flax and numpy (-y skips the confirmation
# prompt) so that they cannot conflict with the pinned versions installed in
# the following cells
!pip uninstall -y jax jaxlib flax numpy

Install compatible, pinned versions of the dependencies, one cell at a time:

In [None]:
# Install jax, jaxlib and flax pinned to fixed versions
!pip install "jax==0.4.23" "jaxlib==0.4.23" "flax==0.7.2"

In [None]:
# Install NumPy pinned to 1.24.4 (it was uninstalled in the first cell)
!pip install "numpy==1.24.4"

In [None]:
# Install Ray 2.9.0 with the RLlib extra; ray.rllib provides the DQNConfig
# imported later in this notebook
!pip install "ray[rllib]==2.9.0"

In [None]:
# Install gym 0.26.2 with its Atari extra, together with ale-py 0.8.1
!pip install "gym[atari]==0.26.2" ale-py==0.8.1

In [None]:
# Install the gym Atari extra together with AutoROM and its ROM-license extra.
# NOTE(review): gym is not version-pinned in this command, unlike the
# gym[atari]==0.26.2 install in the previous cell.
!pip install "gym[atari]" "autorom[accept-rom-license]"

In [None]:
# Run the AutoROM command-line tool with the license accepted up front
# (presumably this fetches the Atari ROMs needed by the Pong environment)
!AutoROM --accept-license

In [None]:
# Force-reinstall setuptools pinned to 65.5.0; setuptools ships the
# pkg_resources module that is imported later in this notebook
!pip install --force-reinstall "setuptools==65.5.0"

In [None]:
# Install the gym Atari extra + AutoROM.
# NOTE(review): this is the same command as an earlier install cell; it is
# presumably re-run here because setuptools was just force-reinstalled.
!pip install "gym[atari]" "autorom[accept-rom-license]"

In [None]:
# Run AutoROM again with the license accepted up front
# NOTE(review): identical to an earlier cell; likely redundant on a clean run
!AutoROM --accept-license

In [None]:
!pip install gym[atari] autorom[accept-rom-license]

In [None]:
!pip install gymnasium[atari]

In [None]:
import pkg_resources
from pkg_resources._vendor.packaging.version import parse as parse_version

In [None]:
import ray
from ray.rllib.algorithms.dqn import DQNConfig

#ray.init(ignore_reinit_error=True)

In [None]:
# Import libraries
import gymnasium as gym
from ray import tune
from gymnasium.spaces import Box, Discrete
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Register the environment under the name that the DQN config refers to
def _make_pong_ram_env(config):
    """Build a fresh "Pong-ram-v0" environment.

    The `config` argument is accepted because the registered creator is
    called with one, but it is not used when constructing the environment.
    """
    return gym.make("Pong-ram-v0")


tune.register_env("Pong-ram-v0", _make_pong_ram_env)

In [None]:
# Configure Dueling DQN
#
# Settings that were tried but dropped because the run time became too long:
#   .training(train_batch_size=256, lr=1e-4)
# An explicit exploration schedule was also left disabled:
#   .exploration(exploration_config={"type": "EpsilonGreedy",
#       "initial_epsilon": 1.0, "final_epsilon": 0.1, "epsilon_timesteps": 20000})
#
# NOTE(review): the creator registered for "Pong-ram-v0" in this notebook
# ignores its config argument, so the env_config below presumably has no
# effect on the environment - confirm.
config = (
    DQNConfig()
    .environment(env="Pong-ram-v0", env_config={"max_episode_steps": 500})
    .rollouts(num_rollout_workers=0)
    .training(dueling=True)
)

# Build agent
algo = config.build()

# Train agent: run 100 training iterations and keep the mean episode reward
# reported by each one
rewards = []
for i in range(100):
    result = algo.train()
    mean_reward = result["episode_reward_mean"]
    rewards.append(mean_reward)
    print(f"Iteration: {i}, Reward: {mean_reward}")

In [None]:
# Save rewards: write the per-iteration mean training rewards to a CSV file
rewards_frame = pd.DataFrame(rewards)
rewards_frame.to_csv("dueling_dqn_rewards.csv")

In [None]:
# Evaluation: play n_episodes games with the trained policy acting without
# exploration (explore=False) and record each episode's net score
# (agent points minus opponent points) in net_scores.
env = gym.make("Pong-ram-v0")
n_episodes = 100
net_scores = []

try:
    for episode in range(1, n_episodes + 1):
        obs, info = env.reset()
        agent_pts, opponent_pts = 0, 0
        terminated = truncated = False

        # The episode is over when the environment reports either `terminated`
        # or `truncated`; checking only `terminated` would keep stepping an
        # episode that was cut off (e.g. by a step limit) without a reset.
        while not (terminated or truncated):
            action = algo.compute_single_action(obs, explore=False)
            obs, reward, terminated, truncated, info = env.step(action)

            # Track points: a reward of +1 is counted for the agent,
            # a reward of -1 for the opponent
            if reward == 1:
                agent_pts += 1
            elif reward == -1:
                opponent_pts += 1

        score_diff = agent_pts - opponent_pts
        net_scores.append(score_diff)

        # Log every 10 episodes (and always the last one)
        if episode % 10 == 0 or episode == n_episodes:
            print(f"[Episode {episode}] Agent: {agent_pts}, Opponent: {opponent_pts}, Net Score: {score_diff}")
finally:
    # Always release the environment, even if an episode raises
    env.close()

In [None]:
# Training curve: the mean episode reward recorded for each training iteration
iterations = list(range(1, len(rewards) + 1))

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(iterations, rewards, marker='.', linestyle='-')
ax.set_xlabel("Iteration")
ax.set_ylabel("Average Reward")
ax.set_title("Dueling DQN Average Reward per Iteration on Pong (during training)")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Evaluation curve: one point per evaluation episode, showing that episode's
# net score (agent points minus opponent points) taken from net_scores.
episodes = list(range(1, len(net_scores)+1))

plt.figure(figsize=(6, 4))
plt.plot(episodes, net_scores, marker='.', linestyle='-')
plt.xlabel("Episode")
# Each plotted value is a single episode's net score, not an average,
# so the axis label and title name it accordingly.
plt.ylabel("Net Score (Agent - Opponent)")
plt.title("Dueling DQN Net Score per Episode on Pong (during evaluation)")
plt.grid(True)
plt.tight_layout()
plt.show()