In [1]:
import gymnasium as gym

import numpy as np
import random
import torch

import matplotlib.pyplot as plt

from Agents.DQNAgent import DQNAgent

In [2]:
def set_seed(seed):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``.

    Args:
        seed: Integer seed handed unchanged to each of the three seeders.
    """
    for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        seed_rng(seed)

In [4]:
# Random seeds; the sweep cells train one run per seed and average the results.
SEEDS = [42, 0, 5]

# Candidate hyperparameter grids.
# NOTE(review): none of these four lists is referenced by the cells visible in
# this notebook, which define their own sweep values inline -- confirm whether
# they are still needed.
gamma_list = [0.9, 0.95, 0.99]
epsilon_decay_list = [0.99, 0.999]
update_frequency_list = [1, 10, 20]
batch_size_list = [32, 64]

## CartPole Environment

### Hyperparameter Sensitivity (Learning Rate)

In [None]:
# Learning-rate sensitivity sweep for DQN on CartPole-v1.
# Each learning rate is trained once per seed in SEEDS; the reward histories
# are then averaged across seeds.
lrs = [5e-3, 1e-3, 5e-4, 1e-4]
cartpole_results_lr = {}

for lr in lrs:
    all_rewards = []
    for seed in SEEDS:
        print("lr = ", lr, "seed = ", seed)
        set_seed(seed)
        env = gym.make("CartPole-v1")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=800,
            target_update_freq=10,
            lr=lr,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    cartpole_results_lr[f"LR={lr}"] = {
        # Must mirror the arguments passed to train_dqn above: batch_size was
        # recorded as 64 although training runs with 32.
        "params": {
            "lr": lr,
            "gamma": 0.99,
            "batch_size": 32,
            "epsilon_decay": 0.99,
            "target_update_freq": 10
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on CartPole, one curve per learning rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name in cartpole_results_lr:
    ax.plot(cartpole_results_lr[setting_name]["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
ax.set_title("DQN Learning Rate Comparison on CartPole")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in cartpole_results_lr.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
ax.set_title("DQN Learning Rate Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Epsilon Decay)

In [None]:
# Epsilon-decay sensitivity sweep for DQN on CartPole-v1: every decay rate is
# trained once per seed in SEEDS and the reward curves are averaged over seeds.
eps_decay_list = [0.99, 0.995, 0.999]
cartpole_results_eps_decay = {}

for eps_decay in eps_decay_list:
    per_seed_rewards = []
    for seed in SEEDS:
        print("eps_decay = ", eps_decay, "seed = ", seed)
        set_seed(seed)
        env = gym.make("CartPole-v1")

        # train_dqn returns an (agent, rewards) pair; only the rewards are kept.
        trained_agent, episode_rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=eps_decay,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=2000,
            target_update_freq=10,
            lr=1e-3,
            seed=seed,
        )
        per_seed_rewards.append(episode_rewards)

    # Snapshot of the hyperparameters used for this setting.
    run_params = {
        "lr": 1e-3,
        "gamma": 0.99,
        "batch_size": 32,
        "epsilon_decay": eps_decay,
        "target_update_freq": 10,
    }
    cartpole_results_eps_decay[f"eps_decay={eps_decay}"] = {
        "params": run_params,
        # Mean across seeds (axis 0) for every episode.
        "avg_rewards": np.mean(np.array(per_seed_rewards), axis=0),
    }

In [None]:
# Seed-averaged reward per episode on CartPole, one curve per epsilon-decay rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in cartpole_results_eps_decay.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison on CartPole")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in cartpole_results_eps_decay.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Batch Size)

In [None]:
# Batch-size sensitivity sweep for DQN on CartPole-v1: every batch size is
# trained once per seed in SEEDS and the reward curves are averaged over seeds.
batch_sized = [64, 32, 16, 8]
cartpole_results_bs = {}

for bs in batch_sized:
    per_seed_rewards = []
    for seed in SEEDS:
        print("bs = ", bs, "seed = ", seed)
        set_seed(seed)
        env = gym.make("CartPole-v1")

        # train_dqn returns an (agent, rewards) pair; only the rewards are kept.
        trained_agent, episode_rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=bs,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=2000,
            target_update_freq=10,
            lr=0.001,
            seed=seed,
        )
        per_seed_rewards.append(episode_rewards)

    # Snapshot of the hyperparameters used for this setting.
    run_params = {
        "lr": 1e-3,
        "gamma": 0.99,
        "batch_size": bs,
        "epsilon_decay": 0.99,
        "target_update_freq": 10,
    }
    cartpole_results_bs[f"Batch Size={bs}"] = {
        "params": run_params,
        # Mean across seeds (axis 0) for every episode.
        "avg_rewards": np.mean(np.array(per_seed_rewards), axis=0),
    }

In [None]:
# Seed-averaged reward per episode on CartPole, one curve per batch size.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in cartpole_results_bs.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison on CartPole")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in cartpole_results_bs.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Target Net Update Freq)

In [None]:
# Target-network update-frequency sweep for DQN on CartPole-v1.
# Each frequency is trained once per seed in SEEDS; the reward histories are
# then averaged across seeds.
update_freq = [1, 10, 20, 100]
cartpole_results_tf = {}

for freq in update_freq:
    all_rewards = []
    for seed in SEEDS:
        # Log the quantity that is actually being swept (was labelled "bs").
        print("target_update_freq = ", freq, "seed = ", seed)
        set_seed(seed)
        env = gym.make("CartPole-v1")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=freq,
            lr=1e-3,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    # The key doubles as the legend label downstream, so it names the update
    # frequency rather than "Batch Size".
    cartpole_results_tf[f"Target Update Freq={freq}"] = {
        "params": {
            "lr": 1e-3,
            "gamma": 0.99,
            "batch_size": 32,
            "epsilon_decay": 0.99,
            "target_update_freq": freq
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on CartPole, one curve per target-network
# update frequency.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for entry in cartpole_results_tf.values():
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(entry["avg_rewards"], label=curve_label)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# These results come from CartPole and vary the target update frequency.
ax.set_title("DQN Target Update Frequency Comparison on CartPole")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for entry in cartpole_results_tf.values():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=curve_label)

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the target update frequency, not the learning rate.
ax.set_title("DQN Target Update Frequency Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Network Architecture)

In [None]:
# Baseline CartPole-v1 configuration, trained once per seed in SEEDS; the
# reward history of every run is collected in cartpole_rewards1.
cartpole_rewards1 = []
for seed in SEEDS:
    set_seed(seed)

    env = gym.make("CartPole-v1")
    agent, rewards = DQNAgent.train_dqn(
        env=env,
        gamma=0.99,
        lr=1e-3,
        batch_size=64,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.01,
        use_target_net=True,
        constant_epsilon=False,
        num_episodes=1000,
        target_update_freq=10,
        # Pass the loop's seed: a hard-coded 42 handed the same seed to
        # train_dqn on every iteration even though set_seed(seed) varied.
        seed=seed,
        smooth_plot=5
    )

    cartpole_rewards1.append(rewards)

In [None]:
# Stack the per-seed reward histories into one array, then average across
# seeds (axis 0) to get one value per episode.
cartpole_rewards1 = np.array(cartpole_rewards1)
avg_cartpole_rewards1_per_episode = np.mean(cartpole_rewards1, axis=0)

In [None]:
# rolling_avg = np.convolve(avg_cartpole_rewards1_per_episode, np.ones(100)/100, mode='valid')
# plt.plot(avg_cartpole_rewards1_per_episode, alpha=0.4, label="Reward per Episode")
# plt.plot(rolling_avg, label="100-Episode Rolling Average")
# plt.title("CartPole-v1: DQN Performance")
# plt.xlabel("Episodes")
# plt.ylabel("Total Reward")
# plt.legend()
# plt.grid()
# plt.show()

### Ablation Study 1: Effect of QNet Architecture

In [None]:
# Constant-epsilon run on CartPole-v1; all other train_dqn arguments keep
# their defaults.
# Seed explicitly, as the sweep cells do, so the run can be repeated; this
# cell previously trained without setting any seed.
ablation_seed = SEEDS[0]
set_seed(ablation_seed)
env = gym.make("CartPole-v1")
agent, rewards = DQNAgent.train_dqn(env, constant_epsilon=True, seed=ablation_seed)

In [None]:
# Raw per-episode rewards overlaid with their 100-episode moving average.
rolling_window = 100
rolling_avg = np.convolve(rewards, np.ones(rolling_window) / rolling_window, mode='valid')

ax = plt.gca()
ax.plot(rewards, alpha=0.4, label="Reward per Episode")
ax.plot(rolling_avg, label="100-Episode Rolling Average")
ax.set_title("CartPole-v1: DQN Performance")
ax.set_xlabel("Episodes")
ax.set_ylabel("Total Reward")
ax.legend()
ax.grid()
plt.show()

## Acrobot Environment

### Hyperparameter Sensitivity (Learning Rate)

In [None]:
# Learning-rate sensitivity sweep for DQN on Acrobot-v1.
# Each learning rate is trained once per seed in SEEDS; the reward histories
# are then averaged across seeds.
lrs = [5e-3, 1e-3, 5e-4, 1e-4]
acrobot_results_lr = {}

for lr in lrs:
    all_rewards = []
    for seed in SEEDS:
        print("lr = ", lr, "seed = ", seed)
        set_seed(seed)
        env = gym.make("Acrobot-v1")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=10,
            lr=lr,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    acrobot_results_lr[f"LR={lr}"] = {
        # Must mirror the arguments passed to train_dqn above: batch_size was
        # recorded as 64 although training runs with 32.
        "params": {
            "lr": lr,
            "gamma": 0.99,
            "batch_size": 32,
            "epsilon_decay": 0.99,
            "target_update_freq": 10
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on Acrobot, one curve per learning rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name in acrobot_results_lr:
    ax.plot(acrobot_results_lr[setting_name]["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
ax.set_title("DQN Learning Rate Comparison on Acrobot")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in acrobot_results_lr.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
ax.set_title("DQN Learning Rate Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Epsilon Decay)

In [None]:
# Epsilon-decay sensitivity sweep for DQN on Acrobot-v1: every decay rate is
# trained once per seed in SEEDS and the reward curves are averaged over seeds.
eps_decay_list = [0.99, 0.995, 0.999]
acrobot_results_eps_decay = {}

for eps_decay in eps_decay_list:
    per_seed_rewards = []
    for seed in SEEDS:
        print("eps_decay = ", eps_decay, "seed = ", seed)
        set_seed(seed)
        env = gym.make("Acrobot-v1")

        # train_dqn returns an (agent, rewards) pair; only the rewards are kept.
        trained_agent, episode_rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=eps_decay,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=10,
            lr=1e-3,
            seed=seed,
        )
        per_seed_rewards.append(episode_rewards)

    # Snapshot of the hyperparameters used for this setting.
    run_params = {
        "lr": 1e-3,
        "gamma": 0.99,
        "batch_size": 32,
        "epsilon_decay": eps_decay,
        "target_update_freq": 10,
    }
    acrobot_results_eps_decay[f"eps_decay={eps_decay}"] = {
        "params": run_params,
        # Mean across seeds (axis 0) for every episode.
        "avg_rewards": np.mean(np.array(per_seed_rewards), axis=0),
    }

In [None]:
# Seed-averaged reward per episode on Acrobot, one curve per epsilon-decay rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in acrobot_results_eps_decay.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison on Acrobot")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 10  # Smooth over 10 episodes (adjust as needed)

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in acrobot_results_eps_decay.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Batch Size)

In [None]:
# Batch-size sensitivity sweep for DQN on Acrobot-v1: every batch size is
# trained once per seed in SEEDS and the reward curves are averaged over seeds.
batch_sized = [64, 32, 16]
acrobot_results_bs = {}

for bs in batch_sized:
    per_seed_rewards = []
    for seed in SEEDS:
        print("bs = ", bs, "seed = ", seed)
        set_seed(seed)
        env = gym.make("Acrobot-v1")

        # train_dqn returns an (agent, rewards) pair; only the rewards are kept.
        trained_agent, episode_rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=bs,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=10,
            lr=1e-3,
            seed=seed,
        )
        per_seed_rewards.append(episode_rewards)

    # Snapshot of the hyperparameters used for this setting.
    run_params = {
        "lr": 1e-3,
        "gamma": 0.99,
        "batch_size": bs,
        "epsilon_decay": 0.99,
        "target_update_freq": 10,
    }
    acrobot_results_bs[f"Batch Size={bs}"] = {
        "params": run_params,
        # Mean across seeds (axis 0) for every episode.
        "avg_rewards": np.mean(np.array(per_seed_rewards), axis=0),
    }

In [None]:
# Seed-averaged reward per episode on Acrobot, one curve per batch size.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in acrobot_results_bs.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison on Acrobot")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in acrobot_results_bs.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Target Net Update Freq)

In [None]:
# Target-network update-frequency sweep for DQN on Acrobot-v1.
# Each frequency is trained once per seed in SEEDS; the reward histories are
# then averaged across seeds.
update_freq = [1, 10, 20, 100]
acrobot_results_tf = {}

for freq in update_freq:
    all_rewards = []
    for seed in SEEDS:
        # Log the quantity that is actually being swept (was labelled "bs").
        print("target_update_freq = ", freq, "seed = ", seed)
        set_seed(seed)
        env = gym.make("Acrobot-v1")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=freq,
            lr=1e-3,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    # The key doubles as the legend label downstream, so it names the update
    # frequency rather than "Batch Size".
    acrobot_results_tf[f"Target Update Freq={freq}"] = {
        "params": {
            "lr": 1e-3,
            "gamma": 0.99,
            "batch_size": 32,
            "epsilon_decay": 0.99,
            "target_update_freq": freq
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on Acrobot, one curve per target-network
# update frequency.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for entry in acrobot_results_tf.values():
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(entry["avg_rewards"], label=curve_label)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# These results come from Acrobot and vary the target update frequency.
ax.set_title("DQN Target Update Frequency Comparison on Acrobot")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for entry in acrobot_results_tf.values():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=curve_label)

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the target update frequency, not the learning rate.
ax.set_title("DQN Target Update Frequency Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Ablation Study 1: Effect of QNet

### Ablation Study 2: Constant Epsilon

## MountainCar Environment

In [6]:
# Keyword arguments for the MountainCar-v0 agent, gathered in one place so the
# configuration can be read at a glance.
mountaincar_agent_kwargs = dict(
    gamma=0.99,
    lr=1e-3,
    batch_size=128,
    epsilon=1.0,
    epsilon_decay=0.995,
    epsilon_min=0.01,
    hidden_dim=128,
    tau=1.0,
    num_episodes=500,
    eval_interval=5,
    target_update_freq=10,
    constant_epsilon=False,
)

env = gym.make("MountainCar-v0")
dqn_agent = DQNAgent(env=env, **mountaincar_agent_kwargs)

In [7]:
# Train the MountainCar agent built in the previous cell. Judging by the names,
# the call presumably returns the training and evaluation reward histories.
# NOTE(review): no seed is set in this cell; train_with_seed presumably handles
# seeding itself -- confirm against DQNAgent.
rewards_history, eval_rewards_history = dqn_agent.train_with_seed()

Episode 0, Total reward: -200.0, Epsilon: 0.995
Episode 1, Total reward: -200.0, Epsilon: 0.990
Episode 2, Total reward: -200.0, Epsilon: 0.985
Episode 3, Total reward: -200.0, Epsilon: 0.980
Episode 4, Total reward: -200.0, Epsilon: 0.975
Episode 5, Total reward: -200.0, Epsilon: 0.970
Episode 6, Total reward: -200.0, Epsilon: 0.966
Episode 7, Total reward: -200.0, Epsilon: 0.961
Episode 8, Total reward: -200.0, Epsilon: 0.956
Episode 9, Total reward: -200.0, Epsilon: 0.951
Episode 10, Total reward: -200.0, Epsilon: 0.946
Episode 11, Total reward: -200.0, Epsilon: 0.942
Episode 12, Total reward: -200.0, Epsilon: 0.937
Episode 13, Total reward: -200.0, Epsilon: 0.932
Episode 14, Total reward: -200.0, Epsilon: 0.928
Episode 15, Total reward: -200.0, Epsilon: 0.923
Episode 16, Total reward: -200.0, Epsilon: 0.918
Episode 17, Total reward: -200.0, Epsilon: 0.914
Episode 18, Total reward: -200.0, Epsilon: 0.909
Episode 19, Total reward: -200.0, Epsilon: 0.905
Episode 20, Total reward: -200

### Hyperparameter Sensitivity (Learning Rate)

In [None]:
# Learning-rate sensitivity sweep for DQN on MountainCar-v0.
# Each learning rate is trained once per seed in SEEDS; training and
# evaluation reward histories are averaged across seeds separately.
lrs = [5e-3, 1e-3, 5e-4, 1e-4]
mc_results_lr = {}

for lr in lrs:
    all_train_rewards = []
    all_eval_rewards = []
    for seed in SEEDS:
        print("lr = ", lr, "seed = ", seed)
        set_seed(seed)
        env = gym.make("MountainCar-v0")

        dqn_agent = DQNAgent(
            env=env,
            gamma=0.99,
            # Use the swept value: a hard-coded 1e-3 here made every
            # "learning rate" setting train with the same rate.
            lr=lr,
            batch_size=64,
            epsilon=1.0,
            epsilon_decay=0.995,
            epsilon_min=0.01,
            hidden_dim=128,
            tau=1.0,
            num_episodes=1000,
            eval_interval=5,
            target_update_freq=10,
            constant_epsilon=False
        )

        # NOTE(review): the loop's seed is not passed to the agent or to
        # train_with_seed -- confirm how that method picks its seed.
        rewards_history, eval_rewards_history = dqn_agent.train_with_seed()

        all_train_rewards.append(rewards_history)
        all_eval_rewards.append(eval_rewards_history)

    # One row per seed; the mean over axis 0 is the average per recorded point.
    # Assumes all runs return equally long histories -- TODO confirm.
    all_train_rewards = np.array(all_train_rewards)
    avg_train_rewards = all_train_rewards.mean(axis=0)

    all_eval_rewards = np.array(all_eval_rewards)
    avg_eval_rewards = all_eval_rewards.mean(axis=0)

    mc_results_lr[f"LR={lr}"] = {
        # Must mirror the DQNAgent arguments above: epsilon_decay was recorded
        # as 0.99 although the agent is built with 0.995.
        "params": {
            "lr": lr,
            "gamma": 0.99,
            "batch_size": 64,
            "epsilon_decay": 0.995,
            "target_update_freq": 10
        },
        "avg_train_rewards": avg_train_rewards,
        "avg_eval_rewards": avg_eval_rewards
    }

lr =  0.005 seed =  42


  next_state = torch.FloatTensor(transitions.next_state)  # shape: [batch_size, state_dim]


In [None]:
# Seed-averaged training reward on MountainCar, one curve per learning rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name in mc_results_lr:
    ax.plot(mc_results_lr[setting_name]["avg_train_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
ax.set_title("DQN Learning Rate Comparison on Mountain Car")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive points averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in mc_results_lr.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_train_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
ax.set_title("DQN Learning Rate Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Seed-averaged evaluation reward on MountainCar, one curve per learning rate.
# NOTE(review): the x axis is labelled "Episode", but evaluation rewards may be
# recorded only every eval_interval episodes -- confirm against DQNAgent.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name in mc_results_lr:
    ax.plot(mc_results_lr[setting_name]["avg_eval_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
ax.set_title("DQN Learning Rate Comparison on Mountain Car")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive points averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in mc_results_lr.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_eval_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
ax.set_title("DQN Learning Rate Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Epsilon Decay)

In [None]:
# Epsilon-decay sensitivity sweep for DQN on MountainCar-v0.
# Each decay rate is trained once per seed in SEEDS; the reward histories are
# then averaged across seeds.
eps_decay_list = [0.99, 0.995, 0.999]
mc_results_eps_decay = {}

for eps_decay in eps_decay_list:
    all_rewards = []
    for seed in SEEDS:
        print("eps_decay = ", eps_decay, "seed = ", seed)
        set_seed(seed)
        env = gym.make("MountainCar-v0")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=64,
            epsilon=1.0,
            epsilon_decay=eps_decay,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=10,
            hidden_dim=128,
            lr=1e-3,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    mc_results_eps_decay[f"eps_decay={eps_decay}"] = {
        # Must mirror the arguments passed to train_dqn above: batch_size was
        # recorded as 32 although training runs with 64.
        "params": {
            "lr": 1e-3,
            "gamma": 0.99,
            "batch_size": 64,
            "epsilon_decay": eps_decay,
            "target_update_freq": 10
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on MountainCar, one curve per epsilon-decay
# rate.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in mc_results_eps_decay.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison on MountainCar-v0")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in mc_results_eps_decay.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is epsilon decay, not the learning rate.
ax.set_title("DQN Epsilon Decay Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Batch Size)

In [None]:
# Batch-size sensitivity sweep for DQN on MountainCar-v0.
# Each batch size is trained once per seed in SEEDS; the reward histories are
# then averaged across seeds.
batch_sized = [64, 32, 16]
mc_results_bs = {}

for bs in batch_sized:
    all_rewards = []
    for seed in SEEDS:
        print("bs = ", bs, "seed = ", seed)
        set_seed(seed)
        env = gym.make("MountainCar-v0")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=bs,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=10,
            lr=1e-3,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    # Store into the MountainCar dict. Writing to acrobot_results_bs left
    # mc_results_bs empty for the plots below and overwrote the Acrobot
    # results stored under the same keys.
    mc_results_bs[f"Batch Size={bs}"] = {
        "params": {
            "lr": 1e-3,
            "gamma": 0.99,
            "batch_size": bs,
            "epsilon_decay": 0.99,
            "target_update_freq": 10
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on MountainCar, one curve per batch size.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for setting_name, entry in mc_results_bs.items():
    ax.plot(entry["avg_rewards"], label=setting_name)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison on Mountain Car")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for setting_name, entry in mc_results_bs.items():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=f"{setting_name}")

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the batch size, not the learning rate.
ax.set_title("DQN Batch Size Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()

### Hyperparameter Sensitivity (Target Net Update Freq)

In [None]:
# Target-network update-frequency sweep for DQN on MountainCar-v0.
# Each frequency is trained once per seed in SEEDS; the reward histories are
# then averaged across seeds.
update_freq = [1, 10, 20, 100]
mc_results_tf = {}

for freq in update_freq:
    all_rewards = []
    for seed in SEEDS:
        # Log the quantity that is actually being swept (was labelled "bs").
        print("target_update_freq = ", freq, "seed = ", seed)
        set_seed(seed)
        env = gym.make("MountainCar-v0")

        agent, rewards = DQNAgent.train_dqn(
            env=env,
            gamma=0.99,
            batch_size=32,
            epsilon=1.0,
            epsilon_decay=0.99,
            use_target_net=True,
            constant_epsilon=False,
            num_episodes=1500,
            target_update_freq=freq,
            lr=1e-3,
            seed=seed
        )

        all_rewards.append(rewards)

    # One row per seed; the mean over axis 0 is the average reward per episode.
    # Assumes every run returns the same number of rewards -- TODO confirm.
    all_rewards = np.array(all_rewards)
    avg_rewards = all_rewards.mean(axis=0)
    # The key doubles as the legend label downstream, so it names the update
    # frequency rather than "Batch Size".
    mc_results_tf[f"Target Update Freq={freq}"] = {
        "params": {
            "lr": 1e-3,
            "gamma": 0.99,
            "batch_size": 32,
            "epsilon_decay": 0.99,
            "target_update_freq": freq
        },
        "avg_rewards": avg_rewards
    }

In [None]:
# Seed-averaged reward per episode on MountainCar, one curve per target-network
# update frequency.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for entry in mc_results_tf.values():
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(entry["avg_rewards"], label=curve_label)
ax.set_xlabel("Episode")
ax.set_ylabel("Average Reward (3 seeds)")
# The swept quantity is the target update frequency, not the learning rate.
ax.set_title("DQN Target Update Frequency Comparison on Mountain Car")
ax.legend()
ax.grid()
plt.show()

In [None]:
window_size = 20  # number of consecutive episodes averaged per plotted point

fig, ax = plt.subplots(figsize=(10, 5))
averaging_kernel = np.ones(window_size) / window_size
for entry in mc_results_tf.values():
    # mode='valid' keeps only fully covered windows, so the smoothed curve is
    # shorter than the raw series.
    moving_avg = np.convolve(entry["avg_rewards"], averaging_kernel, mode='valid')
    # Build the legend label from the recorded hyperparameter so it always
    # names the swept quantity, however the result keys are worded.
    curve_label = f"Target Update Freq={entry['params']['target_update_freq']}"
    ax.plot(np.arange(len(moving_avg)), moving_avg, label=curve_label)

ax.set_xlabel("Episode")
ax.set_ylabel(f"Smoothed Reward (Window={window_size})")
# The swept quantity is the target update frequency, not the learning rate.
ax.set_title("DQN Target Update Frequency Comparison (Moving Average)")
ax.legend()
ax.grid()
plt.show()