In [1]:
import gymnasium as gym
from models.dqn import DQN
import matplotlib.pyplot as plt

In [2]:
# --- Hyperparameters -------------------------------------------------------
# All five values below are handed positionally to DQN(...) / agent.train(...)
# in the training cell; their exact semantics live in models/dqn.py.
discount = 0.99            # presumably the reward discount factor (gamma) -- confirm in DQN
learning_rate = 0.001      # presumably the optimizer step size -- confirm in DQN
exploration = 1.0          # starting exploration rate (reported as "Eps" in the training log)
exploration_decay = 0.99   # the logged Eps sequence 1.0000 -> 0.9900 -> 0.9801 is consistent
                           # with multiplying the rate by this factor once per episode
n_episodes = 1000          # number of training episodes requested from agent.train

# --- Environment -----------------------------------------------------------
# continuous=False requests the discrete-action variant of LunarLander.
# render_mode='rgb_array' configures rendering to return frames as arrays
# rather than opening a window (nothing in this notebook calls env.render()).
env = gym.make(
    'LunarLander-v2',
    continuous=False,
    render_mode='rgb_array',
)

In [3]:
# Build the DQN agent from the environment and hyperparameters defined in the
# configuration cell. Arguments are positional; the order must match DQN.__init__.
agent = DQN(
    env,
    discount,
    learning_rate,
    exploration,
    exploration_decay,
)

# Run training. train() returns three values which are unpacked here and
# plotted in the final cell; each is presumably one entry per episode
# (TODO confirm against models/dqn.py).
(
    rewards,
    exploration_rates,
    steps_per_episode,
) = agent.train(n_episodes)

[EP 1/1000]  Rewards: -66.8026 | Steps: 63 | Eps: 1.0000 | Time: 0.1634s
[EP 2/1000]  Rewards: -216.8245 | Steps: 60 | Eps: 0.9900 | Time: 6.1145s
[EP 3/1000]  Rewards: -99.5063 | Steps: 110 | Eps: 0.9801 | Time: 8.8136s
[EP 4/1000]  Rewards: -118.7900 | Steps: 113 | Eps: 0.9703 | Time: 9.1012s
[EP 5/1000]  Rewards: -293.1700 | Steps: 72 | Eps: 0.9606 | Time: 5.7704s
[EP 6/1000]  Rewards: -115.4253 | Steps: 85 | Eps: 0.9510 | Time: 7.0556s
[EP 7/1000]  Rewards: -448.1763 | Steps: 105 | Eps: 0.9415 | Time: 8.4212s
[EP 8/1000]  Rewards: -402.8980 | Steps: 124 | Eps: 0.9321 | Time: 10.0829s
[EP 9/1000]  Rewards: -184.4123 | Steps: 86 | Eps: 0.9227 | Time: 7.0351s
[EP 10/1000]  Rewards: -78.8775 | Steps: 72 | Eps: 0.9135 | Time: 5.8716s
[EP 11/1000]  Rewards: -122.8857 | Steps: 63 | Eps: 0.9044 | Time: 5.1415s
[EP 12/1000]  Rewards: -302.5519 | Steps: 103 | Eps: 0.8953 | Time: 8.3504s
[EP 13/1000]  Rewards: -136.5136 | Steps: 83 | Eps: 0.8864 | Time: 6.8236s
[EP 14/1000]  Rewards: -161.823

In [None]:
# Draw the three training series side by side: one row, three panels.
fig, axes = plt.subplots(1, 3, figsize=(20, 5))

# One entry per panel, left to right: (data to plot, panel title, y-axis label).
panels = [
    (rewards, 'Rewards', 'Reward'),
    (exploration_rates, 'Exploration Rates', 'Exploration Rate'),
    (steps_per_episode, 'Steps per Episode', 'Steps'),
]

for ax, (series, title, ylabel) in zip(axes, panels):
    ax.plot(series)
    ax.set_title(title)
    ax.set_xlabel('Episode')  # all three panels share the same x-axis label
    ax.set_ylabel(ylabel)

plt.show()