In [1]:
import gymnasium as gym
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
from collections import deque, namedtuple
import matplotlib.pyplot as plt
from dqn import DQNAgent

In [4]:
# Configuration parameters set directly
class Args:
    """Static run configuration, read as class attributes by the cells below."""

    # --- environment / reproducibility ---
    env = "LunarLander-v2"
    seed = 0
    gpu_index = 0

    # --- training schedule ---
    n_episodes = 2000
    max_esp_len = 1000
    update_freq = 4

    # --- agent hyperparameters (forwarded to DQNAgent) ---
    batch_size = 64
    max_size = 100_000
    discount = 0.99
    lr = 0.0005
    tau = 1e-3

    # --- exploration schedule ---
    epsilon_start = 1.0
    epsilon_end = 0.01
    epsilon_decay = 0.995

# Instantiate the run configuration and build the environment it names.
args = Args()
env = gym.make(args.env)

# Seed every RNG in play (PyTorch, NumPy, Python's `random`) with the same value.
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(args.seed)

# Problem dimensions are read straight off the environment's spaces.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Constructor arguments for the agent, kept in a dict so they can be inspected.
kwargs = dict(
    state_dim=state_dim,
    action_dim=action_dim,
    discount=args.discount,
    tau=args.tau,
    lr=args.lr,
    update_freq=args.update_freq,
    max_size=args.max_size,
    batch_size=args.batch_size,
    gpu_index=args.gpu_index,
)
learner = DQNAgent(**kwargs)  # the DQN learning agent

# Train the agent episode by episode, then plot the learning curve.
moving_window = deque(maxlen=100)  # rewards of the 100 most recent episodes
average_rewards = []  # running average over `moving_window`, one entry per episode
epsilon = args.epsilon_start
for e in range(args.n_episodes):
    # Seed the environment on the first reset only. Passing the same seed to
    # every reset would restart each episode from the identical initial state.
    state, _ = env.reset(seed=args.seed if e == 0 else None)
    curr_reward = 0
    for t in range(args.max_esp_len):
        # select_action / step come from DQNAgent (imported from dqn);
        # presumably epsilon-greedy action choice and replay/learning — confirm.
        action = learner.select_action(state, epsilon)
        n_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        learner.step(state, action, reward, n_state, done)
        state = n_state
        curr_reward += reward
        if done:
            break
    moving_window.append(curr_reward)
    window_mean = float(np.mean(moving_window))
    average_rewards.append(window_mean)

    # Multiplicative epsilon decay, floored at epsilon_end. Written inline
    # because no `decay_epsilon` helper is defined or imported in this notebook.
    epsilon = max(args.epsilon_end, epsilon * args.epsilon_decay)

    if e % 100 == 0:
        print(f'Episode Number {e} Average Episodic Reward (over 100 episodes): {window_mean:.2f}')

# Plot the running average for every episode. `moving_window` alone only holds
# the final 100 raw rewards, which would not match the axis labels below.
plt.figure(figsize=(10, 5))
plt.plot(average_rewards)
plt.xlabel('Episode')
plt.ylabel('Average Episodic Reward')
plt.title('Episodic Reward Over Time')
plt.show()


DependencyNotInstalled: Box2D is not installed, run `pip install gymnasium[box2d]`

In [3]:
%tb

SystemExit: 2