In [None]:
! pip install -r requirements.txt

In [2]:
import gymnasium as gym
import os 
import matplotlib.pyplot as plt
import json  # for dumping debug data
import time  # for benchmarking
import numpy as np
from ddqn_torch import DoubleQAgent

LEARN_EVERY = 2


def train_agent(n_episodes=2000, load_latest_model=False):
    print(
        "Training a DDQN agent on {} episodes. Pretrained model = {}".format(
            n_episodes, load_latest_model
        )
    )
    env = gym.make("LunarLander-v2")
    agent = DoubleQAgent(
        gamma=0.99,
        epsilon=1.0,
        epsilon_dec=0.995,
        lr=0.001,
        mem_size=200000,
        batch_size=128,
        epsilon_end=0.01,
    )
    if load_latest_model:
        agent.load_saved_model("ddqn_torch_model.h5")
        print("Loaded most recent: ddqn_torch_model.h5")

    scores = []
    eps_history = []
    start = time.time()
    for i in range(n_episodes):
        terminated = False
        truncated = False
        score = 0
        state = env.reset()[0]
        steps = 0
        while not (terminated or truncated):
            action = agent.choose_action(state)
            new_state, reward, terminated, truncated, info = env.step(action)
            agent.save(state, action, reward, new_state, terminated)
            state = new_state
            if steps > 0 and steps % LEARN_EVERY == 0:
                agent.learn()
            steps += 1
            score += reward

        eps_history.append(agent.epsilon)
        scores.append(score)
        avg_score = np.mean(scores[max(0, i - 100) : (i + 1)])

        if (i + 1) % 10 == 0 and i > 0:
            # Report expected time to finish the training
            print(
                "Episode {} in {:.2f} min. Expected total time for {} episodes: {:.0f} min. [{:.2f}/{:.2f}]".format(
                    (i + 1),
                    (time.time() - start) / 60,
                    n_episodes,
                    (((time.time() - start) / i) * n_episodes) / 60,
                    score,
                    avg_score,
                )
            )

        if (i + 1) % 100 == 0 and i > 0:
            # Save the model every 100-th step just in case
            agent.save_model("ddqn_torch_model.h5")
            with open(
                "scores\ddqn_torch_dqn_scores_{}.json".format(int(time.time())), "w"
            ) as fp:
                json.dump(scores, fp)
            with open(
                "episodes\ddqn_torch_eps_history_{}.json".format(int(time.time())), "w"
            ) as fp:
                json.dump(eps_history, fp)

    return agent

In [18]:
# Uncomment to train
agent = train_agent(n_episodes=1500, load_latest_model=False)

Training a DDQN agent on 1500 episodes. Pretrained model = False
Episode 10 in 0.05 min. Expected total time for 1500 episodes: 8 min. [-286.65/-193.36]
Episode 20 in 0.26 min. Expected total time for 1500 episodes: 20 min. [-110.78/-165.80]
Episode 30 in 0.53 min. Expected total time for 1500 episodes: 27 min. [-42.70/-143.79]
Episode 40 in 0.93 min. Expected total time for 1500 episodes: 36 min. [-18.58/-113.77]
Episode 50 in 1.34 min. Expected total time for 1500 episodes: 41 min. [3.81/-93.88]
Episode 60 in 1.77 min. Expected total time for 1500 episodes: 45 min. [-28.95/-85.80]
Episode 70 in 2.21 min. Expected total time for 1500 episodes: 48 min. [-40.44/-79.52]
Episode 80 in 2.67 min. Expected total time for 1500 episodes: 51 min. [12.05/-73.49]
Episode 90 in 3.09 min. Expected total time for 1500 episodes: 52 min. [-63.52/-71.84]
Episode 100 in 3.56 min. Expected total time for 1500 episodes: 54 min. [-46.22/-68.39]
Episode 110 in 4.04 min. Expected total time for 1500 episodes

In [None]:
# Visualize the model
import gymnasium as gym
import os
import matplotlib.pyplot as plt

os.environ["SDL_VIDEODRIVER"] = "dummy"
from IPython.display import clear_output

# Set path to the model to visualize
model_to_animate = "ddqn_torch_model.h5"


def animate_model(name, atype="single"):
    env = gym.make("LunarLander-v2", render_mode="rgb_array")
    agent = DoubleQAgent(
        gamma=0.99,
        epsilon=0.0,
        lr=0.0005,
        mem_size=200000,
        batch_size=64,
        epsilon_end=0.01,
    )
    agent.load_saved_model(name)
    state, info = env.reset(seed=12)
    for _ in range(5):
        terminated = False
        truncated = False
        while not (terminated or truncated):
            action = agent.choose_action(state)
            new_state, reward, terminated, truncated, info = env.step(action)
            state = new_state
            clear_output(wait=True)
            plt.imshow(env.render())
            plt.show()
        state = env.reset()[0]
    env.close()


animate_model(model_to_animate, atype="double")