# Dueling DQL Agent w/ Random Engine Failure

In [1]:
import os
import json
import wandb
import gymnasium as gym
from matplotlib import pyplot as plt
import seaborn as sns
from models.dueling_dql import DuelingDQL

In [2]:
# Environment: LunarLander-v2 with the discrete action space (continuous=False).
env = gym.make(
    'LunarLander-v2',
    render_mode='rgb_array',
    continuous=False,
)

# Hyperparameters consumed by the DuelingDQL constructor and agent.train().
learning_rate, discount_factor, exploration_rate = 1e-3, 0.99, 1.0
episodes, max_steps = 500, 1000

## Training

In [3]:
# Build the agent with random engine failure switched on (engine_fail_prob=0.6),
# then train it and keep whatever train() returns as the run history.
agent = DuelingDQL(
    env,
    learning_rate,
    discount_factor,
    exploration_rate,
    random_engine_fail=True,
    engine_fail_prob=0.6,
    fname='dueling_dql_ef',
)
history = agent.train(
    episodes,
    max_steps,
    log_wandb=True,
    save_episodes=True,
    save_interval=100,
)

[EP 1/10] - Reward: -92.7036 - Steps: 63 - Eps: 1.0000 - Time: 0.67s
[EP 2/10] - Reward: -286.5529 - Steps: 106 - Eps: 0.0100 - Time: 10.59s
[EP 3/10] - Reward: -181.9499 - Steps: 75 - Eps: 0.0100 - Time: 4.93s
[EP 4/10] - Reward: -343.1262 - Steps: 85 - Eps: 0.0100 - Time: 6.54s
[EP 5/10] - Reward: -80.0458 - Steps: 91 - Eps: 0.0100 - Time: 5.17s
[EP 6/10] - Reward: -135.7120 - Steps: 87 - Eps: 0.0100 - Time: 4.96s
[EP 7/10] - Reward: -258.8359 - Steps: 99 - Eps: 0.0100 - Time: 4.81s
[EP 8/10] - Reward: -442.2399 - Steps: 79 - Eps: 0.0100 - Time: 3.99s
[EP 9/10] - Reward: -77.2574 - Steps: 115 - Eps: 0.0100 - Time: 6.05s
[EP 10/10] - Reward: -131.5398 - Steps: 99 - Eps: 0.0100 - Time: 5.04s
INFO:tensorflow:Assets written to: assets/dueling_dql_ef\assets


In [None]:
# Save the training history as JSON under ./history.
# makedirs(exist_ok=True) creates the directory when it is missing and is a
# no-op when it already exists, avoiding the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs('./history', exist_ok=True)

with open('./history/dueling_dql_ef_history.json', 'w') as file:
    json.dump(history, file)

## Evaluation

In [None]:
# with open('./history/dueling_dql_ef_history.json', 'r') as file:
#     history = json.load(file)

# rewards, avg_rewards, steps = history['reward'], history['avg_reward_100'], history['steps']

In [None]:
# plt.figure(figsize=(10, 5))

# plt.plot(rewards, alpha=0.8, label='Reward')
# plt.plot(avg_rewards, label='MA Reward (100)')

# plt.title('DDQL Agent w/ Random Engine Failure Rewards')
# plt.xlabel('Episode')
# plt.ylabel('Reward')
# plt.legend()

# plt.show()

In [None]:
# # plot cumulative reward
# cumulative_reward = [sum(rewards[:i+1]) for i in range(len(rewards))]
# plt.figure(figsize=(10, 5))

# plt.plot(cumulative_reward, alpha=0.8, label='Cumulative Reward')

# plt.title('DDQL Agent w/ Random Engine Failure Cumulative Rewards')
# plt.xlabel('Episode')
# plt.ylabel('Reward')
# plt.legend()
# plt.show()

In [None]:
# # plot steps and moving average steps
# avg_steps = [sum(steps[max(0, i-99):i+1]) / min(i+1, 100) for i in range(len(steps))]
# plt.figure(figsize=(10, 5))

# plt.plot(steps, alpha=0.8, label='Steps')
# plt.plot(avg_steps, label='MA Steps (100)')

# plt.title('DDQL Agent w/ Random Engine Failure Steps')
# plt.xlabel('Episode')
# plt.ylabel('Steps')
# plt.legend()
# plt.show()