# Deep SARSA Agent w/ Random Engine Failure

In [1]:
import os
import json
import wandb
import gymnasium as gym
from matplotlib import pyplot as plt
import seaborn as sns
from models.sarsa import SARSA

In [2]:
# Authenticate with Weights & Biases without embedding the API key in the
# notebook. wandb.login() uses the WANDB_API_KEY environment variable or
# previously stored credentials, and otherwise prompts for the key.
# NOTE(review): an API key was hardcoded in this cell before; treat that key
# as leaked and rotate it in the W&B account settings.
wandb.login()

In [3]:
# Hyperparameters for this run (also recorded in the W&B run config below).
learning_rate = 0.0005
discount_factor = 0.99
exploration_rate = 1.0
episodes = 500
max_steps = 1000

# Lunar Lander with the discrete action space; frames are rendered as RGB arrays.
env = gym.make('LunarLander-v2', continuous=False, render_mode='rgb_array')

# Start the W&B run and attach the hyperparameters as its config.
wandb.init(
    project='lunar-lander-rl',
    entity='timothyckl',
    config=dict(
        learning_rate=learning_rate,
        discount_factor=discount_factor,
        exploration_rate=exploration_rate,
        episodes=episodes,
        max_steps=max_steps,
    ),
)

## Training

In [4]:
# Build the SARSA agent with random engine failure switched on
# (engine_fail_prob=0.6); 'sarsa_ef' is passed as the agent's fname.
agent = SARSA(
    env,
    learning_rate,
    discount_factor,
    exploration_rate,
    random_engine_fail=True,
    engine_fail_prob=0.6,
    fname='sarsa_ef',
)

# Run training with W&B logging and episode saving (save_interval=100).
# The returned history is kept for saving and evaluation.
history = agent.train(
    episodes,
    max_steps,
    log_wandb=True,
    save_episodes=True,
    save_interval=100,
)

[EP 1/10] - Reward: -98.5108 - Steps: 86 - Eps: 0.8016 - Time: 13.86s
[EP 2/10] - Reward: -111.3492 - Steps: 77 - Eps: 0.3697 - Time: 34.07s
[EP 3/10] - Reward: -54.9558 - Steps: 106 - Eps: 0.1274 - Time: 49.83s
[EP 4/10] - Reward: -36.9948 - Steps: 120 - Eps: 0.0381 - Time: 59.88s
[EP 5/10] - Reward: -168.5406 - Steps: 114 - Eps: 0.0121 - Time: 55.30s
[EP 6/10] - Reward: -68.4566 - Steps: 89 - Eps: 0.0100 - Time: 45.69s
[EP 7/10] - Reward: -73.0086 - Steps: 95 - Eps: 0.0100 - Time: 38.51s
[EP 8/10] - Reward: -34.3499 - Steps: 93 - Eps: 0.0100 - Time: 38.31s
[EP 9/10] - Reward: -187.2120 - Steps: 79 - Eps: 0.0100 - Time: 29.75s
[EP 10/10] - Reward: -134.0260 - Steps: 77 - Eps: 0.0100 - Time: 30.46s


In [None]:
# save history
# makedirs(exist_ok=True) creates the directory when missing and is a no-op
# when it already exists, so there is no check-then-create race.
os.makedirs('./history', exist_ok=True)

# Persist the training history returned by agent.train() as JSON.
with open('./history/sarsa_ef_history.json', 'w') as file:
    json.dump(history, file)

## Evaluation

In [None]:
# with open('./history/sarsa_ef_history.json', 'r') as file:
#     history = json.load(file)

# rewards, avg_rewards, steps = history['reward'], history['avg_reward_100'], history['steps']

In [None]:
# plt.figure(figsize=(10, 5))

# plt.plot(rewards, alpha=0.8, label='Reward')
# plt.plot(avg_rewards, label='MA Reward (100)')

# plt.title('SARSA Agent w/ Random Engine Failure Rewards')
# plt.xlabel('Episode')
# plt.ylabel('Reward')
# plt.legend()

# plt.show()

In [None]:
# # plot cumulative reward
# cumulative_reward = [sum(rewards[:i+1]) for i in range(len(rewards))]
# plt.figure(figsize=(10, 5))

# plt.plot(cumulative_reward, alpha=0.8, label='Cumulative Reward')

# plt.title('SARSA Agent w/ Random Engine Failure Cumulative Rewards')
# plt.xlabel('Episode')
# plt.ylabel('Reward')
# plt.legend()
# plt.show()

In [None]:
# # plot steps and 100-episode moving average of steps
# avg_steps = [sum(steps[max(0, i - 99):i + 1]) / min(i + 1, 100) for i in range(len(steps))]
# plt.figure(figsize=(10, 5))

# plt.plot(steps, alpha=0.8, label='Steps')
# plt.plot(avg_steps, label='MA Steps (100)')

# plt.title('SARSA Agent w/ Random Engine Failure Steps')
# plt.xlabel('Episode')
# plt.ylabel('Steps')
# plt.legend()
# plt.show()