In [4]:
import os
import sys

# Resolve the directory one level above the notebook's working directory.
# abspath() anchors a relative path on os.getcwd(), so passing the bare
# parent-directory marker yields the absolute path of the parent folder.
parent_folder = os.path.abspath(os.pardir)

# Make the project packages importable from this notebook; the membership
# guard keeps re-running the cell from appending the same entry again.
already_registered = parent_folder in sys.path
if not already_registered:
    sys.path.append(parent_folder)

import gymnasium as gym
import numpy as np
from agents.sarsa import Sarsa
from agents.qlearning import QLearning
from policies.eps_greedy_policy import EpsGreedy
from policies.softmax_policy import Softmax

from utils.encoding import mc_tile_encoding, cw_encoding
from utils.utils_fct import plot_moving_averages, process_json

In [2]:
# Build the slippery CliffWalking environment with rendering disabled.
env = gym.make("CliffWalking-v0", is_slippery=True, render_mode=None)
# Seed the environment's RNG on the first reset so that the random slips of
# any subsequent step() call repeat across kernel restarts. As the last
# expression, the (observation, info) pair returned by reset() is still the
# value displayed by this cell.
env.reset(seed=42)

(36, {'prob': 1})

In [None]:
# Advance the environment by one step with action index 0; the tuple returned
# by step() is the value this cell displays.
# NOTE(review): per the Gymnasium CliffWalking docs action 0 should be
# "move up" — confirm against the installed version.
env.step(action=0)

In [9]:
# SARSA on the slippery variant of CliffWalking: train for 1000 episodes,
# then run 10 evaluation episodes on the same environment.
SEED = 42  # fixed so that a Restart & Run All follows the same random slips

env = gym.make("CliffWalking-v0", is_slippery=True, render_mode=None)
# Seeding through reset() initialises the environment's internal RNG; later
# reset() calls made without a seed keep drawing from that same generator.
env.reset(seed=SEED)
# NOTE(review): assumes the agent's exploration draws from NumPy's global RNG;
# confirm in agents/sarsa.py and seed whichever generator it actually uses.
np.random.seed(SEED)

sarsa_agent = Sarsa(
    encode_fct=cw_encoding,
    nb_actions=env.action_space.n,
)

# The return value is deliberately discarded; with verbose=1 the recorded
# output shows one "Episode k: Total Reward = ..." line per episode.
_ = sarsa_agent.train(
    env=env,
    alpha=0.1,
    gamma=1,
    epsilon=0.01,
    verbose=1,
    nb_episodes=1000,
)

_ = sarsa_agent.evaluate_policy(
    env=env,
    nb_episodes=10,
    verbose=1,
)

Episode 1: Total Reward = -5124
Episode 2: Total Reward = -1435
Episode 3: Total Reward = -118
Episode 4: Total Reward = -76
Episode 5: Total Reward = -287
Episode 6: Total Reward = -272
Episode 7: Total Reward = -451
Episode 8: Total Reward = -183
Episode 9: Total Reward = -432
Episode 10: Total Reward = -111
Episode 11: Total Reward = -836
Episode 12: Total Reward = -301
Episode 13: Total Reward = -77
Episode 14: Total Reward = -324
Episode 15: Total Reward = -570
Episode 16: Total Reward = -200
Episode 17: Total Reward = -1024
Episode 18: Total Reward = -357
Episode 19: Total Reward = -403
Episode 20: Total Reward = -384
Episode 21: Total Reward = -271
Episode 22: Total Reward = -272
Episode 23: Total Reward = -604
Episode 24: Total Reward = -355
Episode 25: Total Reward = -247
Episode 26: Total Reward = -1781
Episode 27: Total Reward = -1551
Episode 28: Total Reward = -104
Episode 29: Total Reward = -954
Episode 30: Total Reward = -381
Episode 31: Total Reward = -125
Episode 32: To

In [8]:
# SARSA on the non-slippery CliffWalking environment, driven by an explicit
# epsilon-greedy policy object: 1000 training episodes, then 10 evaluation
# episodes with the "hard_policy" action option switched on.
env = gym.make("CliffWalking-v0", is_slippery=False, render_mode=None)

exploration_policy = EpsGreedy(0.01)
sarsa_agent = Sarsa(
    encode_fct=cw_encoding,
    policy=exploration_policy,
    nb_actions=env.action_space.n,
)

# Keyword arguments for the training run, gathered in one place.
train_settings = {
    "alpha": 0.1,
    "gamma": 1,
    "policy_action_params": {},
    "policy_update_params": {"to_decay": True},
    "nb_episodes": 1000,
    "verbose": 2,
}
_ = sarsa_agent.train(env=env, **train_settings)

# Keyword arguments for the evaluation run.
eval_settings = {
    "policy_action_params": {"hard_policy": True},
    "nb_episodes": 10,
    "verbose": 1,
}
_ = sarsa_agent.evaluate_policy(env=env, **eval_settings)

Episode 1: Total Reward = -758
Episode 2: Total Reward = -1556
Episode 3: Total Reward = -308
Episode 4: Total Reward = -112
Episode 5: Total Reward = -309
Episode 6: Total Reward = -87
Episode 7: Total Reward = -87
Episode 8: Total Reward = -127
Episode 9: Total Reward = -255
Episode 10: Total Reward = -111
Episode 11: Total Reward = -73
Episode 12: Total Reward = -119
Episode 13: Total Reward = -137
Episode 14: Total Reward = -117
Episode 15: Total Reward = -115
Episode 16: Total Reward = -83
Episode 17: Total Reward = -83
Episode 18: Total Reward = -101
Episode 19: Total Reward = -139
Episode 20: Total Reward = -57
Episode 21: Total Reward = -99
Episode 22: Total Reward = -123
Episode 23: Total Reward = -73
Episode 24: Total Reward = -89
Episode 25: Total Reward = -119
Episode 26: Total Reward = -67
Episode 27: Total Reward = -91
Episode 28: Total Reward = -73
Episode 29: Total Reward = -35
Episode 30: Total Reward = -133
Episode 31: Total Reward = -77
Episode 32: Total Reward = -47