In [4]:
import numpy as np
import random
import time
import gym

def initialize_q_table(states, actions):
    """Create the starting Q-table for a tabular agent.

    Args:
        states: Number of discrete states (rows of the table).
        actions: Number of discrete actions (columns of the table).

    Returns:
        A ``(states, actions)`` NumPy array whose entries are drawn
        uniformly from ``[-0.01, 0.01)`` via NumPy's global RNG.
    """
    bound = 0.01
    table_shape = (states, actions)
    return np.random.uniform(low=-bound, high=bound, size=table_shape)

def choose_action(state, q_table, epsilon):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    Args:
        state: Row index into ``q_table``.
        q_table: 2-D array of Q-values indexed as ``[state, action]``.
        epsilon: Probability of exploring (choosing a uniformly random action).

    Returns:
        The index of the chosen action: a random column index when
        exploring, otherwise the arg-max of the state's Q-values.
    """
    exploring = random.uniform(0, 1) < epsilon
    if not exploring:
        # Exploit: take the currently best-valued action (first one on ties).
        return np.argmax(q_table[state])

    # Explore: any valid column index, both ends inclusive.
    last_action = q_table.shape[1] - 1
    return random.randint(0, last_action)

def update_q_table(q_table, state, action, reward, next_state, next_action, alpha, gamma, done=False):
    """Apply one SARSA temporal-difference update to ``q_table`` in place.

    The update rule is::

        Q(s, a) <- Q(s, a) + alpha * (target - Q(s, a))

    where ``target = reward + gamma * Q(s', a')`` for a regular transition and
    ``target = reward`` when the transition ended the episode.

    Args:
        q_table: 2-D array of Q-values indexed as ``[state, action]``; mutated.
        state: Index of the state the action was taken in.
        action: Index of the action that was taken.
        reward: Reward received for the transition.
        next_state: Index of the state that was reached.
        next_action: Index of the action selected in ``next_state``.
        alpha: Learning rate.
        gamma: Discount factor.
        done: Set to ``True`` when ``next_state`` is terminal. The bootstrap
            term is then dropped, so the target is not polluted by the
            terminal state's Q-values (which are never updated themselves).
            Defaults to ``False``, which reproduces the plain update.

    Returns:
        None. ``q_table`` is updated in place.
    """
    # Observations may arrive as NumPy scalars; normalise to plain ints.
    state = int(state)
    action = int(action)

    if done:
        td_target = reward
    else:
        td_target = reward + gamma * q_table[int(next_state), int(next_action)]

    td_error = td_target - q_table[state, action]
    q_table[state, action] += alpha * td_error

def sarsa(env, episodes, alpha, gamma, epsilon, epsilon_decay,
          min_epsilon=0.01, render_every=10, verbose=True):
    """Train a tabular SARSA agent on a discrete Gym-style environment.

    Args:
        env: Environment exposing ``observation_space.n``, ``action_space.n``,
            ``reset()``, ``step(action)`` and ``render()``. Both the 5-tuple
            (``terminated``/``truncated``) and the legacy 4-tuple
            (``done``/``info``) ``step`` formats are accepted.
        episodes: Number of training episodes.
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Initial exploration rate.
        epsilon_decay: Multiplicative decay applied to epsilon after each episode.
        min_epsilon: Lower bound for epsilon after decay.
        render_every: Call ``env.render()`` on every step of every
            ``render_every``-th episode; ``0`` or ``None`` disables rendering.
        verbose: When true, print a line at the start and end of each episode.

    Returns:
        Tuple ``(q_table, elapsed_time)``: the learned Q-table and the
        wall-clock training time in seconds.

    Raises:
        ValueError: If ``env.step()`` returns neither 4 nor 5 values.
    """
    q_table = initialize_q_table(env.observation_space.n, env.action_space.n)

    start_time = time.perf_counter()

    for episode in range(episodes):
        state = env.reset()
        # Newer Gym versions return (observation, info) from reset().
        if isinstance(state, tuple):
            state = state[0]

        action = choose_action(state, q_table, epsilon)
        total_reward = 0
        render_this_episode = bool(render_every) and episode % render_every == 0

        if verbose:
            print(f"Episode {episode + 1}/{episodes}")

        while True:
            result = env.step(action)
            if len(result) == 5:
                next_state, reward, done, truncated, _ = result
            elif len(result) == 4:
                # Legacy API: (obs, reward, done, info). The 4th item is the
                # info dict, NOT a truncation flag; treating it as one would
                # end every episode after a single step (non-empty dict is truthy).
                next_state, reward, done, info = result
                # Legacy TimeLimit wrappers flag time-outs in info; such a
                # "done" is a truncation, not a true terminal state.
                truncated = bool(
                    isinstance(info, dict) and info.get("TimeLimit.truncated", False)
                )
                done = bool(done) and not truncated
            else:
                raise ValueError("Unexpected return format from env.step()")

            if isinstance(next_state, tuple):
                next_state = next_state[0]

            next_action = choose_action(next_state, q_table, epsilon)
            # On a terminal transition there is no future return: zero the
            # discount so the target is just the reward instead of bootstrapping
            # from the terminal state's never-updated Q-values. Truncated
            # episodes still bootstrap normally.
            step_gamma = 0.0 if done else gamma
            update_q_table(q_table, state, action, reward, next_state, next_action, alpha, step_gamma)

            total_reward += reward
            state, action = next_state, next_action

            # Render only on a subset of episodes to keep training fast.
            if render_this_episode:
                env.render()

            if done or truncated:
                if verbose:
                    print(f"Episode finished with total reward: {total_reward}\n")
                break

        epsilon = max(epsilon * epsilon_decay, min_epsilon)

    elapsed_time = time.perf_counter() - start_time
    print(f"Total time for SARSA: {elapsed_time:.2f} seconds")
    return q_table, elapsed_time

if __name__ == "__main__":
    # Seed both RNGs the agent draws from (stdlib `random` for action
    # selection, NumPy for Q-table initialisation) so reruns are reproducible.
    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)

    env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="rgb_array")

    # Hyperparameters
    episodes = 500
    alpha = 0.3  # Learning rate
    gamma = 0.99  # Discount factor
    epsilon = 1.0  # Exploration rate
    epsilon_decay = 0.995

    try:
        q_table, sarsa_time = sarsa(env, episodes, alpha, gamma, epsilon, epsilon_decay)
    finally:
        # Release the environment's resources even if training raises.
        env.close()

    print("Trained Q-Table:")
    print(q_table)

    # Timing table, laid out so other algorithms can be added for comparison.
    print("\nComparison Table:")
    print("Algorithm    | Time (seconds)")
    print("------------------------------")
    print(f"SARSA        | {sarsa_time:.2f}")


Episode 1/500
Episode finished with total reward: 0.0

Episode 2/500
Episode finished with total reward: 0.0

Episode 3/500
Episode finished with total reward: 0.0

Episode 4/500
Episode finished with total reward: 0.0

Episode 5/500
Episode finished with total reward: 0.0

Episode 6/500
Episode finished with total reward: 0.0

Episode 7/500
Episode finished with total reward: 0.0

Episode 8/500
Episode finished with total reward: 0.0

Episode 9/500
Episode finished with total reward: 0.0

Episode 10/500
Episode finished with total reward: 0.0

Episode 11/500
Episode finished with total reward: 0.0

Episode 12/500
Episode finished with total reward: 0.0

Episode 13/500
Episode finished with total reward: 0.0

Episode 14/500
Episode finished with total reward: 0.0

Episode 15/500
Episode finished with total reward: 0.0

Episode 16/500
Episode finished with total reward: 0.0

Episode 17/500
Episode finished with total reward: 0.0

Episode 18/500
Episode finished with total reward: 0.0

E

Episode finished with total reward: 1.0

Episode 162/500
Episode finished with total reward: 1.0

Episode 163/500
Episode finished with total reward: 0.0

Episode 164/500
Episode finished with total reward: 0.0

Episode 165/500
Episode finished with total reward: 0.0

Episode 166/500
Episode finished with total reward: 0.0

Episode 167/500
Episode finished with total reward: 0.0

Episode 168/500
Episode finished with total reward: 0.0

Episode 169/500
Episode finished with total reward: 0.0

Episode 170/500
Episode finished with total reward: 0.0

Episode 171/500
Episode finished with total reward: 0.0

Episode 172/500
Episode finished with total reward: 1.0

Episode 173/500
Episode finished with total reward: 0.0

Episode 174/500
Episode finished with total reward: 0.0

Episode 175/500
Episode finished with total reward: 0.0

Episode 176/500
Episode finished with total reward: 0.0

Episode 177/500
Episode finished with total reward: 0.0

Episode 178/500
Episode finished with total rew

Episode finished with total reward: 1.0

Episode 412/500
Episode finished with total reward: 1.0

Episode 413/500
Episode finished with total reward: 1.0

Episode 414/500
Episode finished with total reward: 0.0

Episode 415/500
Episode finished with total reward: 1.0

Episode 416/500
Episode finished with total reward: 1.0

Episode 417/500
Episode finished with total reward: 1.0

Episode 418/500
Episode finished with total reward: 1.0

Episode 419/500
Episode finished with total reward: 0.0

Episode 420/500
Episode finished with total reward: 1.0

Episode 421/500
Episode finished with total reward: 1.0

Episode 422/500
Episode finished with total reward: 1.0

Episode 423/500
Episode finished with total reward: 1.0

Episode 424/500
Episode finished with total reward: 1.0

Episode 425/500
Episode finished with total reward: 1.0

Episode 426/500
Episode finished with total reward: 0.0

Episode 427/500
Episode finished with total reward: 1.0

Episode 428/500
Episode finished with total rew

In [2]:
# Attempt to open a live ("human") render window for FrozenLake.
# NOTE(review): visualization was reported as not working; a "human" window
# presumably needs a display available to the kernel -- confirm on the target machine.
import os

import gym

# Python does not expand shell substitutions such as "$(id -u)", so build the
# per-user runtime directory path explicitly and make sure it exists with
# owner-only permissions. An already-configured session value is left alone.
if "XDG_RUNTIME_DIR" not in os.environ:
    runtime_dir = f"/tmp/runtime-{os.getuid()}"
    os.makedirs(runtime_dir, mode=0o700, exist_ok=True)
    os.environ["XDG_RUNTIME_DIR"] = runtime_dir

# Second attempt at visualization: request an on-screen render mode.
env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="human")