# Q-Learning on FrozenLake (Gymnasium)

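Final, cleaned-up submission notebook: trains a tabular Q-learning agent on a custom 8×8 slippery FrozenLake map, saves the learned Q-table to disk, reloads it, and runs the greedy policy for a few rendered test episodes.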

## Imports and Environment Setup

In [None]:
import gymnasium as gym
import numpy as np
import random

## Creating a Non-trivial, Solvable Map

In [None]:
# 8x8 layout in FrozenLake map notation, one string per row
# (per the Gymnasium docs: S = start, F = frozen, H = hole, G = goal).
custom_map = [
    "SFFFFFFF",
    "FFFFFHFF",
    "FFFHFFFF",
    "FFFFFHFF",
    "FFFHFFFF",
    "FHHFFFHF",
    "FHFFHFHF",
    "FFFFFFFG",
]

# Training environment: the custom layout with slippery transitions enabled.
env = gym.make("FrozenLake-v1", desc=custom_map, is_slippery=True)

# Sizes of the discrete observation and action spaces; they give the Q-table its shape.
n_states, n_actions = env.observation_space.n, env.action_space.n

## Hyperparameters

In [None]:
# --- Q-learning update ---
# NOTE(review): 0.8 is an aggressive step size for an environment created with
# is_slippery=True, and epsilon hits its 0.05 floor after roughly 600 of the
# 20000 episodes — worth revisiting if the learned policy scores poorly.
alpha = 0.8    # learning rate
gamma = 0.95   # discount factor

# --- Epsilon-greedy exploration ---
# Epsilon starts at eps_start, is scaled by eps_decay once per episode, and is
# never allowed to fall below eps_end.
eps_start, eps_end, eps_decay = 1.0, 0.05, 0.995

# --- Training budget ---
episodes = 20_000   # number of training episodes
max_steps = 100     # upper bound on steps taken within a single episode

## Initialize Q-Table

In [None]:
# One row per state, one column per action; every action value starts at zero.
Q = np.zeros(shape=(n_states, n_actions), dtype=np.float64)

## Training using Q-Learning

In [None]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# Reads `env`, `Q` and the hyperparameters from the earlier cells and updates `Q`
# in place, so re-run the Q-table initialisation cell to train from scratch.

# Seed every RNG involved (Python's `random`, the action-space sampler, and the
# environment's own generator) so Restart & Run All reproduces the same Q-table.
SEED = 42
random.seed(SEED)
env.action_space.seed(SEED)

for episode in range(episodes):
    # Seed the environment only on the first reset; later resets continue the
    # same random stream instead of replaying identical episodes.
    state, _ = env.reset(seed=SEED if episode == 0 else None)

    # Exponentially decayed exploration rate, floored at eps_end.
    epsilon = max(eps_end, eps_start * (eps_decay ** episode))

    for _step in range(max_steps):
        if random.uniform(0, 1) < epsilon:
            # Explore: uniformly random action.
            action = env.action_space.sample()
        else:
            # Exploit, breaking ties at random. np.argmax always returns the
            # first maximum, so on an untrained (all-zero) row it would pick
            # action 0 every time and starve the other actions of updates.
            q_row = Q[state]
            best_actions = np.flatnonzero(q_row == q_row.max()).tolist()
            action = random.choice(best_actions)

        next_state, reward, terminated, truncated, _ = env.step(action)

        # A terminated transition has no future value. A truncated one (time
        # limit) is not a real terminal state, so it still bootstraps.
        future_value = 0.0 if terminated else np.max(Q[next_state])
        Q[state, action] += alpha * (
            reward + gamma * future_value - Q[state, action]
        )

        state = next_state

        if terminated or truncated:
            break

## Save Q-Table

In [None]:
# Persist the trained table to disk in NumPy's .npy format, then confirm.
np.save(file="frozenlake_qtable.npy", arr=Q)
print("Q-table saved.")

Q-table saved.


## Load Q-Table

In [None]:
# Read the Q-table back from the .npy file; the test loop uses this copy.
Q_loaded = np.load(file="frozenlake_qtable.npy")

## Testing Learned Policy

In [None]:

# Roll out the greedy policy from the loaded Q-table with on-screen rendering.
# NOTE(review): this rebinds `env` to the rendering environment, which is closed
# at the end of the cell — re-run the environment setup cell before re-running
# the training cell.
env = gym.make(
    "FrozenLake-v1",
    desc=custom_map,
    is_slippery=True,
    render_mode="human",
)

episodes_test = 5

try:
    for ep in range(episodes_test):
        state, _ = env.reset()
        done = False
        total_reward = 0

        # Step until the environment reports the episode terminated or truncated.
        while not done:
            # Purely greedy: take the highest-valued action for the current state.
            action = int(np.argmax(Q_loaded[state]))
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            done = terminated or truncated

        print(f"Episode {ep+1} | Reward: {total_reward}")
finally:
    # Always release the render window, even if an episode raises or the cell
    # is interrupted part-way through.
    env.close()


  from pkg_resources import resource_stream, resource_exists


Episode 1 | Reward: 0
Episode 2 | Reward: 0
Episode 3 | Reward: 0
