# Q-Learning on FrozenLake-v1
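Train a simple tabular Q-learning agent on the deterministic (`is_slippery=False`) FrozenLake-v1 environment and plot the total reward collected in each episode.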

In [None]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

In [None]:
# Deterministic FrozenLake: is_slippery=False is passed to the environment.
env = gym.make("FrozenLake-v1", is_slippery=False)

# Sizes of the discrete observation and action spaces reported by the env.
n_states = env.observation_space.n
n_actions = env.action_space.n


def _zero_action_values():
    """Return a fresh all-zero array with one entry per action."""
    return np.zeros(n_actions)


# Q-table keyed by state; a zero row is created the first time a state is
# looked up, so states never need to be registered up front.
Q = defaultdict(_zero_action_values)

In [None]:
def epsilon_greedy_policy(state, epsilon=0.1):
    """Pick an action for ``state`` with an epsilon-greedy rule over ``Q``.

    With probability ``epsilon`` a uniformly random action is returned;
    otherwise one of the actions with the highest Q-value is returned.
    Ties are broken uniformly at random: ``np.argmax`` on its own always
    returns the lowest index, so on an all-zero row the greedy branch would
    pick action 0 every single time.

    Args:
        state: Key into the module-level ``Q`` table.
        epsilon: Probability of taking a random (exploratory) action.

    Returns:
        The chosen action index as an ``int``.
    """
    if np.random.rand() < epsilon:
        return np.random.randint(n_actions)
    action_values = Q[state]
    # Collect every action that attains the maximum, then choose among them.
    best_actions = np.flatnonzero(action_values == action_values.max())
    return int(np.random.choice(best_actions))

In [None]:
# Hyperparameters
alpha = 0.8          # learning rate: step size of each Q-value update
gamma = 0.95         # discount factor applied to the next state's value
epsilon = 0.1        # exploration probability passed to the policy
n_episodes = 2000    # number of training episodes
reward_history = []  # total reward collected in each episode, in order

for episode in range(n_episodes):
    state, _ = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = epsilon_greedy_policy(state, epsilon)
        next_state, reward, terminated, truncated, _ = env.step(action)
        # The episode ends on either signal, but only `terminated` means
        # there is no future return to bootstrap from.
        done = terminated or truncated
        if terminated:
            td_target = reward
        else:
            # Q-learning target: reward plus discounted best next value.
            td_target = reward + gamma * np.max(Q[next_state])
        Q[state][action] += alpha * (td_target - Q[state][action])
        state = next_state
        total_reward += reward
    reward_history.append(total_reward)

In [None]:
# Plot the per-episode totals on the current axes (created if none exist).
ax = plt.gca()
ax.plot(reward_history)
ax.set_title("Reward over Episodes")
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.grid(True)
plt.show()

Try changing the environment (for example `is_slippery=True`), the number of episodes, `epsilon`, `alpha`, or `gamma`, and compare the resulting reward curves.