In [None]:
!pip install gymnasium



In [None]:
import gymnasium as gym
import numpy as np
import pickle


def run(
    episodes,
    render=False,
    *,
    learning_rate_alpha=0.8,
    discount_factor_gamma=0.95,
    eps_start=0.9,
    eps_end=0.01,
    eps_decay=0.00015,
    qtable_path="frozen_lake_qtable.pkl",
):
    """Train a tabular Q-learning agent on a custom 6x6 FrozenLake map.

    The agent follows an epsilon-greedy policy whose epsilon is decreased
    linearly by ``eps_decay`` after every episode until it reaches
    ``eps_end``. When training completes, the Q-table is pickled to
    ``qtable_path``.

    Args:
        episodes: Number of training episodes to run.
        render: If true, create the environment with ``render_mode="human"``.
        learning_rate_alpha: Step size of the Q-value update.
        discount_factor_gamma: Discount applied to the bootstrapped value of
            the next state.
        eps_start: Exploration probability used for the first episode.
        eps_end: Lower bound for the exploration probability.
        eps_decay: Amount subtracted from epsilon after each episode.
        qtable_path: File the trained Q-table is pickled to.

    Returns:
        A ``(q, rewards_per_episode)`` tuple: the learned Q-table with one
        row per state and one column per action, and an array of length
        ``episodes`` holding 1 for every episode whose final reward was 1
        and 0 otherwise.
    """
    # Custom 6x6 FrozenLake map (Gymnasium legend: S=start, F=frozen,
    # H=hole, G=goal).
    custom_map = [
        "SFFFFF",
        "FHFHFF",
        "FFFHFF",
        "HFFFFH",
        "FHFFHF",
        "FFFFFG",
    ]

    env = gym.make(
        "FrozenLake-v1",
        desc=custom_map,
        is_slippery=False,
        render_mode="human" if render else None,
    )

    # try/finally guarantees the environment (and its render window, if any)
    # is released even when training is interrupted or raises.
    try:
        # One row per state, one column per action (36 x 4 for this map).
        q = np.zeros((env.observation_space.n, env.action_space.n))
        rewards_per_episode = np.zeros(episodes)

        # Keep the running exploration rate separate from the eps_start
        # argument so the parameter keeps meaning "initial value".
        epsilon = eps_start

        for episode in range(episodes):
            state, _ = env.reset()
            terminated = False
            truncated = False
            reward = 0

            while not (terminated or truncated):
                # Epsilon-greedy: explore with probability epsilon, otherwise
                # exploit the best known action for the current state.
                if np.random.random() < epsilon:
                    action = env.action_space.sample()
                else:
                    action = int(np.argmax(q[state, :]))

                new_state, reward, terminated, truncated, _ = env.step(action)

                # Q-learning update. Rows of terminal states are never
                # written to (no action is taken from them), so they stay
                # zero and the bootstrap term vanishes on terminal steps.
                td_target = reward + discount_factor_gamma * np.max(q[new_state, :])
                q[state, action] += learning_rate_alpha * (td_target - q[state, action])

                state = new_state

            # Record a success when the episode's last step paid reward 1.
            if reward == 1:
                rewards_per_episode[episode] = 1

            epsilon = max(epsilon - eps_decay, eps_end)
    finally:
        env.close()

    # Persist the learned Q-table.
    with open(qtable_path, "wb") as f:
        pickle.dump(q, f)

    print("Training is now finished")

    return q, rewards_per_episode



# Script entry point: train for 7000 episodes without rendering.
if __name__ == "__main__":
    run(episodes=7000)


Training is now finished
