In [11]:

!apt-get update > /dev/null
!apt-get install -y xvfb > /dev/null
!pip install gymnasium box2d pygame > /dev/null


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [12]:
# ----------- Learning Cell ----------------
# Tabular Q-learning on the deterministic (non-slippery) FrozenLake-v1 grid.
# Leaves behind `Q_table` (rows = states, columns = actions), which the
# following cells read, and prints the table plus the mean episode return.

import gymnasium as gym
import numpy as np

SEED = 42    # fixes exploration so Restart & Run All reproduces the same run

Lr = 0.78    # learning rate: step size of every Q-value update
Df = 0.95    # discount factor applied to the bootstrapped next-state value

eps_str = 1.0     # exploration rate used for the first episode
eps_end = 0.1     # lower bound the exploration rate decays towards
eps_dec = 0.999   # multiplicative decay applied once per episode

n = 10000    #number of episodes that you want

# reward_schedule is presumably (goal, hole, ordinary step) - confirm against
# the gymnasium FrozenLake documentation for the installed version.
env = gym.make("FrozenLake-v1",is_slippery=False,success_rate=1,reward_schedule=(10, -10, -1))

# Size the table from the environment instead of hard-coding 16 x 4, so a
# different map size does not silently index out of bounds.
Q_table = np.zeros((env.observation_space.n, env.action_space.n))

# Seed both sources of randomness used by the epsilon-greedy policy.
rng = np.random.default_rng(SEED)
env.action_space.seed(SEED)

abs_reward = 0   # running sum of episode returns (averaged at the end)

for eps_num in range(n):
    # Seeding only the first reset initialises the environment's RNG once;
    # later resets pass seed=None and keep drawing from that generator.
    state, info = env.reset(seed=SEED if eps_num == 0 else None)
    done = False
    total_reward = 0

    # Exponentially decayed exploration rate, clipped at eps_end.
    eps = max(eps_end, eps_str * (eps_dec ** eps_num))

    while not done:
        # ε-greedy policy: exploit with probability 1 - eps, otherwise explore
        if rng.random() > eps:
            action = np.argmax(Q_table[state])
        else:
            action = env.action_space.sample()

        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Q-learning update. A terminated transition has no future return, so
        # the bootstrap term is dropped explicitly; a time-limit truncation
        # still bootstraps from next_state.
        future_value = 0.0 if terminated else np.max(Q_table[next_state])
        Q_table[state, action] += Lr * (
            reward
            + Df * future_value
            - Q_table[state, action]
        )

        state = next_state
        total_reward += reward

    abs_reward += total_reward

env.close()

print("Learned Q-table:")
print(Q_table)
print("Average reward:", abs_reward / n)

Learned Q-table:
[[  2.05275672   3.21342812   3.21342812   2.05275672]
 [  2.05275672 -10.           4.4351875    3.21342812]
 [  3.21342812   5.72125      3.21342812   4.4351875 ]
 [  4.4351875  -10.           3.21342812   3.21339021]
 [  3.21342812   4.4351875  -10.           2.05275672]
 [  0.           0.           0.           0.        ]
 [-10.           7.075      -10.           4.4351875 ]
 [  0.           0.           0.           0.        ]
 [  4.4351875  -10.           5.72125      3.21342812]
 [  4.4351875    7.075        7.075      -10.        ]
 [  5.72125      8.5        -10.           5.72125   ]
 [  0.           0.           0.           0.        ]
 [  0.           0.           0.           0.        ]
 [-10.           7.075        8.5          5.72125   ]
 [  7.075        8.5         10.           7.075     ]
 [  0.           0.           0.           0.        ]]
Average reward: 1.0979


In [13]:
#-----------Testing Cell---------------
# Roll out one episode that always takes the highest-valued action in Q_table
# (no exploration) and report the visited states and the episode return.

eval_kwargs = dict(
    is_slippery=False,
    success_rate=1,
    reward_schedule=(10, -10, -1),
)
env = gym.make("FrozenLake-v1", **eval_kwargs)

state, info = env.reset()
path_states = [state]   # visited states, starting with the reset state
total_reward = 0
done = False

while True:
    # Greedy choice: the column with the largest Q-value for this state.
    action = np.argmax(Q_table[state])
    next_state, reward, terminated, truncated, info = env.step(action)

    path_states.append(next_state)
    total_reward += reward
    state = next_state

    # Stop on either a terminal state or a truncation signalled by the env.
    done = terminated or truncated
    if done:
        break

env.close()

print("Path (states):", path_states)
print("Total Reward:", total_reward)


Path (states): [0, 4, 8, 9, 13, 14, 15]
Total Reward: 5


In [14]:
#----------Saving and Downloading Q_Table-----------------

# Write the table to disk first, so the file exists even when the browser
# download below is not available.
np.save("Q_table.npy", Q_table)
# Reload from disk: confirms the file is readable and rebinds Q_table to the
# saved copy.
Q_table = np.load("Q_table.npy")

try:
    # google.colab is only importable inside a Colab runtime.
    from google.colab import files
except ImportError:
    print("google.colab is not available; Q_table.npy was saved locally but not downloaded.")
else:
    # Ask the browser to download the saved file.
    files.download("Q_table.npy")




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>