<a href="https://colab.research.google.com/github/nikhilreddy2002/FrozenLake-v0/blob/main/frozen_lake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **FrozenLake-v0**
<br>
The agent controls the movement of a character in a grid world. Some tiles of the grid are walkable, and others lead to the agent falling into the water. Additionally, the movement direction of the agent is uncertain and only partially depends on the chosen direction. The agent is rewarded for finding a walkable path to a goal tile.
<br>
Link:  https://gym.openai.com/envs/FrozenLake-v0/

In [1]:
import numpy as np
import gym
import random
import time
from IPython.display import clear_output

In [4]:
# Create the FrozenLake-v0 environment through Gym's registry. Later cells
# read its observation/action spaces and call reset/step/render on `env`.
env = gym.make("FrozenLake-v0") 

In [10]:
# Q-table: one row per discrete state and one column per discrete action,
# with every entry initialised to zero.
state_space_size, action_space_size = env.observation_space.n, env.action_space.n
q_table = np.zeros(shape=(state_space_size, action_space_size))
q_table  # bare last expression so the notebook displays the fresh table

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [36]:
# --- Training schedule ---
num_episodes = 10_000           # number of training episodes to run
max_steps_per_episode = 100     # cap on the steps taken inside one episode

# --- Q-learning update ---
learning_rate = 0.1             # weight given to the new estimate in the Q update
discount_rate = 0.99            # factor applied to the best next-state Q value

# --- Epsilon-greedy exploration ---
max_exploration_rate = 1        # exploration rate at the start of training
min_exploration_rate = 0.01     # floor the exploration rate decays towards
exploration_decay_rate = 0.001  # coefficient of the per-episode exponential decay
exploration_rate = max_exploration_rate  # current rate; starts at the maximum

In [37]:
# Train the Q-table with epsilon-greedy Q-learning.
#
# Reads `env`, `q_table` and the hyperparameters defined in earlier cells.
# `q_table` and `exploration_rate` are updated in place, so re-running this
# cell continues from their current values instead of restarting training;
# re-run the Q-table and hyperparameter cells first for a fresh run.
rewards_all_episodes = []
for episode in range(num_episodes):
  state = env.reset()
  done = False
  rewards_for_the_current_episode = 0
  for step in range(max_steps_per_episode):
    # Epsilon-greedy: exploit the best known action when the random draw
    # exceeds the current exploration rate, otherwise sample a random action.
    exploration_rate_random = random.uniform(0, 1)
    if exploration_rate_random > exploration_rate:
      action = np.argmax(q_table[state, :])
    else:
      action = env.action_space.sample()

    # Take the chosen action in the environment.
    new_state, reward, done, info = env.step(action)

    # Q-learning update: blend the old estimate with the bootstrapped target
    # reward + discount_rate * max_a Q(new_state, a).
    q_table[state, action] = (1 - learning_rate) * q_table[state, action] + learning_rate * (reward + discount_rate * np.max(q_table[new_state, :]))
    state = new_state
    rewards_for_the_current_episode += reward
    if done:
      break

  # Decay the exploration rate exponentially with the episode index, from
  # max_exploration_rate towards min_exploration_rate.
  exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
  rewards_all_episodes.append(rewards_for_the_current_episode)

# Report the average reward over consecutive blocks of episodes.
# Slicing (rather than np.split with a float section count) also works when
# num_episodes is not a multiple of the block size: the last block is simply
# shorter and is labelled with the real episode count it ends on. Using the
# mean of each block avoids the rounding noise of summing r/1000 term by term.
episodes_per_report = 1000
rewards_array = np.asarray(rewards_all_episodes, dtype=float)
print("********Average reward per thousand episodes********\n")
for start in range(0, len(rewards_array), episodes_per_report):
  chunk = rewards_array[start:start + episodes_per_report]
  print(start + len(chunk), ": ", chunk.mean())

********Average reward per thousand episodes********

1000 :  0.03900000000000003
2000 :  0.17200000000000013
3000 :  0.4070000000000003
4000 :  0.5480000000000004
5000 :  0.6420000000000005
6000 :  0.6560000000000005
7000 :  0.6710000000000005
8000 :  0.6990000000000005
9000 :  0.6640000000000005
10000 :  0.6770000000000005


In [41]:
# Print the Q-table after the training cell's in-place updates
# (rows are states, columns are actions).
print(q_table)

[[0.57695759 0.51784906 0.51608208 0.50871919]
 [0.39735985 0.28585045 0.35566082 0.51348129]
 [0.41736987 0.41313841 0.41678921 0.47088702]
 [0.36305345 0.26678322 0.34835851 0.45514028]
 [0.59832218 0.41204337 0.36729944 0.32051531]
 [0.         0.         0.         0.        ]
 [0.21151544 0.1452824  0.33854893 0.11612866]
 [0.         0.         0.         0.        ]
 [0.28499737 0.41429239 0.41601416 0.63898973]
 [0.34196811 0.67242248 0.44319677 0.30827074]
 [0.61093782 0.42196856 0.28396192 0.27041202]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.58809086 0.57061401 0.79313743 0.37518641]
 [0.72413694 0.90249424 0.75622204 0.74052027]
 [0.         0.         0.         0.        ]]


In [44]:
# Watch the agent play 10 episodes, always taking the greedy action from the
# current Q-table, and render the grid after every step.
for episode in range(10):
  state = env.reset()
  done = False
  print("*****EPISODE ", episode+1, "*****\n\n\n\n")
  time.sleep(1)
  # The step cap is `max_steps_per_episode` from the hyperparameter cell; the
  # previous name `max_states_per_episode` is not defined anywhere in the
  # notebook and raised NameError on a fresh kernel.
  for step in range(max_steps_per_episode):
    # Redraw the grid in place so the output reads as an animation.
    clear_output(wait=True)
    env.render()
    time.sleep(0.3)

    # Pure exploitation: no exploration while demonstrating the policy.
    action = np.argmax(q_table[state, :])
    new_state, reward, done, info = env.step(action)

    if done:
      clear_output(wait=True)
      env.render()
      # NOTE(review): every terminal step with reward != 1 is reported as a
      # hole, including an episode ended by an environment time limit, if
      # one applies. Confirm that this is the intended message.
      if reward == 1:
        print("****You reached the goal!****")
      else:
        print("****You fell through a hole!****")
      time.sleep(3)
      clear_output(wait=True)
      break

    state = new_state


  (Down)
SFFF
FHFH
FFFH
HFF[41mG[0m
****You reached the goal!****
