# Building a Q-table

We can populate the values of the Q-table using the following strategy:
1. Initialize the game environment and the Q-table with zeros.
2. Take a random action and fetch the next state, reward, the flag stating
whether the game was completed, and additional information.
3. Update the Q-value using the Bellman equation we defined earlier.
4. Repeat steps 2 and 3 so that there's a maximum of 50 steps in an episode.
5. Repeat steps 2, 3, and 4 over multiple episodes.

In [2]:
import numpy as np
import gym
import random
env = gym.make('FrozenLake-v1', is_slippery=False)

In [3]:
action_size=env.action_space.n
state_size=env.observation_space.n
qtable=np.zeros((state_size,action_size))

In [4]:
episode_rewards = []
for i in range(10000):
    state=env.reset()
    total_rewards = 0
    for step in range(50):
        action=env.action_space.sample()
        new_state,reward,done,info=env.step(action)
        qtable[state,action]+=0.1*(reward+0.9*np.max(qtable[new_state,:])-qtable[state,action])
        state=new_state
        total_rewards+=reward
    episode_rewards.append(total_rewards)
print(qtable)

  if not isinstance(terminated, (bool, np.bool8)):


ValueError: too many values to unpack (expected 4)

In [None]:
episode_rewards = []
epsilon=1
max_epsilon=1
min_epsilon=0.01
decay_rate=0.005
for episode in range(1000):
    state=env.reset()
    total_rewards = 0
    for step in range(50):
        exp_exp_tradeoff=random.uniform(0,1)
        ## Exploitation:
        if exp_exp_tradeoff>epsilon:
            action=np.argmax(qtable[state,:])
        else:
            ## Exploration
            action=env.action_space.sample()
        new_state,reward,done,info=env.step(action)
        qtable[state,action]+=0.9*(reward+0.9*np.max(qtable[new_state,:])-qtable[state,action])
        state=new_state
        total_rewards+=reward
    episode_rewards.append(total_rewards)
    epsilon=min_epsilon+(max_epsilon-min_epsilon)*np.exp(decay_rate*episode)
print(qtable)

[[0.531441   0.59049    0.59049    0.531441  ]
 [0.531441   0.         0.6561     0.59049   ]
 [0.59049    0.729      0.59049    0.6561    ]
 [0.6561     0.         0.59049    0.59049   ]
 [0.59049    0.6561     0.         0.531441  ]
 [0.         0.         0.         0.        ]
 [0.         0.81       0.         0.6561    ]
 [0.         0.         0.         0.        ]
 [0.6561     0.         0.729      0.59049   ]
 [0.6561     0.81       0.81       0.        ]
 [0.729      0.9        0.         0.729     ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.80999999 0.9        0.729     ]
 [0.81       0.9        1.         0.81      ]
 [0.         0.         0.         0.        ]]


In [None]:
env.reset()
for episode in range(1):
    state=env.reset()
    step=0
    done=False
    print("-----------------------")
    print("Episode",episode)
    for step in range(50):
        env.render()
        action=np.argmax(qtable[state,:])
        print(action)
        new_state,reward,done,info=env.step(action) 
        if done:
            print("Number of Steps",step+1)
            break
        state=new_state
env.close()

-----------------------
Episode 0

[41mS[0mFFF
FHFH
FFFH
HFFG
2
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
2
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
1
  (Down)
SFFF
FH[41mF[0mH
FFFH
HFFG
1
  (Down)
SFFF
FHFH
FF[41mF[0mH
HFFG
1
  (Down)
SFFF
FHFH
FFFH
HF[41mF[0mG
2
Number of Steps 6
