### Import libraries including the OpenAI gym module

In [60]:
import numpy as np
import gym
import random
#from frozen_lake import generate_random_map,FrozenLakeEnv

### Create a FrozenLake-v0 environment using the `gym.make()` method

In [89]:
# Build the environment once and cache the sizes of its two discrete spaces;
# they determine the shape of the Q-table created further down.
env = gym.make('FrozenLake-v0')
state_size = env.observation_space.n   # number of distinct states
action_size = env.action_space.n       # number of actions available per state

### What does it look like? Basically, a 4x4 grid world

In [90]:
# Print the current grid as text. In the output below, the highlighted letter
# appears to mark the agent's current position (initially the top-left S).
env.render()


[41mS[0mFFF
FHFH
FFFH
HFFG


### What are the action size and the total size of the state space?

In [91]:
# Report the dimensions of the problem: one line per (label, value) pair.
for label, value in (
    ("Number of available actions at any state:", action_size),
    ("Size of the total state space:", state_size),
):
    print(label, value)

Number of available actions at any state: 4
Size of the total state space: 16


In [93]:
# Start a fresh episode, then take a single step with action code 1.
# step() returns a 4-tuple, which the training loop further down unpacks as
# (new_state, reward, done, info). Being the last expression in the cell,
# that tuple is what gets displayed as the cell output.
env.reset()
env.step(1)

(1, 0.0, False, {'prob': 0.3333333333333333})

In [94]:
# Take one more step (action code 2) from wherever the previous cell left the
# agent. This relies on the environment state carried over from that cell, so
# the two cells must be run in order.
env.step(2)

(2, 0.0, False, {'prob': 0.3333333333333333})

### Initialize an empty (filled with zeroes) Q-table

In [11]:
# One row per state, one column per action; every Q-value starts at zero.
qtable = np.zeros(shape=(state_size, action_size), dtype=np.float64)

### Let's define some parameters for Q-learning

In [101]:
# --- Training schedule ---
total_episodes = 250   # number of episodes played during training
max_steps = 99         # upper bound on the number of steps in one episode

# --- Q-update coefficients ---
learning_rate = 0.8    # weight given to the new estimate in each update
gamma = 0.95           # discount factor applied to the next state's best Q-value

### Exploration (epsilon-greedy) parameters: the probability threshold for deciding whether to take the action with the current maximum Q-value or to select a completely random action from the action space

In [102]:
# Bounds and speed of the exploration-rate schedule used during training.
max_epsilon = 1.0      # exploration rate at the start of training
min_epsilon = 0.01     # floor that the exploration rate decays toward
decay_rate = 0.01      # exponential decay constant, applied once per episode

# Current exploration rate; training begins fully exploratory.
epsilon = max_epsilon

### Run the Q-learning to fill up the Q-table

In [103]:
# Train the Q-table with epsilon-greedy tabular Q-learning.
#
# The Q-table and the exploration rate are (re)initialised here so that this
# cell always performs a fresh training run. Without this, re-running the cell
# would silently continue from an already-trained table and an already-decayed
# epsilon while `rewards` started over. On a fresh kernel (Restart & Run All)
# the result is the same as before, because both values are already at these
# starting points when the cell is reached.
qtable = np.zeros((state_size, action_size))
epsilon = max_epsilon
rewards = []  # total reward collected in each episode, in order

for episode in range(total_episodes):
    state = env.reset()
    total_rewards = 0

    for step in range(max_steps):
        # Exploit the current best action when the draw exceeds epsilon,
        # otherwise explore with a uniformly sampled action.
        exp_exp_tradeoff = random.uniform(0, 1)
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state])
        else:
            action = env.action_space.sample()

        new_state, reward, done, info = env.step(action)

        # Q(s, a) <- Q(s, a) + lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
        td_target = reward + gamma * np.max(qtable[new_state])
        qtable[state, action] += learning_rate * (td_target - qtable[state, action])

        state = new_state
        total_rewards += reward
        if done:
            break

    # Decay epsilon exponentially from max_epsilon toward min_epsilon so that
    # later episodes rely more on the learned Q-values.
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * (episode + 1))
    rewards.append(total_rewards)

### Show the Q function table

In [104]:
# Dump the learned table: one row per state, one column per action.
# Rows that are still all zeros belong to states from which no action was ever
# taken during training. An episode stops as soon as `done` is returned, so
# states that end the episode never receive an update.
print(qtable)

[[9.86264015e-02 9.12458605e-02 9.46786422e-02 9.15141219e-02]
 [2.76554309e-02 7.50391696e-02 4.75262572e-03 7.38726202e-02]
 [7.70086210e-02 2.27811283e-02 6.81175443e-02 7.10883661e-02]
 [2.69734556e-02 1.54727310e-02 1.47086572e-02 7.21555355e-02]
 [1.22940622e-01 5.07195536e-02 2.34023009e-03 1.05384759e-02]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [1.62946187e-02 6.23308974e-04 3.32817579e-03 1.38906424e-02]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [1.46545963e-02 8.34212322e-02 1.82322142e-02 3.24048025e-01]
 [2.76834491e-02 6.44094658e-01 4.07571670e-02 9.69351630e-02]
 [1.93568435e-01 3.46275003e-02 2.70710172e-03 4.85800458e-03]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [5.15716414e-01 1.75007630e-01 6.03640158e-01 4.78241644e-01]
 [6.16145984e-01 6.14232048e-01 5.75909912e-01 8.11702284e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.000000

### Show a demo of playing a game

In [105]:
# Play the game: one greedy rollout using the learned Q-table.
# Each step prints a banner and the grid *before* acting; when the episode
# ends, the grid is rendered once more so the terminal state (and therefore
# the outcome) is visible instead of being cut off.
for episode in range(1):
    state = env.reset()
    print('*'*20)
    print('EPISODE ', episode)
    print('*'*20)

    for step in range(max_steps):
        print("-"*25)
        print("Step ",step)
        print("-"*25)
        env.render()
        # Always exploit here: take the action with the highest learned Q-value.
        action = np.argmax(qtable[state])
        state, reward, done, info = env.step(action)
        if done:
            # Show where the agent ended up and what the last step paid out.
            env.render()
            print("Episode finished after", step + 1, "steps; final reward:", reward)
            break
env.close()

********************
EPISODE  0
********************
-------------------------
Step  0
-------------------------

[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  1
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  2
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  3
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  4
-------------------------
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
-------------------------
Step  5
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  6
-------------------------
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
-------------------------
Step  7
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  8
-------------------------
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
-------------------------
Step  9
-------------------------
  (Left)
SFFF