In [44]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install gym
# %pip install 'gym[toy_text]'
import gym
import numpy as np
import time
import random

In [120]:
# Deterministic (non-slippery) FrozenLake. render_mode='human' opens a window
# and draws every step, which is slow but lets you watch the agent.
env = gym.make("FrozenLake-v1", render_mode='human', is_slippery=False)

# Seed both random sources used during training (random.uniform for the
# epsilon test and env.action_space.sample() for exploration) so that a
# fresh "Restart & Run All" reproduces the same run.
SEED = 42
random.seed(SEED)
env.action_space.seed(SEED)

action_size = env.action_space.n
state_size = env.observation_space.n

# Q-table: one row per state, one column per action, initialised to zero.
qtable = np.zeros((state_size, action_size))

# Hyperparameters (the value in parentheses is the note carried by the
# original trailing comment).
total_episodes = 1000  # number of training episodes (10000)
learning_rate = 0.2    # step size of the Q-value update (0.2)
max_steps = 100        # upper bound on steps per episode (100)
gamma = 0.99           # discount factor applied to the next state's best Q-value (0.99)
epsilon = 1            # current exploration probability; 1: explore, 0: exploit
max_epsilon = 1        # exploration probability at the start of the decay (1)
min_epsilon = 0.01     # floor the exploration probability decays towards (0.01)
decay_rate = 0.001     # exponential decay rate of epsilon per episode (0.001)

In [121]:
# Train the Q-table with epsilon-greedy tabular Q-learning.
# Reads the module-level env, qtable and hyperparameters; updates qtable in
# place and decays epsilon after every episode.
rewards = []  # total reward collected in each episode

for episode in range(total_episodes):
    # reset() returns (observation, info); only the observation is needed.
    state = env.reset()[0]
    done = False
    total_rewards = 0

    for step in range(max_steps):

        # With probability epsilon take a random action, otherwise act greedily.
        if random.uniform(0, 1) > epsilon:
            action = np.argmax(qtable[state, :])  # exploit: best known action for this state
        else:
            action = env.action_space.sample()  # explore: random action

        # step() returns (obs, reward, terminated, truncated, info). The episode
        # is over when either flag is set, so both must be checked.
        new_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        # No explicit env.render() here: the env is created with
        # render_mode='human', which already draws a frame on every step(),
        # so an extra call would render each step twice.

        max_new_state = np.max(qtable[new_state, :])

        # Q-learning update: move Q(s, a) towards reward + gamma * max_a' Q(s', a').
        qtable[state, action] = qtable[state, action] + learning_rate * (
            reward + gamma * max_new_state - qtable[state, action]
        )

        total_rewards += reward
        state = new_state

        print('Step: {}'.format(step))

        if done:
            break

    # Exponentially decay the exploration rate from max_epsilon towards min_epsilon.
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
    rewards.append(total_rewards)

    print('Episode: {}, epsilon: {}, rewards: {}.'.format(episode, epsilon, total_rewards))

# Average reward per episode over the whole training run.
print("Score:", str(sum(rewards) / total_episodes))
    

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 0, epsilon: 1.0, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 1, epsilon: 0.9990104948350412, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Step: 17
Episode: 2, epsilon: 0.9980219786806598, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 3, epsilon: 0.9970344505483393, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 4, epsilon: 0.9960479094505515, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 5, epsilon: 0.9950623544007555, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 6, epsilon: 0.9940777844133959, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 7, epsilon: 0.9930941985039028, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5

Step: 2
Step: 3
Step: 4
Step: 5
Episode: 60, epsilon: 0.9423468882484062, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 61, epsilon: 0.9414150073782496, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 62, epsilon: 0.940484057923178, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 63, epsilon: 0.9395540389522419, rewards: 0.0.
Step: 0
Step: 1
Episode: 64, epsilon: 0.9386249495354222, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Episode: 65, epsilon: 0.9376967887436294, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 66, epsilon: 0.9367695556487027, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Episode: 67, epsilon: 0.9358432493234088, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 68, epsilon: 0.9349178688414413, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6


Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Step: 17
Step: 18
Step: 19
Step: 20
Episode: 130, epsilon: 0.8793144766113556, rewards: 0.0.
Step: 0
Step: 1
Episode: 131, epsilon: 0.8784455966471331, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Episode: 132, epsilon: 0.8775775851285795, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 133, epsilon: 0.8767104411876834, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Episode: 134, epsilon: 0.8758441639573007, rewards: 1.0.
Step: 0
Step: 1
Episode: 135, epsilon: 0.8749787525711541, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 136, epsilon: 0.8741142061638321, rewards: 0.0.
Step: 0
Step: 1
Episode: 137, epsilon: 0.8732505238707883, rewards: 0.0.
Step: 0
Step: 1
Episode: 138, epsilon: 0

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Episode: 193, epsilon: 0.826237154397717, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Episode: 194, epsilon: 0.8254213252258911, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 195, epsilon: 0.8246063114754583, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Episode: 196, epsilon: 0.8237921123314047, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 197, epsilon: 0.8229787269795313, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 198, epsilon: 0.8221661546064527, rewards: 0.0.
Step: 0
Step: 1
Episode: 199, epsilon: 0.8213543943995963, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Episode: 200, epsilon: 0.820543445547202, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 201, epsilon: 0.8197333072383208, rewards: 0.0.
Step: 0
Step

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Episode: 262, epsilon: 0.7718159134705, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 263, epsilon: 0.7710544783380486, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 264, epsilon: 0.770293804260139, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 265, epsilon: 0.7695338904760971, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 266, epsilon: 0.768774736226009, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 267, epsilon: 0.7680163407507201, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 268, epsilon: 0.7672587032918353, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 269, epsil

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 331, epsilon: 0.7210231173501646, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 332, epsilon: 0.7203124496258988, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 333, epsilon: 0.719602492214142, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 334, epsilon: 0.7188932444049364, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Episode: 335, epsilon: 0.7181847054890343, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 336, epsilon: 0.7174768747578967, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 337, epsilon: 0.7167697515036928, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Step: 17
Step: 18
Step: 19
Episode: 338, epsilon: 0.7160633350192994, rewar

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Episode: 395, epsilon: 0.676943238856379, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 396, epsilon: 0.6762766289780126, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 397, epsilon: 0.6756106853763306, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 398, epsilon: 0.6749454073853897, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 399, epsilon: 0.6742807943399114, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Episode: 400, epsilon: 0.6736168455752829, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 401, epsilon: 0.6729535604275553, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 402, epsilon: 0.6722909382334432, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 403, epsilon: 0.6716

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 463, epsilon: 0.6330987061830813, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Episode: 464, epsilon: 0.6324759189224274, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 465, epsilon: 0.6318537541377444, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 466, epsilon: 0.6312322112068672, rewards: 1.0.
Step: 0
Step: 1
Episode: 467, epsilon: 0.6306112895082533, rewards: 0.0.
Step: 0
Step: 1
Episode: 468, epsilon: 0.6299909884209803, rewards: 0.0.
Step: 0
Step: 1
Episode: 469, epsilon: 0.6293713073247477, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Step: 17
Episode: 470, epsilon: 0.6287522455998737, rewards: 0.0.
Step: 0
Step: 1
Episode: 471, epsilon: 0.62813380

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Episode: 527, epsilon: 0.5944697016008592, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 528, epsilon: 0.5938855240367219, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Episode: 529, epsilon: 0.5933019303581573, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 530, epsilon: 0.5927189199815717, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 531, epsilon: 0.5921364923239546, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 532, epsilon: 0.5915546468028782, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 533, epsilon: 0.5909733828364971, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step:

Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Episode: 591, epsilon: 0.5582355021758396, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 592, epsilon: 0.5576875407000651, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Step: 16
Step: 17
Step: 18
Step: 19
Step: 20
Step: 21
Step: 22
Step: 23
Step: 24
Episode: 593, epsilon: 0.557140126911877, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 594, epsilon: 0.5565932602638612, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 595, epsilon: 0.5560469402091515, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Episode: 596, epsilon: 0.5555011662014273, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 597, epsilon: 0.5549559376949149, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step:

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 662, epsilon: 0.5206604781882281, rewards: 0.0.
Step: 0
Step: 1
Episode: 663, epsilon: 0.5201500729551902, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 664, epsilon: 0.5196401778722678, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 665, epsilon: 0.5191307924295656, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Episode: 666, epsilon: 0.5186219161176984, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 667, epsilon: 0.5181135484277896, rewards: 0.0.
Step: 0
Step: 1
Episode: 668, epsilon: 0.5176056888514716, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 669, epsilon: 0.5170983368808848, rewards: 1.0.
Step: 0
Step: 1
Episode: 670, epsilon: 0.516591492008677, rewards: 0.0.
Step: 0
Step: 1
Episode: 671, epsilon: 0.5160851537280036, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 672, epsilon: 0.515579321532526, rewards: 0.0.
Step: 0

Step: 9
Step: 10
Step: 11
Step: 12
Episode: 734, epsilon: 0.48518535222387066, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 735, epsilon: 0.4847104043851452, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Episode: 736, epsilon: 0.4842359312568636, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 737, epsilon: 0.4837619323645528, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 738, epsilon: 0.48328840723421385, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 739, epsilon: 0.48281535539232157, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 740, epsilon: 0.48234277636582407, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 741, epsilon: 0.4818706696821423, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9


Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 804, epsilon: 0.45305988572336825, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 805, epsilon: 0.45261704729376284, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Episode: 806, epsilon: 0.4521746514812417, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Episode: 807, epsilon: 0.4517326978434088, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Episode: 808, epsilon: 0.45129118593831063, rewards: 0.0.
Step: 0
Step: 1
Episode: 809, epsilon: 0.45085011532443514, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 810, epsilon: 0.4504094855607117, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Episode: 811, epsilon: 0.44996929620651055, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step

Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 873, epsilon: 0.4235196122180188, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Episode: 874, epsilon: 0.42310629929670424, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 875, epsilon: 0.4226933994817233, rewards: 0.0.
Step: 0
Step: 1
Episode: 876, epsilon: 0.4222809123601763, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Episode: 877, epsilon: 0.421868837519576, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 878, epsilon: 0.4214571745478475, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Episode: 879, epsilon: 0.42104592303332794, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 880, epsilon: 0.42063508256476556, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 881, epsilon: 0.42022465273132004, rewards: 0.0.


Step: 5
Step: 6
Step: 7
Episode: 939, epsilon: 0.397108471987189, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Episode: 940, epsilon: 0.3967215570049359, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 941, epsilon: 0.39633502874427196, rewards: 1.0.
Step: 0
Step: 1
Episode: 942, epsilon: 0.395948886818669, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 943, epsilon: 0.39556313084198497, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Episode: 944, epsilon: 0.3951777604284639, rewards: 0.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Step: 9
Step: 10
Step: 11
Step: 12
Step: 13
Step: 14
Step: 15
Episode: 945, epsilon: 0.3947927751927355, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6
Step: 7
Step: 8
Episode: 946, epsilon: 0.3944081747498142, rewards: 1.0.
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Episode: 947, e

In [123]:
# pickle is part of the Python standard library, so no pip install is needed.
import pickle

# Show the learned Q-table (rows are states, columns are actions).
print(qtable)

# Persist the Q-table to disk so it can be reloaded later.
with open('qtable.pkl', 'wb') as out_file:
    pickle.dump(qtable, out_file)

[[0.94148015 0.94244609 0.95099005 0.94148015]
 [0.94148015 0.         0.96059601 0.95099005]
 [0.95099005 0.970299   0.95099005 0.96059601]
 [0.96059601 0.         0.94983531 0.93623972]
 [0.93187482 0.9567561  0.         0.94148015]
 [0.         0.         0.         0.        ]
 [0.         0.9801     0.         0.96059601]
 [0.         0.         0.         0.        ]
 [0.76323145 0.         0.96978624 0.84337072]
 [0.836756   0.93192761 0.98009959 0.        ]
 [0.97028651 0.99       0.         0.97029839]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.88757573 0.98995546 0.76840893]
 [0.97972497 0.98999934 1.         0.98007762]
 [0.         0.         0.         0.        ]]


In [125]:
# Exploit the learned Q-table: always take the highest-valued action and
# report how each of 10 episodes ends.

# Load the Q-table from the file.
# NOTE: pickle.load can execute arbitrary code while loading; only load a
# qtable.pkl that this notebook wrote itself.
with open('qtable.pkl', 'rb') as f:
    qtable = pickle.load(f)

for episode in range(10):

    # reset() returns (observation, info); only the observation is needed.
    state = env.reset()[0]

    print('Episode: {}'.format(episode))

    for step in range(max_steps):

        action = np.argmax(qtable[state, :])
        # step() returns (obs, reward, terminated, truncated, info); the env is
        # created with render_mode='human', so it already draws each step and
        # no explicit env.render() call is needed.
        new_state, reward, terminated, truncated, info = env.step(action)

        if terminated or truncated:
            if reward > 0:
                # A positive reward is only given for reaching the goal.
                print("Steps required:", str(step+1))
            else:
                # The episode ended in a hole or hit the time limit.
                print("Episode ended without reaching the goal after {} steps.".format(step + 1))
            break

        state = new_state
    else:
        # The step budget ran out before the environment signalled an end.
        print("No terminal state reached within {} steps.".format(max_steps))
        

Episode: 0
Steps required: 6
Episode: 1
Steps required: 6
Episode: 2
Steps required: 6
Episode: 3
Steps required: 6
Episode: 4
Steps required: 6
Episode: 5
Steps required: 6
Episode: 6
Steps required: 6
Episode: 7
Steps required: 6
Episode: 8
Steps required: 6
Episode: 9
Steps required: 6


In [None]:
# Random walk (disabled).
# The code below is wrapped in a triple-quoted string literal, so it is never
# executed. If it were run, it would play 10 episodes; each one creates a fresh
# FrozenLake env, takes up to 30 random actions with a 0.5 s pause after each,
# stops early when element 2 of the step() result is truthy, and closes the env.

'''
j = 0

while j < 10 :

    print('Episode: {}'.format(j))
    env=gym.make("FrozenLake-v1",render_mode='human', is_slippery = False)
    env.reset()
    env.render()
    numberOfIterations=30

    for i in range(numberOfIterations):
        randomAction= env.action_space.sample()
        returnValue=env.step(randomAction)
        env.render()
        print('Iteration: {} and action {}'.format(i+1,randomAction))
        time.sleep(0.5)
        if returnValue[2]:
            break

    env.close()  

    j += 1 
    
''' 