In [1]:
import gym
import numpy as np

In [2]:
def _action_values(env_model, state, action_len, discount, values):
    """Return the one-step lookahead value of each action taken from `state`."""
    q_values = np.zeros(action_len)
    for action in range(action_len):
        for transition_prob, next_state, reward, done in env_model[state][action]:
            # A transition flagged `done` ends the episode, so nothing is
            # bootstrapped from the value of its next state.
            future = 0.0 if done else discount * values[next_state]
            q_values[action] += transition_prob * (reward + future)
    return q_values


def value_iteration(env_model, discount, theta=0.0001):
    """Solve a tabular MDP with value iteration and extract a greedy policy.

    Parameters
    ----------
    env_model : mapping or sequence
        `env_model[s][a]` is an iterable of
        `(transition_prob, next_state, reward, done)` tuples. States are
        indexed `0 .. len(env_model) - 1`, and every state is expected to
        offer the same number of actions as state 0, indexed from 0.
    discount : float
        Discount factor applied to the value of the next state.
    theta : float, optional
        Sweeps continue while the largest value change seen in a sweep is
        greater than `theta`.

    Returns
    -------
    states : numpy.ndarray, shape (n_states,)
        Estimated value of each state.
    policy : numpy.ndarray, shape (n_states, n_actions)
        One-hot rows marking the greedy action for each state (ties go to
        the lowest action index, as `np.argmax` does).
    """
    # Sizes come from the model itself rather than from a global `env`, so the
    # function works for any model it is handed and on a fresh kernel.
    state_len = len(env_model)
    action_len = len(env_model[0]) if state_len else 0

    delta = theta * 2  # guarantees that the first sweep runs
    states = np.zeros(state_len)
    while delta > theta:
        delta = 0
        for s in range(state_len):
            # In-place update: later states in the same sweep already see the
            # refreshed values of earlier ones.
            v_max = np.max(_action_values(env_model, s, action_len, discount, states))
            delta = max(delta, np.abs(v_max - states[s]))
            states[s] = v_max

    policy = np.zeros((state_len, action_len))
    for s in range(state_len):
        # Uses the same lookahead as the sweeps (including the `done`
        # handling) so the policy is greedy for the values computed above.
        best_action = np.argmax(_action_values(env_model, s, action_len, discount, states))
        policy[s, best_action] = 1.

    return states, policy

In [3]:
from gym.envs.registration import register
# Register a FrozenLake variant on the 4x4 map with slipping switched off,
# under its own id so it can be built with gym.make.
register(
    id="FrozenLakeNotSlippery-v0",
    entry_point="gym.envs.toy_text:FrozenLakeEnv",
    max_episode_steps=100,
    # Original note: optimum = .8196.
    # NOTE(review): confirm this threshold/optimum pair is the intended one
    # for the non-slippery variant.
    reward_threshold=0.78,
    kwargs=dict(map_name="4x4", is_slippery=False),
)

In [4]:
# Build the non-slippery FrozenLake environment and solve it with value iteration.
env = gym.make('FrozenLakeNotSlippery-v0')

gamma = 0.99
# gym.make can hand back a wrapper around the environment; the transition
# table `P` belongs to the base environment, so read it through `unwrapped`
# instead of relying on the wrapper to forward the attribute.
state_value_array, policy_array = value_iteration(env.unwrapped.P, gamma)

In [5]:
# value_iteration already returns the policy as (n_states, n_actions), so it is
# displayed as-is; no hard-coded 16x4 reshape that would break on another map.
np.round(policy_array, 3)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.]])