In [3]:
import gym
import numpy as np

def policy_evaluation(env, policy, gamma=1.0, theta=1e-6):
    V = np.zeros(env.observation_space.n)
    while True:
        delta = 0
        for s in range(env.observation_space.n):
            v = sum(prob * (reward + gamma * V[next_state])
                    for action, action_prob in enumerate(policy[s])
                    for prob, next_state, reward, _ in env.P[s][action])
            delta = max(delta, np.abs(v - V[s]))
            V[s] = v
        if delta < theta:
            break
    return V

def policy_iteration(env, gamma=1.0):
    policy = np.ones([env.observation_space.n, env.action_space.n]) / env.action_space.n
    while True:
        V = policy_evaluation(env, policy, gamma)
        new_policy = np.zeros([env.observation_space.n, env.action_space.n])
        for s in range(env.observation_space.n):
            action_values = np.zeros(env.action_space.n)
            for a in range(env.action_space.n):
                for prob, next_state, reward, _ in env.P[s][a]:
                    action_values[a] += prob * (reward + gamma * V[next_state])
            best_action = np.argmax(action_values)
            new_policy[s][best_action] = 1.0
        if np.array_equal(new_policy, policy):
            break
        policy = new_policy
    return policy, V

# Define the environment
env = gym.make('FrozenLake-v1')

# Perform Policy Iteration
optimal_policy, optimal_value = policy_iteration(env)

# Now you can use the results
print("Optimal Policy:")
print(optimal_policy)
print("Optimal Value Function:")
print(optimal_value)


Optimal Policy:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]]
Optimal Value Function:
[inf inf inf inf inf  0. inf  0. inf inf inf  0.  0. inf inf  0.]


  v = sum(prob * (reward + gamma * V[next_state])
  delta = max(delta, np.abs(v - V[s]))
