# RL Introduction
> An introduction to reinforcement learning (RL) built on Markov decision processes (MDPs)

- toc: true 
- badges: true
- comments: true
- categories: [RL]
- image: images/chart-preview.png

![](my_icons/rl_logo.png)

In [2]:
#hide
import math

import numpy as np

import gym


In [5]:
from gym.envs.registration import register

# Print NumPy arrays with three digits of precision in cell outputs.
np.set_printoptions(precision=3)

# Register a 4x4 FrozenLake variant with slipping disabled, under its own id.
register(
    id='Deterministic-4x4-FrozenLake-v0',
    entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv',
    kwargs=dict(map_name='4x4', is_slippery=False),
)


In [6]:

def policy_evaluation(P, nS, nA, policy, gamma=0.9, tol=1e-8):
    """Iteratively evaluate the state-value function of a stochastic policy.

    Every sweep visits the states in index order and replaces each state's
    value with its expected one-step return under ``policy``. The update is
    done in place, so states later in a sweep already use the refreshed
    values of the states visited before them.

    Parameters:
    ----------
    P: nested mapping or sequence
        Transition model. ``P[s][a]`` is an iterable of
        ``(probability, next_state, reward, terminal)`` tuples.
    nS: int
        Number of states; states are indexed ``0 .. nS - 1``.
    nA: int
        Number of actions. Kept for interface compatibility: the actions
        that are actually iterated are the entries of ``policy[s]``.
    policy: np.array[nS,nA]
        The policy to evaluate. ``policy[s][a]`` is the probability of
        taking action ``a`` in state ``s``.
    gamma: float
        Discount factor applied to the value of the successor state.
    tol: float
        Terminate policy evaluation when the largest absolute change of any
        state's value during one full sweep is smaller than ``tol``.
    Returns:
    -------
    value_function: np.ndarray[nS]
        The value function of the given policy, where value_function[s] is
        the value of state s
    """
    value_function = np.zeros(nS)

    while True:
        # Largest absolute value change observed during this sweep.
        max_change = 0

        for s in range(nS):
            # Expected one-step return of state s under the policy. A
            # dedicated name is used so the built-in `sum` is not shadowed.
            expected_return = 0
            for a, action_prob in enumerate(policy[s]):
                # The terminal flag of each transition is not needed here;
                # only probability, successor state and reward are used.
                for probability, next_state, reward, _ in P[s][a]:
                    expected_return += action_prob * probability * (
                        reward + gamma * value_function[next_state]
                    )

            max_change = max(
                max_change, np.abs(value_function[s] - expected_return)
            )
            # In-place update: the new value is visible to later states
            # within the same sweep.
            value_function[s] = expected_return

        if max_change < tol:
            break

    return value_function

In [11]:
env = gym.make("FrozenLake-v1")
# Work with the unwrapped environment; its transition table `env.P` is used below.
env = env.unwrapped
# Read the state/action counts from the spaces rather than from `env.nS` /
# `env.nA`, so the cell does not depend on those attributes being present.
n_states = env.observation_space.n
n_actions = env.action_space.n
# Uniform random policy: every action has probability 1 / n_actions in every state.
random_policy2 = np.ones([n_states, n_actions]) / n_actions
policy_evaluation(env.P, n_states, n_actions, random_policy2, tol=1e-8)

array([0.004, 0.004, 0.01 , 0.004, 0.007, 0.   , 0.026, 0.   , 0.019,
       0.058, 0.107, 0.   , 0.   , 0.13 , 0.391, 0.   ])