# Monte Carlo prediction

In [7]:
import numpy as np
import gym

In [2]:
env = gym.make('FrozenLake8x8-v0')

In [3]:
#define random policy
def policy(state):
    return np.random.randint(0,4)

In [4]:
def mc_prediction(policy,n_episodes,env,gamma):
    
    n_states = env.observation_space.n
    n_actions = env.observation_space.n
    
    #initialize state-value function
    V = np.zeros((n_states))
    N = np.zeros((n_states))
    
    for episode in range(n_episodes):
        
        observation = env.reset()
        state_reward_pairs = []
        
        #generate episode (the agent gets 200 steps to reach the goal)
        for t in range(200):
            action = policy(observation)
            old_observation = observation
            observation, reward, done, info = env.step(action)
            state_reward_pairs.append((old_observation,reward))
            if done:
                break
        
        #update values for visited states
        G = 0
        #loop over states going backwards from the terminal state
        for state, reward in state_reward_pairs[::-1]:
            G = reward + gamma * G 
            N[state] += 1
            V[state] = (V[state] + G) / N[state]
    
    return V

In [5]:
gamma = 0.9
V = mc_prediction(policy,n_episodes=20000,env,gamma)

In [6]:
V

array([0.00000000e+000, 0.00000000e+000, 6.99792668e-290, 1.35822702e-114,
       1.74377034e-034, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       2.55704574e-029, 0.00000000e+000, 8.02590144e-315, 1.31565905e-284,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       1.38245953e-015, 0.00000000e+000, 0.00000000e+000, 1.07292256e-165,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       3.20419464e-014, 0.00000000e+000, 0.00000000e+000, 2.87477838e-054,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       2.12192127e-009, 3.29047788e-006, 8.53629977e-004, 6.65024631e-004,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       0.00000000e+000, 9.78804558e-052, 0.00000000e+000, 2.05480531e-003,
       0.00000000e+000, 0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
       0.00000000e+000, 7