In [1]:
import gym
import random
import numpy as np

from gym.envs.registration import register

# Register a deterministic (non-slippery) 4x4 FrozenLake variant under its own
# id so it can be created later with gym.make('FrozenLakeNoSlip-v0').
register(
    id='FrozenLakeNoSlip-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    max_episode_steps=100,
    reward_threshold=0.78,  # original note: optimum = .8196
    kwargs=dict(map_name='4x4', is_slippery=False),
)

## Intro to Q learning

#### Overview:

Previously, we created a random agent that can act in both continuous and discrete action spaces. Now we'll look at Frozen Lake. Slippery ice means actions don't always pan out (the environment is stochastic). We'll set `is_slippery` to `False` to make this a deterministic environment.

Change iteration loop to run while `done=False`.

Print updates at 0.5 second intervals.

Flush notebook output with `clear_output` command.

To implement Q-learning, create a Q-agent, which is a subclass of the parent agent.

Q table:
- States as rows
- Actions as columns

#### Redefine the `get_action` method to select actions corresponding to the state. `argmax` is used to select the action with the highest q-value.

#### Define a `train` method for updating the q-table at each step.

- It receives the `experience` tuple with `(state, action, next_state, reward, done)`.
- Looks up `q_next` (the q-table entry for `next_state`) and checks whether the episode is `done` at `next_state`.
- Calculates `q_target` from the Q-learning equation (reward plus _discounted_ future reward)
- Calculates `q_update`, which is the distance from the current q-value to the target.
- Pushes update to the q-table using _learning rate_.
- Constructor defines both the `discount_rate` and the `learning_rate` used above.





https://www.youtube.com/watch?v=wN3rxIKmMgE&list=PLIfPjWrv526bMF8_vx9BqWjec-F-g-lQO&index=3

In [2]:
# Choose which environment to run. The commented-out ids below are alternatives;
# exactly one `env_name` assignment should be active.
#env_name = "CartPole-v1"
#env_name = "MountainCar-v0"
#env_name = "MountainCarContinuous-v0"
#env_name = "Acrobot-v1"
#env_name = "Pendulum-v0"
#env_name = "FrozenLake-v0"
env_name = "FrozenLakeNoSlip-v0"   # custom id registered above with is_slippery=False

# Build the environment and print its observation and action spaces.
env = gym.make(env_name)
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)

# Last expression of the cell: displays the class of the action space.
type(env.action_space)

Observation space: Discrete(16)
Action space: Discrete(4)


gym.spaces.discrete.Discrete

In [3]:
class Agent():
    """Baseline agent that samples actions at random.

    Supports discrete action spaces (``gym.spaces.discrete.Discrete``) and
    continuous ones. Any action space that is not ``Discrete`` is treated as
    continuous and must expose ``low``, ``high`` and ``shape``.
    """

    def __init__(self, env):
        """Record the description of ``env``'s action space and print it.

        Args:
            env: Environment object exposing an ``action_space`` attribute.
        """
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of Discrete.
        self.is_discrete = isinstance(
            env.action_space, gym.spaces.discrete.Discrete)

        if self.is_discrete:
            # Number of distinct actions; valid actions are 0 .. action_size - 1.
            self.action_size = env.action_space.n
            print("Action size:", self.action_size)
        else:
            # Bounds and shape used to sample a continuous action.
            self.action_low = env.action_space.low
            self.action_high = env.action_space.high
            self.action_shape = env.action_space.shape
            print("Action range:", self.action_low, self.action_high)

    def get_action(self, state):
        """Return a random action; ``state`` is accepted but not used.

        Args:
            state: Current observation (ignored by this random agent).

        Returns:
            An int in ``range(self.action_size)`` for discrete action spaces,
            otherwise an array of shape ``self.action_shape`` drawn uniformly
            between ``self.action_low`` and ``self.action_high``.
        """
        if self.is_discrete:
            return random.choice(range(self.action_size))
        return np.random.uniform(self.action_low,
                                 self.action_high,
                                 self.action_shape)

In [None]:
# Create the random agent and reset the environment to its initial state.
agent = Agent(env)
state = env.reset()

# Run one episode of at most 200 timesteps, taking a random action each time.
for i in range(200):
    action = agent.get_action(state)
    # Step exactly once per iteration: a second, discarded step would apply the
    # action twice and leave `state` out of sync with the environment.
    state, reward, done, info = env.step(action)
    env.render()
    if done:
        # The episode is over; stepping again would require env.reset() first.
        break