# 0. Import dependencies

Import Libraries:
- Numpy for the Qtable
- OpenAI gym for the environment
- Random to generate random numbers

In [20]:
import numpy as np
import gym
import random
from gym import Env
from gym.spaces import Discrete, Box

# 1. Environment

In [21]:
class ShowerEnv(Env):
    def __init__(self):
        #actions we can take, down / stay / up
        self.action_space = Discrete(3)
        #temperature array
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        #set starting temperature
        self.state = 38 + random.randint(-3,3)
        #set shower length
        self.shower_length = 60
    
    def step(self, action):
        #apply action
        # 0 -1 = -1 temperature
        # 1 -1 = 0
        # 2 -1 = 1 temperature
        self.state += action -1
        #reduce shower length by 1 second
        self.shower_length -= 1
        
        #calculate reward
        if self.state >= 37 and self.state <= 39:
            reward = 1
        else:
            reward = -1
        
        #ceck if shower is done
        if self.shower_length <=0:
            done = True
        else:
            done = False
            
        #apply temperature noise
        self.state += random.randint(-1,1)
        #set placeholder for info
        info = {}
        
        #return step information
        return self.state, reward, done, info
        
        pass
    def render(self):
        #implement visualization
        pass
    def reset(self):
        #reset shower temperature
        self.state = 38 + random.randint(-3,3)
        #reset shower time
        self.shower_length = 60
        return self.state

In [22]:
env = ShowerEnv()

# 2. Create Qtable and initialize

- Create the Qtable
- Action and state size are calculated to know how much rows and columns are needed

In [25]:
action_size = env.action_space.n
print("Action size ", action_size)

state_size = env.observation_space
print("State size ", state_size)

Action size  3
State size  Box([0.], [100.], (1,), float32)


In [26]:
qtable = np.zeros((state_size, action_size))
print(qtable)

TypeError: 'Box' object cannot be interpreted as an integer

# 3. Hyperparameters

- Specify the required hyperparameters

In [18]:
total_episodes = 50000       #Total episodes
total_test_episodes = 100    #Total test episodes
max_steps = 99               #Max steps per episodes

learning_rate = 0.7          #Learning rate
gamma = 0.618                #Discounting rate

#Exploration parameters
epsilon = 1.0               #Exploration rate
max_epsilon = 1.0           #Exploration probability at start
min_epsilon = 0.01          #Minimum exploration probability
decay_rate = 0.01           #Exponential decay rate for exploration probability

# 4. Qlearning algorithm

In [19]:
#Until learning is stopped
for episode in range(total_episodes):
    #Reset the environment
    state = env.reset()
    step = 0
    done = False
    
    for step in range(max_steps):
        #Choose an action in the current state
        ##Randomize a number
        exp_exp_tradeoff = random.uniform(0,1)
        
        ##If this number is greater than epsilon --> exploitation (taking the biggest Q value for this state)
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])
        
        #Else doing a random choice --> exploration
        else:
            action = env.action_space.sample()
        
        #Take the action and observe the outcome state and reward
        new_state, reward, done, info = env.step(action)
        
        qtable[state, action] = qtable[state, action] + learning_rate * (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])
        
        #Our new state is state
        state = new_state
        
        #If done : finish episode
        if done == True:
            break
    
    episode += 1
    
    #Reduce epsilon because we need less and less exploration
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
        

NameError: name 'qtable' is not defined

# 5. Use Qtable to play

In [7]:
env.reset()
rewards = []

for episode in range(total_test_episodes):
    state = env.reset()
    step = 0
    done = False
    total_rewards = 0
    print("****************")
    print("EPISODE ", episode)
    
    for step in range(max_steps):
        env.render()
        #Take the action that have the maximum expected future reward given that state
        action = np.argmax(qtable[state,:])
        
        new_state, reward, done, info = env.step(action)
        
        total_rewards += reward
        
        if done:
            rewards.append(total_rewards)
            break
        state = new_state
env.close()

****************
EPISODE  0
+---------+
|[34;1mR[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y|[43m [0m: |[35mB[0m: |
+---------+

+---------+
|[34;1mR[0m: | : :G|
| : | : : |
| : : : : |
| |[43m [0m: | : |
|Y| : |[35mB[0m: |
+---------+
  (North)
+---------+
|[34;1mR[0m: | : :G|
| : | : : |
| :[43m [0m: : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (North)
+---------+
|[34;1mR[0m: | : :G|
| :[43m [0m| : : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (North)
+---------+
|[34;1mR[0m:[43m [0m| : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (North)
+---------+
|[34;1m[43mR[0m[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (West)
+---------+
|[42mR[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (Pickup)
+---------+
|R:[42m_[0m| : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+
  (East)
+---------+
|R: | : 

In [8]:
print("Score", total_rewards)
print ("Score over time: " + str(sum(rewards)/total_test_episodes))

Score 10
Score over time: 7.77
