In [14]:
import gym 
import random
import tensorflow as tf 
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [3]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

In [4]:
class ShowerEnv(Env):
    """Toy shower environment: keep the water temperature in a comfortable band.

    The state is a single number, the current temperature. Every episode
    starts at ``START_TEMP`` plus a random integer offset in
    ``[-START_SPREAD, START_SPREAD]`` and lasts ``SHOWER_LENGTH`` steps.
    On each step the agent lowers (action 0), holds (action 1) or raises
    (action 2) the temperature by one unit and receives a reward of +1 when
    the resulting temperature lies in ``[COMFORT_LOW, COMFORT_HIGH]`` and -1
    otherwise.

    The temperature is not clipped to the bounds of ``observation_space``.
    """

    # Temperature each episode starts around, and the largest random offset
    START_TEMP = 38
    START_SPREAD = 3
    # Inclusive temperature band that earns the positive reward
    COMFORT_LOW = 37
    COMFORT_HIGH = 39
    # Number of steps in one episode
    SHOWER_LENGTH = 60

    def __init__(self):
        """Define the action/observation spaces and draw the first start state."""
        # Actions we can take: 0 = down, 1 = stay, 2 = up
        self.action_space = Discrete(3)
        # Temperature array
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        # Set start temp
        self.state = self.START_TEMP + random.randint(-self.START_SPREAD, self.START_SPREAD)
        # Set shower length
        self.shower_length = self.SHOWER_LENGTH

    def step(self, action):
        """Apply one action and advance the episode by one step.

        Args:
            action: 0, 1 or 2; the temperature changes by ``action - 1``.

        Returns:
            A ``(state, reward, done, info)`` tuple: the new temperature, +1 or
            -1 depending on whether it is inside the comfort band, whether the
            remaining shower length has reached zero, and an empty info dict.
        """
        # Map action {0, 1, 2} onto a temperature change of {-1, 0, +1}
        self.state += action - 1
        # Reduce shower length by 1 second
        self.shower_length -= 1

        # Reward staying inside the comfort band, penalise everything else
        in_comfort_band = self.COMFORT_LOW <= self.state <= self.COMFORT_HIGH
        reward = 1 if in_comfort_band else -1

        # The episode ends once the shower time is used up
        done = self.shower_length <= 0

        # No temperature noise is applied; the state changes only via the action.
        # Placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self, mode='human'):
        """No-op placeholder; no visualisation is implemented.

        ``mode`` is accepted and ignored so that callers using the Gym
        ``Env.render(mode=...)`` convention do not fail with a TypeError.
        """
        pass

    def reset(self):
        """Start a new episode and return its initial temperature."""
        # Reset shower temperature
        self.state = self.START_TEMP + random.randint(-self.START_SPREAD, self.START_SPREAD)
        # Reset shower time
        self.shower_length = self.SHOWER_LENGTH
        return self.state

The observation_space defines the structure of the observations your environment will be returning. Learning agents usually need to know this before they start running, in order to set up the policy function. Some general-purpose learning agents can handle a wide range of observation types: Discrete, Box, or pixels (which is usually a Box(0, 255, [height, width, 3]) for RGB pixels).

See the "Spaces" section of the Gym documentation at https://gym.openai.com/docs for details.

In [5]:
# Create the single environment instance used by the cells below
env = ShowerEnv()



In [6]:
# Display the environment object (shows its default repr)
env

<__main__.ShowerEnv at 0x4181841d00>

In [7]:
# Baseline: play a few episodes with randomly sampled actions and print
# the total reward collected in each one.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    score = 0
    done = False

    # Keep stepping until the environment reports the episode is over
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score = score + reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-60
Episode:2 Score:-34
Episode:3 Score:-30
Episode:4 Score:-34
Episode:5 Score:-2
Episode:6 Score:2
Episode:7 Score:16
Episode:8 Score:-48
Episode:9 Score:-60
Episode:10 Score:-44


In [8]:
# Length of the first observation dimension and the number of discrete actions
states = env.observation_space.shape[0]
actions = env.action_space.n

In [9]:
# Display both sizes as a tuple
states , actions

(1, 3)

In [10]:
# Display the observation space (bounds, shape and dtype)
env.observation_space

Box(0.0, 100.0, (1,), float32)

In [38]:
def build_model(states, actions):
    """Build the network that maps an observation to one value per action.

    The network is a ``Flatten`` input layer followed by two 24-unit ReLU
    ``Dense`` layers and a linear ``Dense`` output layer.

    Args:
        states: Currently unused; the input shape is fixed to ``[1]``.
        actions: Number of units in the linear output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layers = [
        Flatten(input_shape=[1]),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [39]:
# Instantiate the network for this environment's observation/action sizes
model = build_model(states, actions)

In [40]:
# Print the layer-by-layer summary, including parameter counts
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_5 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_12 (Dense)             (None, 24)                48        
_________________________________________________________________
dense_13 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_14 (Dense)             (None, 3)                 75        
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


In [41]:
def build_agent(model, actions):
    """Assemble a ``DQNAgent`` around ``model``.

    The agent uses a ``SequentialMemory`` (limit 50000, window length 1) and a
    ``BoltzmannQPolicy``, with 10 warm-up steps and ``target_model_update``
    set to 1e-2.

    Args:
        model: The Keras model handed to the agent.
        actions: Number of actions, passed as ``nb_actions``.

    Returns:
        The constructed, not yet compiled, ``DQNAgent``.
    """
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    action_policy = BoltzmannQPolicy()
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=action_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [42]:
# Build the agent around the network created above
dqn = build_agent(model, actions)

In [43]:
# Compile the agent with Adam (learning rate 1e-3) and track mean absolute error,
# then train on the environment for 50,000 steps with visualisation off.
# `learning_rate` replaces `lr`, which is a deprecated alias in tf.keras optimizers.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
    1/10000 [..............................] - ETA: 10:50 - reward: 1.0000



166 episodes - episode_reward: -26.386 [-60.000, 32.000] - loss: 1.433 - mae: 7.910 - mean_q: -11.171

Interval 2 (10000 steps performed)
167 episodes - episode_reward: -16.719 [-60.000, 44.000] - loss: 1.368 - mae: 7.614 - mean_q: -10.689

Interval 3 (20000 steps performed)
167 episodes - episode_reward: 20.994 [-52.000, 60.000] - loss: 1.036 - mae: 5.157 - mean_q: 1.162

Interval 4 (30000 steps performed)
166 episodes - episode_reward: 48.759 [26.000, 60.000] - loss: 9.758 - mae: 22.465 - mean_q: 34.233

Interval 5 (40000 steps performed)
done, took 245.342 seconds


<tensorflow.python.keras.callbacks.History at 0x4197eadd00>

In [45]:
# Evaluate the agent on 15 episodes with visualisation turned off
dqn.test(env, nb_episodes=15, visualize=False)

Testing for 15 episodes ...
Episode 1: reward: 60.000, steps: 60
Episode 2: reward: 60.000, steps: 60
Episode 3: reward: 60.000, steps: 60
Episode 4: reward: 60.000, steps: 60
Episode 5: reward: 58.000, steps: 60
Episode 6: reward: 60.000, steps: 60
Episode 7: reward: 58.000, steps: 60
Episode 8: reward: 60.000, steps: 60
Episode 9: reward: 58.000, steps: 60
Episode 10: reward: 60.000, steps: 60
Episode 11: reward: 58.000, steps: 60
Episode 12: reward: 58.000, steps: 60
Episode 13: reward: 60.000, steps: 60
Episode 14: reward: 58.000, steps: 60
Episode 15: reward: 60.000, steps: 60


<tensorflow.python.keras.callbacks.History at 0x4197e49430>