In [1]:
# Install dependencies into the environment of the *running kernel*.
# `%pip` always targets the kernel's interpreter, whereas `!pip3` runs whichever
# pip3 is first on the shell PATH and may install into a different environment.
# gym is imported below; uncomment the next line if it is not already installed.
# %pip install gym
%pip install keras-rl2



You should consider upgrading via the '/Users/vaishnavbharadwaj/Documents/lectures/mldm_cw/env/bin/python3 -m pip install --upgrade pip' command.


In [2]:
from gym import Env
from gym import spaces
import numpy as np
import random

In [8]:
class tempReg(Env):
    """Toy chiller-temperature regulation environment (4-tuple ``step`` API).

    The state is a single integer temperature. On every step the agent moves
    the regulator down, leaves it, or moves it up; afterwards the temperature
    drifts by a random -1, 0 or +1. The reward is +1 while the temperature
    (after the action, before the drift) lies inside the inclusive band
    ``[TARGET_LOW, TARGET_HIGH]`` and -1 otherwise. An episode ends after
    ``episode_length`` steps.

    Note: the temperature is not clipped, so it can leave the bounds declared
    in ``observation_space``.
    """

    # Inclusive temperature band that earns a positive reward.
    TARGET_LOW = 1
    TARGET_HIGH = 5

    def __init__(self, episode_length=60):
        """Create the environment and put it into a fresh start-of-episode state.

        Args:
            episode_length: Number of steps per episode (defaults to 60).
        """
        # Actions are regulator down (0), stay the same (1) or up (2).
        self.action_space = spaces.Discrete(3)
        # Chiller temperature, declared as a one-element box between 0 and 100.
        self.observation_space = spaces.Box(low=np.array([0]), high=np.array([100]))
        # Number of steps the chiller has to hold the temperature per episode.
        self.episode_length = episode_length
        # Initialise state and step counter through reset() so a freshly built
        # environment starts from the same distribution as a reset one (the
        # previous separate formula could start at -1, below the declared low).
        self.reset()

    def step(self, action):
        """Apply one action and advance the simulation by one step.

        Args:
            action: Integer in {0, 1, 2}; shifts the temperature by ``action - 1``.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the integer
            temperature after the random drift, ``reward`` is +1 or -1,
            ``done`` is True once the step budget is used up, and ``info`` is
            an empty dict.
        """
        # Map action 0/1/2 onto a temperature change of -1/0/+1.
        self.state += action - 1
        self.chiller_length -= 1

        # Reward is judged on the temperature right after the action,
        # i.e. before the random drift below is applied.
        if self.TARGET_LOW <= self.state <= self.TARGET_HIGH:
            reward = 1
        else:
            reward = -1

        # The episode ends when the remaining step budget is exhausted.
        done = self.chiller_length <= 0

        # Random temperature drift of -1, 0 or +1.
        self.state += random.randint(-1, 1)

        info = {}

        return self.state, reward, done, info

    def render(self, mode='human'):
        """No-op renderer.

        Accepts ``mode`` so that callers passing ``render(mode=...)`` do not
        fail; the argument is ignored.
        """
        pass

    def reset(self):
        """Start a new episode.

        Returns:
            The initial integer temperature, drawn uniformly from 1..5.
        """
        self.state = 3 + random.randint(-2, 2)
        self.chiller_length = self.episode_length
        return self.state

In [None]:
# Instantiate the custom chiller environment used by the cells below.
env = tempReg()

In [10]:
# Sanity check: draw one random action from the environment's action space.
env.action_space.sample()

0

In [11]:
# Sanity check: draw one random observation from the environment's observation space.
env.observation_space.sample()

array([42.87957225])

In [12]:
# Random-policy baseline: play a handful of episodes with uniformly sampled
# actions and print the total reward collected in each one.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    score = 0

    # Step until the environment reports the end of the episode.
    while True:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        if done:
            break

    print('Episode: {} score: {}'.format(episode, score))

Episode: 1 score: -6
Episode: 2 score: 0
Episode: 3 score: -54
Episode: 4 score: -42
Episode: 5 score: -40
Episode: 6 score: 20
Episode: 7 score: -60
Episode: 8 score: 16
Episode: 9 score: -42
Episode: 10 score: -26


## Creating a deep learning model with Keras

In [13]:
from tensorflow import keras
from tensorflow.keras import layers
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Flatten
# from tensorflow.keras.optimizers import Adam
# from tensorflow import keras

In [14]:
# Observation shape and number of actions, read from the environment's spaces;
# these are the arguments handed to build_model / build_agent further down.
states = env.observation_space.shape
actions = env.action_space.n

In [15]:
states

(1,)

In [16]:
actions

3

In [33]:
def build_model(states, actions):
    """Assemble the dense network used as the agent's model.

    Args:
        states: Shape tuple forwarded to the first layer as ``input_shape``.
        actions: Number of units in the final (linear) layer.

    Returns:
        An uncompiled ``keras.Sequential`` with two 24-unit ReLU layers
        followed by a linear layer of ``actions`` units.
    """
    first_hidden = layers.Dense(24, activation='relu', input_shape=states)
    second_hidden = layers.Dense(24, activation='relu')
    output = layers.Dense(actions, activation="linear")
    return keras.Sequential([first_hidden, second_hidden, output])

In [32]:
#del model

In [34]:
# Create the network from the observation shape and action count.
model = build_model(states, actions)

In [35]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 24)                48        
                                                                 
 dense_7 (Dense)             (None, 24)                600       
                                                                 
 dense_8 (Dense)             (None, 3)                 75        
                                                                 
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


## Building agents with keras-RL

In [36]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [37]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    Args:
        model: Network passed to the agent as its ``model``.
        actions: Number of actions, passed as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` using a Boltzmann Q-policy and a sequential
        replay memory holding up to 10000 entries with a window length of 1.
    """
    return DQNAgent(
        model=model,
        nb_actions=actions,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=10000, window_length=1),
        nb_steps_warmup=10,
        # NOTE(review): a value below 1 is presumably a soft-update rate for
        # the target network - confirm against the keras-rl documentation.
        target_model_update=1e-2,
    )

In [38]:
# Build the DQN agent around the network and train it on the chiller environment.
dqn = build_agent(model, actions)
# Use `learning_rate`: the `lr` keyword is a deprecated alias in tf.keras
# optimizers and is rejected by newer Keras releases.
dqn.compile(keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)

Training for 10000 steps ...
Interval 1 (0 steps performed)
done, took 92.436 seconds


<keras.callbacks.History at 0x7f7c35002a90>

In [39]:
# Evaluate the trained agent for 100 episodes and report the mean episode reward.
score = dqn.test(env, nb_episodes=100, visualize=False)
episode_rewards = score.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 100 episodes ...
Episode 1: reward: 60.000, steps: 60
Episode 2: reward: 60.000, steps: 60
Episode 3: reward: 60.000, steps: 60
Episode 4: reward: 60.000, steps: 60
Episode 5: reward: 60.000, steps: 60
Episode 6: reward: 60.000, steps: 60
Episode 7: reward: 60.000, steps: 60
Episode 8: reward: 60.000, steps: 60
Episode 9: reward: 60.000, steps: 60
Episode 10: reward: 60.000, steps: 60
Episode 11: reward: 60.000, steps: 60
Episode 12: reward: 60.000, steps: 60
Episode 13: reward: 60.000, steps: 60
Episode 14: reward: 60.000, steps: 60
Episode 15: reward: 60.000, steps: 60
Episode 16: reward: 60.000, steps: 60
Episode 17: reward: 60.000, steps: 60
Episode 18: reward: 60.000, steps: 60
Episode 19: reward: 60.000, steps: 60
Episode 20: reward: 60.000, steps: 60
Episode 21: reward: 60.000, steps: 60
Episode 22: reward: 60.000, steps: 60
Episode 23: reward: 60.000, steps: 60
Episode 24: reward: 60.000, steps: 60
Episode 25: reward: 60.000, steps: 60
Episode 26: reward: 60.000, st