#0. Install Dependencies

In [2]:
!pip install tensorflow
!pip install keras
!pip install gym
!pip install gymnasium
!pip install keras-rl2
!pip install stable-baselines3



#1. Build Environment with OpenAI Gymnasium

In [17]:
import numpy as np
import random
import gymnasium

In [24]:
class ShowerEnv(gymnasium.Env):
  def __init__(self):
    super().__init__() # Initializes the parent class
    # Actions we can take, down, stay, up
    self.action_space = gymnasium.spaces.Discrete(3)
    # Temperature array
    self.observation_space = gymnasium.spaces.Box(low=np.array([0], dtype=np.float32), high=np.array([100], dtype=np.float32))
    # Set start temp
    self.state = 38 + random.randint(-3,3)
    # Set shower length
    self.shower_length = 60

  def step(self, action):
    # Apply action
    # 0 -1 = -1 temperature
    # 1 -1 = 0
    # 2 -1 = 1 temperature
    self.state += action -1
    # Reduce shower length by 1 second
    self.shower_length -= 1

    # Calculate reward
    if self.state >=37 and self.state <=39:
        reward =1
    else:
        reward = -1

    # Check if shower is done
    if self.shower_length <= 0:
        done = True
    else:
        done = False

    # Apply temperature noise
    #self.state += random.randint(-1,1)
    # Set placeholder for info
    info = {}

    # Set placeholder for truncated
    truncated = False

    # set 'observation' to a numpy array
    observation = np.array([self.state], dtype=np.float32)

    # Return step information
    return observation, reward, done, truncated, info

  def render(self):
    # Implement viz
    pass

  def reset(self, seed=None, options=None):
    super().reset(seed=seed)
    # Reset shower temperature
    self.state = 38 + random.randint(-3,3)
    # Reset shower time
    self.shower_length = 60

    # convert 'observation' to a NumPy array
    observation = np.array([self.state], dtype=np.float32)

    # return 'observation' and 'info'
    return observation, {}


Evaluate environment with random actions:

In [47]:
env = ShowerEnv()

  and should_run_async(code)


In [48]:
env.observation_space.sample()

array([82.62943], dtype=float32)

In [49]:
episodes = 1000
score_log = []
for episode in range(1, episodes+1):
    state, _ = env.reset()
    done = False
    truncated = False
    score = 0

    while not done:
        #env.render()
        action = env.action_space.sample()
        state, reward, done, truncated, info = env.step(action)
        score+=reward

    score_log.append(score)
    # print('Episode:{} Score:{}'.format(episode, score))

mean_sample_score = np.mean(score_log)
print(f"Mean Score over {episodes} episodes: {mean_sample_score}")

Mean Score over 1000 episodes: -27.634


#2. Create Agent with Stable Baselines3

In [22]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv

In [51]:
env = ShowerEnv()

check_env(env)

env = DummyVecEnv([lambda: env])

In [52]:
# might be good to look into tensorboard in the future
#model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)


# Selected algorithm (PPO in this case)
model = PPO("MlpPolicy", env, verbose=1)

# Training the model
model.learn(total_timesteps=10000)

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1306 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 932         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005216279 |
|    clip_fraction        | 0.0274      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.000159   |
|    learning_rate        | 0.0003      |
|    loss                 | 27.4        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0015     |
|    value_loss           | 56.7        |
-----------------------------------------
-----------------

<stable_baselines3.ppo.ppo.PPO at 0x7c9710ef9a20>

Evaluate environment with agent

In [60]:
num_test_episodes = 1000
all_scores = []

# Run multiple test episodes
for episode in range(num_test_episodes):
    obs = env.reset()
    done = False
    episode_score = 0

    while not done:
        action, _states = model.predict(obs, deterministic=True)  # Use deterministic actions during testing
        obs, reward, done, info = env.step(action)
        episode_score += reward

    all_scores.append(episode_score)

# Calculate the mean score across all test episodes
mean_score = np.mean(all_scores)
print(f"Mean Score over {num_test_episodes} episodes: {mean_score}")

Mean Score over 1000 episodes: -5.760000228881836


#3. Reloading Agent from Memory

In [54]:
# save the model
model.save("ppo_shower_example_agent1")

  and should_run_async(code)


In [62]:
env = DummyVecEnv([lambda: ShowerEnv()])

# Load the trained model
model = PPO.load("ppo_shower_example_agent1", env=env)

# Continue training or evaluate the agent
#model.learn(total_timesteps=10000)  # or model.predict() for evaluation

for episode in range(num_test_episodes):
    obs = env.reset()
    done = False
    episode_score = 0

    while not done:
        action, _states = model.predict(obs, deterministic=True)  # Use deterministic actions during testing
        obs, reward, done, info = env.step(action)
        episode_score += reward

    all_scores.append(episode_score)

# Calculate the mean score across all test episodes
mean_score = np.mean(all_scores)
print(f"Mean Score over {num_test_episodes} episodes: {mean_score}")

  and should_run_async(code)


Mean Score over 1000 episodes: -7.380000114440918


#4. Additional Testing (Not Relevant)

In [63]:
!pip install traci

Collecting traci
  Downloading traci-1.21.0-py3-none-any.whl.metadata (3.0 kB)
Collecting sumolib>=1.21.0 (from traci)
  Downloading sumolib-1.21.0-py3-none-any.whl.metadata (2.4 kB)
Downloading traci-1.21.0-py3-none-any.whl (126 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.2/126.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sumolib-1.21.0-py3-none-any.whl (149 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.9/149.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sumolib, traci
Successfully installed sumolib-1.21.0 traci-1.21.0
