# Shower Problem
Simplify the shower problem from:
* Turn it up, leave it alone, or turn it down

to:
* Turn the shower on hot, medium, or cold

In [1]:
import gym 
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete 
import numpy as np
import random
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
import time


# Environment

In [15]:
class ShowerEnv(Env):
    """Simplified shower environment: choose a cold, medium, or hot knob setting.

    Each discrete action selects one fixed temperature from
    ``listTemperatureKnob``. The selected temperature becomes the state and
    the observation exactly as chosen (no noise is applied). A step is
    rewarded +1 when the temperature lies strictly between 27 and 33, and
    -1 otherwise. An episode ends after ``shower_length`` steps.
    """

    # Temperature produced by each action; the action is the index into this tuple.
    listTemperatureKnob = (10, 30, 50)
    # Number of steps per episode. reset() restores the per-instance countdown
    # from this class-level value, so it is the single place to change it.
    shower_length = 3

    def __init__(self):
        """Define the action/observation spaces and start the first episode."""
        # One action per knob position, so the space follows the tuple's size.
        self.action_space = Discrete(len(self.listTemperatureKnob))
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([100], dtype=np.float32),
        )
        self.reset()

    def step(self, action):
        """Apply one knob setting and advance the episode by one step.

        Args:
            action: Index into ``listTemperatureKnob``.

        Returns:
            A 4-tuple ``(obs, reward, done, info)``: ``obs`` is a float32 array
            of shape ``(1,)`` holding the selected temperature, ``reward`` is
            1 or -1, ``done`` is True once the step countdown reaches zero, and
            ``info`` carries the selected temperature under the key
            ``"temperatureShower"``.
        """
        temperatureShowerHead = self.listTemperatureKnob[action]
        self.state = temperatureShowerHead
        # Shadows the class attribute with a per-instance countdown.
        self.shower_length -= 1

        reward = 1 if 27 < self.state < 33 else -1
        done = self.shower_length <= 0

        info = {"temperatureShower": temperatureShowerHead}
        obs = np.array([self.state], dtype=np.float32)
        return obs, reward, done, info

    def render(self, mode="human"):
        """No-op; ``mode`` is accepted only to match the gym ``Env.render`` signature."""
        pass

    def reset(self):
        """Start a new episode and return the initial observation.

        The state is set to the first knob temperature and the step countdown
        is restored from the class-level ``shower_length``.
        """
        self.state = self.listTemperatureKnob[0]
        # Read from the class: the instance attribute has been decremented by step().
        self.shower_length = type(self).shower_length
        obs = np.array([self.state], dtype=np.float32)

        return obs

# Create the environment instance used by the rest of the notebook, then run
# stable-baselines3's environment checker on it; warn=True also enables the
# checker's additional warnings.
env = ShowerEnv()
check_env(env, warn=True)

# Time a few Episodes

In [18]:
# Run a few episodes with randomly sampled actions and report how long they take.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() may be too coarse for a run this brief.
timeStart = time.perf_counter()

episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    # Step until the environment signals the end of the episode.
    while not done:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

timeEnd = time.perf_counter()
print("Elapsed Time: " + str(timeEnd - timeStart))

Episode:1 Score:1
Episode:2 Score:1
Episode:3 Score:-1
Episode:4 Score:-3
Episode:5 Score:1
Elapsed Time: 0.0009996891021728516


# Train Model

Use the `command prompt` to navigate to the `Logs` directory and use

`>tensorboard --logdir=./`

to start the `tensorboard` monitor.  Then use a browser to monitor:  `http://localhost:6006/`

This will update in real time, so you can monitor the logs as they change, including:
* `ep_len_mean` = average number of *steps* for each *episode*
* `ep_rew_mean` = average reward of the *episode*

In [25]:
# Train a PPO agent on the shower environment and report the wall-clock
# training time. TensorBoard logs are written under the directory built
# from 'Training' and 'Logs'.
log_path = os.path.join('Training', 'Logs')
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log=log_path,
)

timeStart = time.time()
model.learn(total_timesteps=500_000)
timeEnd = time.time()

print(f"Elapsed Time: {timeEnd - timeStart}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to Training\Logs\PPO_9
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 3        |
|    ep_rew_mean     | -1.02    |
| time/              |          |
|    fps             | 1495     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3           |
|    ep_rew_mean          | -0.14       |
| time/                   |             |
|    fps                  | 1025        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.037348226 |
|    clip_fraction        | 0.841       |
|    clip_range           | 0.2

# Save Model

In [21]:
# Persist the trained model under the name 'PPO', relative to the current working directory.
model.save('PPO')

In [23]:
# Evaluate the trained policy over 10 episodes; the displayed tuple is
# (mean episode reward, standard deviation of the episode reward).
evaluate_policy(model, env, n_eval_episodes=10)

(3.0, 0.0)