## 1. Import Dependencies

In [None]:
# IMPORT GYM STUFF
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

# IMPORT HELPERS
import numpy as np
import random
import os

# IMPORT STABLE BASELINES STUFF
from stable_baselines3 import ppo
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

## 2. Types of Spaces

In [None]:
# Discrete space with 3 choices; as a bare expression the notebook echoes its repr.
Discrete(3)

In [None]:
# Draw one random element from the 3-choice discrete space.
Discrete(3).sample()

In [None]:
# Continuous space: a 3x3 array with lower bound 0 and upper bound 1.
Box(0, 1, shape=(3, 3))

In [None]:
# Draw one random 3x3 array from the bounded continuous space.
Box(0, 1, shape=(3, 3)).sample()

In [None]:
# Tuple and Dict are container spaces: they wrap several other spaces together.
# Note: Stable Baselines doesn't support Tuple spaces.
Tuple((Discrete(3), Box(0, 1, shape=(3, ))))

In [None]:
# Sample the Tuple space that combines a Discrete(3) and a 3-element Box.
Tuple((Discrete(3), Box(0, 1, shape=(3, )))).sample()

In [None]:
# Dict space: named sub-spaces ('height' is discrete, 'speed' is a 1-element Box bounded by 0 and 100).
Dict({'height': Discrete(2), 'speed': Box(0, 100, shape=(1,))})

In [None]:
# Sample a Dict space that additionally carries a 'color' entry backed by MultiBinary(4).
Dict({'height': Discrete(2), 'speed': Box(0, 100, shape=(1,)), 'color':MultiBinary(4)}).sample()

In [None]:
# MultiBinary space with 4 binary positions.
MultiBinary(4)

In [None]:
# Draw one random element from the 4-position binary space.
MultiBinary(4).sample()

In [None]:
# MultiDiscrete space: three discrete components sized 5, 2 and 2.
MultiDiscrete([5, 2, 2])

In [None]:
# Draw one random element from the three-component MultiDiscrete space.
MultiDiscrete([5, 2, 2]).sample()

## 3. Building an Environment
- Build an agent to give us the best shower possible 
- The shower temperature fluctuates randomly
- The ideal temperature range is between 37 and 39 degrees
- Train an agent to automatically respond to the changes in temperature and get it within the ideal temperature

In [None]:
class ShowerEnv(Env):
    """Toy environment: steer a shower's temperature into a comfortable band.

    Each episode lasts ``EPISODE_LENGTH`` steps. On every step the agent picks
    one of three actions (0 = one degree down, 1 = stay, 2 = one degree up) and
    receives +1 while the temperature is inside
    ``[IDEAL_LOW, IDEAL_HIGH]`` and -1 otherwise.

    Attributes:
        action_space: ``Discrete(3)`` -- down / stay / up.
        observation_space: ``Box(0, 100, shape=(1,))`` -- the temperature.
        state: Current temperature (a plain ``int``).
        shower_length: Steps remaining in the current episode.
    """

    EPISODE_LENGTH = 60  # steps ("seconds") per shower
    IDEAL_LOW = 37       # lower edge of the rewarded temperature band
    IDEAL_HIGH = 39      # upper edge of the rewarded temperature band

    def __init__(self):
        self.action_space = Discrete(3)  # actions we can take (down, stay, up)
        self.observation_space = Box(low=0, high=100, shape=(1,))  # temperature array
        self.state = self._initial_temperature()  # set start temperature
        self.shower_length = self.EPISODE_LENGTH  # set shower (aka episode) length

    def _initial_temperature(self):
        """Return a start temperature of 38 plus a random offset in [-3, 3]."""
        # Use the environment's own generator so reset(seed=...) is
        # reproducible; integers() excludes the upper bound, hence 4.
        return 38 + int(self.np_random.integers(-3, 4))

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Args:
            action: 0, 1 or 2; shifted by -1 to become a temperature change
                of -1, 0 or +1.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the new
            temperature, ``reward`` is +1 inside the ideal band and -1
            outside it, ``done`` is True once the episode's steps are used
            up, and ``info`` is an empty dict.
        """
        # NOTE(review): gymnasium's Env.step contract is the 5-tuple
        # (obs, reward, terminated, truncated, info). The 4-tuple is kept so
        # callers that unpack four values keep working -- confirm and migrate.
        # NOTE(review): the observation is a plain int although
        # observation_space declares a (1,) Box -- confirm what consumers expect.

        # action 0/1/2 maps to a -1/0/+1 temperature change
        self.state += action - 1

        # decrease shower time by 1 second
        self.shower_length -= 1

        # reward staying inside the ideal band, penalise everything else
        reward = 1 if self.IDEAL_LOW <= self.state <= self.IDEAL_HIGH else -1

        # the shower is done once the countdown reaches zero
        done = self.shower_length <= 0

        # temperature noise is currently disabled:
        # self.state += random.randint(-1, 1)

        # placeholder for info
        info = {}

        return self.state, reward, done, info

    def render(self):
        """Visualisation hook; intentionally not implemented."""
        pass

    def reset(self, *, seed=None, options=None):
        """Start a new episode.

        Restores a fresh start temperature and the full episode length so the
        environment can be reused after ``step`` reported ``done``.

        Args:
            seed: Optional seed forwarded to ``Env.reset`` to seed
                ``self.np_random``.
            options: Accepted for gymnasium API compatibility; unused.

        Returns:
            ``(state, info)`` -- the start temperature and an empty info dict.
        """
        super().reset(seed=seed)
        self.state = self._initial_temperature()
        self.shower_length = self.EPISODE_LENGTH
        return self.state, {}