### Import

In [1]:
import os
import copy
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

from gymnasium.spaces import Dict, Box, Discrete, MultiDiscrete

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback

### Utils

In [18]:
def is_between(A, b):
    return A[0] < b <= A[1]

def calculate_soil_evap_rate(T, R):
    A0 = 1.0
    A1 = 0.1
    return np.float32(A0*(1 + A1*T)*(1-R*0.01))

def calculate_plant_evap_rate(T, R, light_on_off):
    A2 = 1.0
    A3 = 0.1
    A4 = 0.1
    return np.float32(A2*(1 + A3*T)*(1-R*0.01)*(1+A4*light_on_off))

### PlantAirControl Environment

In [28]:
class PlantAirControl(gym.Env):
    # Constants are hard-coded for now but can set up to read from a spreadsheet
    CHAMBER_VOLUME = 1.0
    NON_LIGHT_HEAT = 0.5
    LIGHT_HEAT = 1.0

    FAN_MAX_WATTAGE = 0.5 # kW
    HEAT_MAX_WATTAGE = 1.5 # kW
    COOL_MAX_WATTAGE = 1.5 # kW

    FAN_MAX_AIR_INOUT_RATE = 10.0 
    HEAT_MAX_WATER_TEMP_UP_RATE = 50.0
    COOL_MAX_WATER_TEMP_DOWN_RATE = 50.0

    DESIRED_TEMPS = [30.0, 40.0] # Celcius
    DESIRED_HUMIDS = [60.0, 80.0] 

    def __init__(self, render_mode=None,
                light_heat = LIGHT_HEAT,
                non_light_heat = NON_LIGHT_HEAT,
                chamber_volume = CHAMBER_VOLUME,

                fan_max_wattage = FAN_MAX_WATTAGE,
                heat_max_wattage = HEAT_MAX_WATTAGE,
                cool_max_wattage = COOL_MAX_WATTAGE,

                fan_max_air_inout_rate = FAN_MAX_AIR_INOUT_RATE,
                heat_max_water_temp_up_rate = HEAT_MAX_WATER_TEMP_UP_RATE,
                cool_max_water_temp_down_rate = COOL_MAX_WATER_TEMP_DOWN_RATE,

                desired_temps = DESIRED_TEMPS,
                desired_humids = DESIRED_HUMIDS,
                ):
        # Set up some variables
        self.chamber_volume = np.float32(chamber_volume)
        self.light_heat = np.float32(light_heat)
        self.non_light_heat = np.float32(non_light_heat)

        self.fan_max_wattage = np.float32(fan_max_wattage)
        self.heat_max_wattage = np.float32(heat_max_wattage)
        self.cool_max_wattage = np.float32(cool_max_wattage)

        self.fan_max_air_inout_rate = np.float32(fan_max_air_inout_rate)
        self.heat_max_water_temp_up_rate = np.float32(heat_max_water_temp_up_rate)
        self.cool_max_water_temp_down_rate = np.float32(cool_max_water_temp_down_rate)

        self.dersired_temps = np.float32(np.array(desired_temps))
        self.dersired_humids = np.float32(np.array(desired_humids))

        # Specify the action_space
        self.action_space = MultiDiscrete([101]*3) # e.g., fan capacity, heating component capacity, cooling component capacity
        
        # Speicfy the observation_space
        self.observation_space = Dict({"InTemp/InHumid/OutTemp/OutHumid/Energy": Box(-100, 100, shape=(5,)),
                                       "LightOnOff": Discrete(2),
                                        "Status": Discrete(2),})
        
        # # Logger objects for longer time scale
        # self.inside_temps = []
        # self.inside_humids = []
        # self.outside_temps = []
        # self.outside_humids = []
        # self.fan_controls = []
        # self.heat_controls = []
        # self.cool_controls = []


    def reset(self, seed=None):
        super().reset(seed=seed) # To enable self.np_random seeding
        rng = self.np_random

        # Start state initialisation
        self.light_on_off = rng.integers(2)

        self.inside_temp = rng.uniform(low = 0, high = 70)
        self.inside_humid = rng.uniform(low = 40, high = 90)
        self.outside_temp = rng.uniform(low = - 10, high = 30)
        self.outside_humid = rng.uniform(low = 30, high = 70)

        self.energy = 0
        
        self.plant_OK = 0 # 0 means not in the box yet, 1 means OK
        
        observation = {"InTemp/InHumid/OutTemp/OutHumid/Energy": np.float32(np.array([self.inside_temp, 
                                                                          self.inside_humid, 
                                                                          self.outside_temp,
                                                                          self.outside_humid,
                                                                          self.energy])),
                        "LightOnOff": self.light_on_off,
                        "Status": self.plant_OK}

        info = {}

        return observation, info

    def step(self, action):
        # # Log things for longer time scale
        # self.inside_temps.append(self.inside_temp)
        # self.inside_humids.append(self.inside_humid)
        # self.outside_temps.append(self.outside_temp)
        # self.outside_humids.append(self.outside_humid)
        # self.fan_controls.append(action[0])
        # self.heat_controls.append(action[1])
        # self.cool_controls.append(action[2])

        # Caculate some quantities
        self.plant_evap_rate = calculate_plant_evap_rate(self.inside_temp, self.inside_humid, self.light_on_off)
        self.soil_evap_rate = calculate_soil_evap_rate(self.inside_temp, self.inside_humid)

        # Interprete action into parameters of mathematical model
        self.fan_air_inout_rate = (action[0] * self.fan_max_air_inout_rate)/100
        self.heat_water_temp_up_rate = (action[1] * self.heat_max_water_temp_up_rate)/100
        self.cool_water_temp_down_rate = (action[2] * self.cool_max_water_temp_down_rate)/100

        self.fan_wattage = (action[0] * self.fan_max_wattage)/100
        self.heat_wattage = (action[1] * self.heat_max_wattage)/100
        self.cool_wattage = (action[2] * self.cool_max_wattage)/100

        # Update observations via model for temp, humididty
        self.inside_temp += (self.non_light_heat + self.light_on_off * self.light_heat)/self.chamber_volume \
            + self.fan_air_inout_rate * (self.outside_temp - self.inside_temp) \
            + self.heat_water_temp_up_rate - self.cool_water_temp_down_rate

        self.inside_humid += (self.plant_evap_rate + self.soil_evap_rate)/self.chamber_volume \
            + self.fan_air_inout_rate * (self.outside_humid - self.inside_humid)

        self.energy += self.fan_wattage + self.heat_wattage + self.cool_wattage

        if is_between(self.dersired_temps, self.inside_temp) and is_between(self.dersired_humids, self.inside_humid):
            self.plant_OK = 1

        observation = {"InTemp/InHumid/OutTemp/OutHumid/Energy": np.float32(np.array([self.inside_temp, 
                                                                          self.inside_humid, 
                                                                          self.outside_temp,
                                                                          self.outside_humid,
                                                                          self.energy])),
                        "LightOnOff": self.light_on_off,
                        "Status": self.plant_OK}

        # Reward
        reward = self.plant_OK * 10 - self.energy

        terminated = True

        info = {}
            
        return observation, reward, terminated, False, info

### Testing

In [29]:
env = PlantAirControl()
episodes = 10
for episode in range(1, episodes + 1):
    obsINI, infoINI  = env.reset()
    print(obsINI)
    score = 0
    terminated = False
    truncated = False

    while not terminated or truncated:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        print(obs)
        print(f"The final reward is {reward}")

: 

In [27]:
len(env.inside_temps)

35

### Training

In [21]:
log_path = os.path.join(os.getcwd(), "Logs")
save_path = os.path.join(os.getcwd(), "Saved Models")

In [22]:
 # Set up the training environment
n_envs = 8
envs = make_vec_env(PlantAirControl, 
                    n_envs=n_envs,
                    seed=42,
                    vec_env_cls=DummyVecEnv)

# Set up the training model
model = PPO("MultiInputPolicy", 
            envs, 
            verbose=0, 
            tensorboard_log= os.path.join(log_path, "Training"), 
            device = "cpu")

# Set up checkpoints for during training
checkpoint_callback = CheckpointCallback(save_freq= 1e4, 
                                            save_path=os.path.join(save_path, 'Checkpoints'),
                                            name_prefix='Checkpoint',
                                            save_replay_buffer=True,
                                            save_vecnormalize=True,
                                            verbose = 0)

# Training
model.learn(1e5, callback = checkpoint_callback, progress_bar = True)

# Save the model
model_final_path = os.path.join(save_path, 'Final') 
model.save(model_final_path)

Output()

ValueError: Expected parameter logits (Tensor of shape (8, 101)) of distribution Categorical(logits: torch.Size([8, 101])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[-4.6178, -4.6196, -4.6099, -4.6129, -4.6168, -4.6227, -4.6149, -4.6177,
         -4.6127, -4.6120, -4.6180, -4.6085, -4.6134, -4.6176, -4.6235, -4.6146,
         -4.6136, -4.6148, -4.6088, -4.6164, -4.6146, -4.6132, -4.6161, -4.6177,
         -4.6148, -4.6137, -4.6198, -4.6140, -4.6135, -4.6162, -4.6129, -4.6157,
         -4.6157, -4.6153, -4.6164, -4.6161, -4.6138, -4.6125, -4.6150, -4.6134,
         -4.6199, -4.6186, -4.6139, -4.6152, -4.6190, -4.6177, -4.6134, -4.6157,
         -4.6174, -4.6135, -4.6194, -4.6124, -4.6180, -4.6112, -4.6012, -4.6180,
         -4.6129, -4.6185, -4.6155, -4.6137, -4.6145, -4.6152, -4.6119, -4.6171,
         -4.6105, -4.6184, -4.6164, -4.6177, -4.6185, -4.6128, -4.6195, -4.6198,
         -4.6133, -4.6139, -4.6115, -4.6128, -4.6177, -4.6138, -4.6114, -4.6144,
         -4.6183, -4.6171, -4.6185, -4.6134, -4.6167, -4.6179, -4.6114, -4.6156,
         -4.6161, -4.6153, -4.6196, -4.6136, -4.6111, -4.6137, -4.6083, -4.6165,
         -4.6167, -4.6162, -4.6117, -4.6165, -4.6111],
        [    nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan,     nan,     nan,     nan,
             nan,     nan,     nan,     nan,     nan],
        [-4.6125, -4.6107, -4.6204, -4.6173, -4.6135, -4.6076, -4.6154, -4.6125,
         -4.6175, -4.6183, -4.6123, -4.6218, -4.6168, -4.6127, -4.6067, -4.6157,
         -4.6167, -4.6155, -4.6214, -4.6138, -4.6156, -4.6171, -4.6142, -4.6125,
         -4.6154, -4.6166, -4.6105, -4.6163, -4.6168, -4.6140, -4.6173, -4.6146,
         -4.6146, -4.6149, -4.6138, -4.6141, -4.6164, -4.6178, -4.6153, -4.6168,
         -4.6103, -4.6116, -4.6164, -4.6150, -4.6112, -4.6126, -4.6169, -4.6146,
         -4.6128, -4.6167, -4.6109, -4.6178, -4.6123, -4.6190, -4.6291, -4.6123,
         -4.6173, -4.6118, -4.6148, -4.6166, -4.6158, -4.6150, -4.6183, -4.6132,
         -4.6197, -4.6119, -4.6138, -4.6126, -4.6118, -4.6175, -4.6108, -4.6104,
         -4.6170, -4.6164, -4.6188, -4.6174, -4.6126, -4.6165, -4.6189, -4.6159,
         -4.6119, -4.6131, -4.6118, -4.6168, -4.6135, -4.6123, -4.6188, -4.6147,
         -4.6142, -4.6150, -4.6106, -4.6167, -4.6191, -4.6166, -4.6220, -4.6137,
         -4.6136, -4.6140, -4.6186, -4.6138, -4.6192],
        [-4.6125, -4.6107, -4.6204, -4.6173, -4.6135, -4.6076, -4.6154, -4.6125,
         -4.6175, -4.6183, -4.6123, -4.6218, -4.6168, -4.6127, -4.6067, -4.6157,
         -4.6167, -4.6155, -4.6214, -4.6138, -4.6156, -4.6171, -4.6142, -4.6125,
         -4.6154, -4.6166, -4.6105, -4.6163, -4.6168, -4.6140, -4.6173, -4.6146,
         -4.6146, -4.6149, -4.6138, -4.6141, -4.6164, -4.6178, -4.6153, -4.6168,
         -4.6103, -4.6116, -4.6164, -4.6150, -4.6112, -4.6126, -4.6169, -4.6146,
         -4.6128, -4.6167, -4.6109, -4.6178, -4.6123, -4.6190, -4.6291, -4.6123,
         -4.6173, -4.6118, -4.6148, -4.6166, -4.6158, -4.6150, -4.6183, -4.6132,
         -4.6197, -4.6119, -4.6138, -4.6126, -4.6118, -4.6175, -4.6108, -4.6104,
         -4.6170, -4.6164, -4.6188, -4.6174, -4.6126, -4.6165, -4.6189, -4.6159,
         -4.6119, -4.6131, -4.6118, -4.6168, -4.6135, -4.6123, -4.6188, -4.6147,
         -4.6142, -4.6150, -4.6106, -4.6167, -4.6191, -4.6166, -4.6220, -4.6137,
         -4.6136, -4.6140, -4.6186, -4.6138, -4.6192],
        [-4.6144, -4.6095, -4.6201, -4.6170, -4.6143, -4.6079, -4.6155, -4.6138,
         -4.6158, -4.6171, -4.6134, -4.6224, -4.6162, -4.6127, -4.6095, -4.6168,
         -4.6186, -4.6156, -4.6202, -4.6142, -4.6167, -4.6137, -4.6151, -4.6116,
         -4.6150, -4.6145, -4.6094, -4.6161, -4.6166, -4.6120, -4.6178, -4.6162,
         -4.6133, -4.6145, -4.6139, -4.6139, -4.6163, -4.6188, -4.6132, -4.6169,
         -4.6088, -4.6111, -4.6175, -4.6150, -4.6103, -4.6137, -4.6174, -4.6143,
         -4.6127, -4.6165, -4.6099, -4.6192, -4.6119, -4.6189, -4.6277, -4.6110,
         -4.6158, -4.6104, -4.6153, -4.6154, -4.6172, -4.6156, -4.6180, -4.6142,
         -4.6181, -4.6096, -4.6142, -4.6114, -4.6115, -4.6182, -4.6118, -4.6114,
         -4.6166, -4.6145, -4.6199, -4.6166, -4.6126, -4.6156, -4.6179, -4.6166,
         -4.6129, -4.6131, -4.6131, -4.6183, -4.6131, -4.6116, -4.6197, -4.6156,
         -4.6156, -4.6158, -4.6096, -4.6168, -4.6205, -4.6162, -4.6235, -4.6142,
         -4.6137, -4.6162, -4.6193, -4.6131, -4.6214],
        [-4.6125, -4.6107, -4.6204, -4.6173, -4.6135, -4.6076, -4.6154, -4.6125,
         -4.6175, -4.6183, -4.6123, -4.6218, -4.6168, -4.6127, -4.6067, -4.6157,
         -4.6167, -4.6155, -4.6214, -4.6138, -4.6156, -4.6171, -4.6142, -4.6125,
         -4.6154, -4.6166, -4.6105, -4.6163, -4.6168, -4.6140, -4.6173, -4.6146,
         -4.6146, -4.6149, -4.6138, -4.6141, -4.6164, -4.6178, -4.6153, -4.6168,
         -4.6103, -4.6116, -4.6164, -4.6150, -4.6112, -4.6126, -4.6169, -4.6146,
         -4.6128, -4.6167, -4.6109, -4.6178, -4.6123, -4.6190, -4.6291, -4.6123,
         -4.6173, -4.6118, -4.6148, -4.6166, -4.6158, -4.6150, -4.6183, -4.6132,
         -4.6197, -4.6119, -4.6138, -4.6126, -4.6118, -4.6175, -4.6108, -4.6104,
         -4.6170, -4.6164, -4.6188, -4.6174, -4.6126, -4.6165, -4.6189, -4.6159,
         -4.6119, -4.6131, -4.6118, -4.6168, -4.6135, -4.6123, -4.6188, -4.6147,
         -4.6142, -4.6150, -4.6106, -4.6167, -4.6191, -4.6166, -4.6220, -4.6137,
         -4.6136, -4.6140, -4.6186, -4.6138, -4.6192],
        [-4.6178, -4.6196, -4.6099, -4.6129, -4.6168, -4.6227, -4.6149, -4.6177,
         -4.6127, -4.6120, -4.6180, -4.6085, -4.6134, -4.6176, -4.6235, -4.6146,
         -4.6136, -4.6148, -4.6088, -4.6164, -4.6146, -4.6132, -4.6161, -4.6177,
         -4.6148, -4.6137, -4.6198, -4.6140, -4.6135, -4.6162, -4.6129, -4.6157,
         -4.6157, -4.6153, -4.6164, -4.6161, -4.6138, -4.6125, -4.6150, -4.6134,
         -4.6199, -4.6186, -4.6139, -4.6152, -4.6190, -4.6177, -4.6134, -4.6157,
         -4.6174, -4.6135, -4.6194, -4.6124, -4.6180, -4.6112, -4.6012, -4.6180,
         -4.6129, -4.6185, -4.6155, -4.6137, -4.6145, -4.6152, -4.6119, -4.6171,
         -4.6105, -4.6184, -4.6164, -4.6177, -4.6185, -4.6128, -4.6195, -4.6198,
         -4.6133, -4.6139, -4.6115, -4.6128, -4.6177, -4.6138, -4.6114, -4.6144,
         -4.6183, -4.6171, -4.6185, -4.6134, -4.6167, -4.6179, -4.6114, -4.6156,
         -4.6161, -4.6153, -4.6196, -4.6136, -4.6111, -4.6137, -4.6083, -4.6165,
         -4.6167, -4.6162, -4.6117, -4.6165, -4.6111],
        [-4.6125, -4.6107, -4.6204, -4.6173, -4.6135, -4.6076, -4.6154, -4.6125,
         -4.6175, -4.6183, -4.6123, -4.6218, -4.6168, -4.6127, -4.6067, -4.6157,
         -4.6167, -4.6155, -4.6214, -4.6138, -4.6156, -4.6171, -4.6142, -4.6125,
         -4.6154, -4.6166, -4.6105, -4.6163, -4.6168, -4.6140, -4.6173, -4.6146,
         -4.6146, -4.6149, -4.6138, -4.6141, -4.6164, -4.6178, -4.6153, -4.6168,
         -4.6103, -4.6116, -4.6164, -4.6150, -4.6112, -4.6126, -4.6169, -4.6146,
         -4.6128, -4.6167, -4.6109, -4.6178, -4.6123, -4.6190, -4.6291, -4.6123,
         -4.6173, -4.6118, -4.6148, -4.6166, -4.6158, -4.6150, -4.6183, -4.6132,
         -4.6197, -4.6119, -4.6138, -4.6126, -4.6118, -4.6175, -4.6108, -4.6104,
         -4.6170, -4.6164, -4.6188, -4.6174, -4.6126, -4.6165, -4.6189, -4.6159,
         -4.6119, -4.6131, -4.6118, -4.6168, -4.6135, -4.6123, -4.6188, -4.6147,
         -4.6142, -4.6150, -4.6106, -4.6167, -4.6191, -4.6166, -4.6220, -4.6137,
         -4.6136, -4.6140, -4.6186, -4.6138, -4.6192]])

### Deployment

In [None]:
# Model path
model_final_path = os.path.join(os.getcwd(), "Saved Models", 'Final')

In [None]:
# Load env and model
env = PlantAirControl()
model = PPO.load(model_final_path, env, tensorboard_log=log_path)

In [None]:
episodes = 5
for episode in range(1, episodes + 1):
    obsINI, infoINI  = env.reset()
    score = 0
    terminated = False
    truncated = False

    while not terminated or truncated:
        action, _  = model.predict(obsINI, deterministic = False)
        obs, reward, terminated, truncated, info = env.step(action)
        score += reward
        print(f"{score}")

-0.925
8.965
-0.495
-1.0250000000000001
9.6
