### Import

In [10]:
import os
import copy
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cma

from gymnasium.spaces import Dict, Box, Discrete, MultiDiscrete
from torch import nn as nn

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

### Utils

In [11]:
def is_between(A, b):
    return A[0] < b <= A[1]

### PlantAirControl Environment

In [14]:
class PlantAirControl(gym.Env):
    # Constants are hard-coded for now but can set up to read from a spreadsheet
    CHAMBER_VOLUME = 1.0
    NON_LIGHT_HEAT = 0.5
    LIGHT_HEAT = 1.0

    FAN_MAX_WATTAGE = 0.5 # kW
    HEAT_MAX_WATTAGE = 1.5 # kW
    COOL_MAX_WATTAGE = 1.5 # kW

    FAN_MAX_AIR_INOUT_RATE = 1.0 
    HEAT_MAX_WATER_TEMP_UP_RATE = 1.0
    COOL_MAX_WATER_TEMP_DOWN_RATE = 1.0

    DESIRED_TEMPS = [30.0, 40.0] # Celcius
    DESIRED_HUMIDS = [50.0, 70.0] # Percentage (relavtive humidity)
    DESIRED_VPDS = [10.0, 20.0]

    def __init__(self, render_mode=None,
                light_heat = LIGHT_HEAT,
                fan_max_wattage = FAN_MAX_WATTAGE,
                heat_max_wattage = HEAT_MAX_WATTAGE,
                cool_max_wattage = COOL_MAX_WATTAGE,

                fan_max_air_inout_rate = FAN_MAX_AIR_INOUT_RATE,
                heat_max_water_temp_up_rate = HEAT_MAX_WATER_TEMP_UP_RATE,
                cool_max_water_temp_down_rate = COOL_MAX_WATER_TEMP_DOWN_RATE,

                desired_temps = DESIRED_TEMPS,
                desired_humids = DESIRED_HUMIDS,
                desired_VPDs = DESIRED_VPDS
                ):
        
        # Maths constants


        # Set up some variables
        self.light_heat = np.float32(light_heat)

        self.fan_max_wattage = np.float32(fan_max_wattage)
        self.heat_max_wattage = np.float32(heat_max_wattage)
        self.cool_max_wattage = np.float32(cool_max_wattage)

        self.fan_max_air_inout_rate = np.float32(fan_max_air_inout_rate)
        self.heat_max_water_temp_up_rate = np.float32(heat_max_water_temp_up_rate)
        self.cool_max_water_temp_down_rate = np.float32(cool_max_water_temp_down_rate)

        self.dersired_temps = np.float32(np.array(desired_temps))
        self.dersired_humids = np.float32(np.array(desired_humids))
        self.desired_VPDs = np.float32(np.array(desired_VPDs))

        # Specify the action_space
        self.action_space = MultiDiscrete([101]*3) # e.g., fan capacity, heating component capacity, cooling component capacity
        
        # Speicfy the observation_space
        self.observation_space = Dict({"InTemp/InHumid/OutTemp/OutHumid/Energy": Box(-100, 100, shape=(5,)),
                                       "LightOnOff": Discrete(2),
                                        "Status": Discrete(2),})

    def reset(self, seed=None):
        super().reset(seed=seed) # To enable self.np_random seeding
        rng = self.np_random

        # Start state initialisation
        self.light_on_off = rng.integers(2)

        self.inside_temp = rng.uniform(low = 0, high = 70)
        self.inside_humid = rng.uniform(low = 30, high = 90)
        self.outside_temp = rng.uniform(low = - 10, high = 30)
        self.outside_humid = rng.uniform(low = 30, high = 90)

        self.energy = 0
        
        self.plant_OK = 0 # 0 means not in the box yet, 1 means OK
        
        observation = {"InTemp, InHumid, OutTemp, OutHumid, Energy": np.float32(np.array([self.inside_temp, 
                                                                          self.inside_humid, 
                                                                          self.outside_temp,
                                                                          self.outside_humid,
                                                                          self.energy])),
                        "LightOnOff": self.light_on_off,
                        "Status": self.plant_OK}

        info = {}

        return observation, info

    def step(self, action):
        # Interprete action into parameters of mathematical model
        self.fan_air_inout_rate = (action[0] * self.fan_max_air_inout_rate)/100
        self.heat_water_temp_up_rate = (action[1] * self.heat_max_water_temp_up_rate)/100
        self.cool_water_temp_down_rate = (action[2] * self.cool_max_water_temp_down_rate)/100

        self.fan_wattage = (action[0] * self.fan_max_wattage)/100
        self.heat_wattage = (action[1] * self.heat_max_wattage)/100
        self.cool_wattage = (action[2] * self.cool_max_wattage)/100

        # Update observations via model for temp, humididty, VPD
        self.inside_temp += self.fan_air_inout_rate * self.outside_temp \
            + self.heat_water_temp_up_rate * 10 \
            + self.cool_water_temp_down_rate * 10 \
            + self.light_on_off * self.light_heat
        
        self.inside_humid += self.fan_air_inout_rate * self.outside_humid + 10

        self.energy += self.fan_wattage + self.heat_wattage + self.cool_wattage

        if is_between(self.dersired_temps, self.inside_temp) and is_between(self.dersired_humids, self.inside_humid) and is_between(self.desired_VPDs, self.VPD):
            self.plant_OK = 1

        observation = {"InTemp, InHumid, OutTemp, OutHumid, Energy": np.float32(np.array([self.inside_temp, 
                                                                          self.inside_humid, 
                                                                          self.outside_temp,
                                                                          self.outside_humid,
                                                                          self.energy])),
                        "LightOnOff": self.light_on_off,
                        "Status": self.plant_OK}

        # Reward
        reward = self.plant_OK * 10 - self.energy

        terminated = True # episode completes after 1 step

        info = {}
            
        return observation, reward, terminated, False, info

### Testing

In [18]:
env = PlantAirControl()
episodes = 10
for episode in range(1, episodes + 1):
    obsINI, infoINI  = env.reset()
    print(obsINI)
    score = 0
    terminated = False
    truncated = False

    while not terminated or truncated:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        print(obs)
        print(f"The final reward is {reward}")

{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([ 3.1792803, 49.924603 , 27.388287 , 53.39958  ,  0.       ],
      dtype=float32), 'LightOnOff': 0, 'Status': 0}
{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([24.809172, 69.53653 , 27.388287, 53.39958 ,  2.595   ],
      dtype=float32), 'LightOnOff': 0, 'Status': 0}
The final reward is -2.5949999999999998
{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([ 5.2240863, 35.72998  , 17.847113 , 64.943405 ,  0.       ],
      dtype=float32), 'LightOnOff': 1, 'Status': 0}
{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([16.387268, 52.873756, 17.847113, 64.943405,  1.285   ],
      dtype=float32), 'LightOnOff': 1, 'Status': 0}
The final reward is -1.285
{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([47.065044 , 32.89876  , -4.1990595, 54.332306 ,  0.       ],
      dtype=float32), 'LightOnOff': 1, 'Status': 0}
{'InTemp, InHumid, OutTemp, OutHumid, Energy': array([50.467224 , 53.2219   , -4.1990595, 54.332306 ,  0.575