In [1]:
import numpy as np
import pandas as pd
import gymnasium as gym
import random
import tensorflow as tf
from tensorflow.keras.models import load_model

csv_file = '../resources/esb1_preprocessed.csv'
model_dir = '../resources/mlp'

2024-03-27 09:25:42.809937: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
columns = ['ESB_Tower_1 leavingWaterTemp', 'Cell1_Fan vfdPercent', 
       'ESB_Tower_1 enteringWaterTemp', 'ESB_Tower_1 bypassValveOpenClose',
       'ESB_Tower_1 outdoorAirDryBulb', 'ESB_Tower_1 outdoorAirHumidity',
       'ESB_Tower_1 outdoorAirWetBulb', 'DayOfWeek', 'HourOfDay',
       'Setpoint_Python', ]
df = pd.read_csv(csv_file, index_col='time')[columns]
df.columns

Index(['ESB_Tower_1 leavingWaterTemp', 'Cell1_Fan vfdPercent',
       'ESB_Tower_1 enteringWaterTemp', 'ESB_Tower_1 bypassValveOpenClose',
       'ESB_Tower_1 outdoorAirDryBulb', 'ESB_Tower_1 outdoorAirHumidity',
       'ESB_Tower_1 outdoorAirWetBulb', 'DayOfWeek', 'HourOfDay',
       'Setpoint_Python'],
      dtype='object')

In [3]:
class HVACEnvironment(gym.Env):
    def __init__(self, csv_file, model_file):
        super(HVACEnvironment, self).__init__()

        columns = ['ESB_Tower_1 leavingWaterTemp', 'Cell1_Fan vfdPercent', 
            'ESB_Tower_1 enteringWaterTemp', 'ESB_Tower_1 bypassValveOpenClose',
            'ESB_Tower_1 outdoorAirDryBulb', 'ESB_Tower_1 outdoorAirHumidity',
            'ESB_Tower_1 outdoorAirWetBulb', 'DayOfWeek', 'HourOfDay',
            'Setpoint_Python', ]
        
        # Load data from CSV
        self.data = pd.read_csv(csv_file, index_col='time')[columns]
        
        # Define observation space (features)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(9,), dtype=np.float32)
        
        # Define action space (python_setpoint)
        # actions we can take: decrease, maintain, increase by 1
        self.action_space = gym.spaces.Discrete(3)
        
        # Load model
        self.model = load_model(model_file)
        
        # Initial state
        self.state = self.data.iloc[0].values
    
    def step(self, action):
        """
        action 0: self.state += (0-1) --> -1
        action 1: self.state += (1-1) --> +0
        action 2: self.state += (2-1) --> +1
        """
        # Apply action (update python_setpoint)
        python_setpoint = action

        fanVFD = self.state[1]
        
        # Adjust fanVFD proportionally
        # Assuming fanVFD is inversely proportional to python_setpoint
        fanVFD = fanVFD - action / 100  # Assuming 100 is the maximum value of python_setpoint
        
        # Update state
        self.state[2] = fanVFD
        self.state[-1] = python_setpoint
        
        # Predict LeavingWaterTemp using the model
        inp = np.asarray(self.state[1:]).reshape(1, 9).astype('float32')
        print(inp)
        leaving_water_temp = self.model.predict(inp)
        
        # Compute reward (you need to define your own reward function)
        reward = self._calculate_reward(leaving_water_temp, fanVFD)
        
        # Check if termination condition is met (optional)
        done = False  # Define your termination condition
        
        return self.state, reward, done, {}
    
    def reset(self):
        # Reset environment to initial state
        self.state = self.data.iloc[0].values
        return self.state
    
    def render(self, mode='human'):
        # Optionally, add rendering functionality
        pass
    
    def close(self):
        # Clean up resources
        pass
    
    def _calculate_reward(self, leaving_water_temp, fanVFD):
        # Define your reward function

        # Define target values
        target_leaving_water_temp = 0  # Adjust as needed
        target_fanVFD = 0  # Adjust as needed
    
        # Define constants
        leaving_temp_weight = 0.7  # Weight for leaving water temperature in the reward calculation
        perfreq_weight = 0.3  # Weight for fanVFD in the reward calculation
        
        # Compute reward based on leaving water temperature
        leaving_temp_penalty = abs(leaving_water_temp - target_leaving_water_temp)
        leaving_temp_reward = max(0, 1 - leaving_temp_penalty)  # Linearly decreasing reward with increasing deviation
        
        # Compute reward based on fanVFD
        perfreq_penalty = abs(fanVFD - target_fanVFD)
        perfreq_reward = max(0, 1 - perfreq_penalty)  # Linearly decreasing reward with increasing deviation
        
        # Combine the rewards with weights
        total_reward = leaving_temp_weight * leaving_temp_reward + perfreq_weight * perfreq_reward

        print(total_reward)
        
        return total_reward

In [4]:
# Example usage with random actions
env = HVACEnvironment(csv_file=csv_file, model_file=model_dir)
observation = env.reset()
total_reward = 0

for step in range(100):  # Set a maximum number of steps
    action = env.action_space.sample()  # Take a random action
    observation, reward, done, info = env.step(action)
    total_reward += reward
    
    print(f"Step {step}: Action taken: {action}, Reward: {reward}, Done: {done}")
    
    if done:
        print("Environment reached termination condition.")
        break

print(f"Total reward accumulated: {total_reward}")
env.close()

2024-03-27 09:26:01.569715: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[[ 0.        0.        0.       84.522064 51.04264  70.414894  2.
   2.        0.      ]]
0.3
Step 0: Action taken: 0, Reward: 0.3, Done: False
[[ 0.0000000e+00 -2.0000000e-02  0.0000000e+00  8.4522064e+01
   5.1042641e+01  7.0414894e+01  2.0000000e+00  2.0000000e+00
   2.0000000e+00]]
0.294
Step 1: Action taken: 2, Reward: 0.294, Done: False
[[ 0.        0.        0.       84.522064 51.04264  70.414894  2.
   2.        0.      ]]
0.3
Step 2: Action taken: 0, Reward: 0.3, Done: False
[[ 0.0000000e+00 -2.0000000e-02  0.0000000e+00  8.4522064e+01
   5.1042641e+01  7.0414894e+01  2.0000000e+00  2.0000000e+00
   2.0000000e+00]]
0.294
Step 3: Action taken: 2, Reward: 0.294, Done: False
[[ 0.        0.        0.       84.522064 51.04264  70.414894  2.
   2.        0.      ]]
0.3
Step 4: Action taken: 0, Reward: 0.3, Done: False
[[ 0.        0.        0.       84.522064 51.04264  70.414894  2.
   2.        0.      ]]
0.3
Step 5: Action taken: 0, Reward: 0.3, Done: False
[[ 0.0000000e+00 -2.00