# Prepare Data

In [None]:
# Import necessary libraries
import pandas as pd
import json
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the column metadata; its keys are used further down to decide which columns to keep
with open('../data/columns_info.json') as info_file:
    columns_info = json.load(info_file)

# Read the raw chiller plant measurements
chiller_data = pd.read_parquet('../data/chiller_plant_dataset.parquet')

In [None]:
# Drop every column that is not described in columns_info
import ast

# The JSON keys are Python literals stored as text; parse them so they can be
# compared with the DataFrame's column labels.
columns_info_list = [ast.literal_eval(key) for key in columns_info]
column_list = chiller_data.columns.to_list()
columns_to_drop = [
    col for col in column_list if col not in columns_info_list
] + [('chiller_6', 'power')]  # this column is dropped unconditionally
chiller_data = chiller_data.drop(columns=columns_to_drop)
chiller_data.shape

In [None]:
# Fill missing values: carry the last observation forward, then back-fill
# whatever is still missing at the start of each column.
chiller_data = chiller_data.ffill().bfill()
# Number of missing cells that remain after filling
chiller_data.isnull().sum().sum()

In [None]:
# Remove negative values
# Columns that contain at least one negative entry
negative_columns = chiller_data.columns[chiller_data.lt(0).any()]

# Total number of negative entries across those columns
negative_counts = chiller_data[negative_columns].lt(0).sum().sum()
print("negative values:")
print(negative_counts)

# Turn negatives into NaN in the affected columns ...
affected = chiller_data[negative_columns]
chiller_data[negative_columns] = affected.mask(affected < 0)
# ... then fill the gaps forward and backward
chiller_data.ffill(inplace=True)
chiller_data.bfill(inplace=True)

# Verify that no negative values are left
negative_counts_after_fill = chiller_data[negative_columns].lt(0).sum().sum()
print("negative values after forward filling:")
print(negative_counts_after_fill)

In [None]:
# Flatten tuple (MultiIndex) column labels into plain underscore-joined names;
# labels that are not tuples are kept unchanged.
chiller_data.columns = [
    col if not isinstance(col, tuple) else '_'.join(col)
    for col in chiller_data.columns
]

In [None]:
# Summary statistics of the 'plant_target_chw_setpoint' column (sanity check)
chiller_data['plant_target_chw_setpoint'].describe()

In [None]:
# Persist the preprocessed frame; later cells and the RL training code reload this file
chiller_data.to_parquet('chiller_data_pre.parquet')

In [None]:
# Reload the preprocessed frame from disk (allows starting the notebook from this cell)
chiller_data = pd.read_parquet('chiller_data_pre.parquet')


In [None]:
# Summary statistics of chiller 1's evaporator water flow rate (sanity check)
chiller_data['chiller_1_evap_water_flow_rate'].describe()

In [None]:
# Reconstruct chiller 1's entering (return) water temperature from its cooling
# rate and evaporator flow, then compare it with the measured value.
evap_flow = chiller_data['chiller_1_evap_water_flow_rate']
leaving_temp = chiller_data['chiller_1_evap_leaving_water_temperature']
# NOTE(review): the factor 24 looks like a unit-conversion constant — confirm units
temp_rise = 24 * chiller_data['chiller_1_cooling_rate'] / evap_flow

# With zero flow the rise is undefined, so fall back to the leaving temperature
return_temp_cal = np.where(evap_flow == 0, leaving_temp, temp_rise + leaving_temp)

return_temp_actual = chiller_data['chiller_1_evap_entering_water_temperature']

# Store both series on the DataFrame so Plotly can draw them together
chiller_data['return_temp_cal'] = return_temp_cal
chiller_data['return_temp_actual'] = return_temp_actual

# Line chart: calculated vs. measured return temperature over the index
fig = px.line(
    chiller_data,
    x=chiller_data.index,
    y=['return_temp_cal', 'return_temp_actual'],
    labels={'value': 'Temperature (°C)', 'variable': 'Temperature Type'},
    title='Comparison of Calculated and Actual Return Temperatures',
)

# Show the plot
fig.show()

In [None]:
# Inspect the index (the next cell parses its entries as datetime strings)
chiller_data.index

In [None]:
def extract_day(date_str):
    """Return the part of a datetime string that precedes the first whitespace."""
    return date_str.split(maxsplit=1)[0]


# Temporary column holding the day part of every index entry
chiller_data['day'] = chiller_data.index.map(extract_day)

# 'new_day' is 1 on each row whose day differs from the previous row's day
# (the very first row is always flagged), otherwise 0
previous_day = chiller_data['day'].shift(1)
chiller_data['new_day'] = chiller_data['day'].ne(previous_day).astype(int)

# The helper column is no longer needed
del chiller_data['day']

# Display the first few rows to check the new feature
print(chiller_data[['new_day']].head(10))

In [None]:
# Check the frame's dimensions after adding the engineered columns
chiller_data.shape

In [None]:
# Combined pump power: sum of the 'plant_power_all_chps' and 'plant_power_all_cdps' columns
# (presumably chilled-water and condenser-water pumps — confirm against columns_info)
chiller_data['plant_power_all_pumps'] = chiller_data['plant_power_all_chps'] + chiller_data['plant_power_all_cdps']

# Train plant power prediction model

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Model inputs: plant load, weather, loop temperatures, per-chiller status and
# setpoint read-backs (chillers 1-5) and the two loop flow rates.
features = [
    'plant_cooling_rate',
    'outdoor_weather_station_wetbulb_temperature',
    'outdoor_weather_station_relative_humidity',
    'chilled_water_loop_return_water_temperature',
    'chilled_water_loop_supply_water_temperature',
    *[f'chiller_{i}_status_read' for i in range(1, 6)],
    *[f'chiller_{i}_setpoint_read' for i in range(1, 6)],
    'chilled_water_loop_flow_rate',
    'condenser_water_loop_flow_rate',
]

target = ['plant_power']

X = chiller_data[features]
y = chiller_data[target]

# Ordered split (no shuffling): first 60 % of the rows for training, the rest for testing
split_at = int(0.6 * len(X))
X_train, X_test = X[:split_at], X[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

# Random Forest Model
rf_params = dict(
    n_estimators=400,
    max_depth=30,
    min_samples_split=10,
    min_samples_leaf=6,
    random_state=42,
    bootstrap=True,
)
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X_train, y_train.values.ravel())

y_pred_rf = rf_model.predict(X_test)

mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest MSE: {mse_rf}")

# Plot the prediction (red) against the measured target on the test rows
target_name = target[0]
y_pred_rf_df = pd.DataFrame(y_pred_rf, index=y_test.index, columns=y_test.columns)
fig = px.line(x=X_test.index, y=y_pred_rf_df[target_name], title=f'{target_name}')
fig.update_traces(line_color='red', name=f'{target_name}')
measured_trace = px.line(x=X_test.index, y=y_test[target_name]).data[0]
fig.add_trace(measured_trace)
fig.show()

In [None]:
import joblib

# Persist the fitted random-forest model to disk for later reuse
joblib.dump(rf_model, 'random_forest_model.pkl')

# DQN

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import joblib
from sklearn.ensemble import RandomForestRegressor


In [None]:
class Chiller:
    """One chiller, backed by two pre-trained regression models loaded from disk.

    ``power_model`` predicts the chiller's power and ``chwrt_model`` predicts its
    chilled-water return temperature. ``count_on`` / ``count_off`` / ``mode`` are
    bookkeeping fields that the environment reads and updates.
    """

    def __init__(self, chiller_id):
        self.chiller_id = chiller_id
        self.power_model = joblib.load(f'chiller_{chiller_id}_power_lr_model.pkl')
        self.chwrt_model = joblib.load(f'chiller_{chiller_id}_chwrt_lr_model.pkl')
        self.count_on = 0
        self.count_off = 0
        self.mode = 0

        # Column names, in the order the two models receive their inputs
        prefix = f'chiller_{chiller_id}_'
        power_fields = (
            'cooling_rate',
            'evap_leaving_water_temperature',
            'evap_entering_water_temperature',
            'evap_water_flow_rate',
            'cond_water_flow_rate',
            'status_read',
        )
        chwrt_fields = (
            'cooling_rate',
            'evap_leaving_water_temperature',
            'evap_water_flow_rate',
            'cond_water_flow_rate',
            'status_read',
        )
        self.feature_columns_power = [prefix + field for field in power_fields]
        self.feature_columns_chwrt = [prefix + field for field in chwrt_fields]

    def get_power_consumption(self, state, action):
        """Predict this chiller's power, clipped at zero.

        ``state`` is the cooling rate assigned to the chiller; ``action`` is
        ``(mode, setpoint, chwr_temp, chw_flow, cdw_flow)``.
        """
        chiller_mode, chiller_setpoint, chwr_temp, chw_flow, cdw_flow = action
        features = np.concatenate(
            [[state], [chiller_setpoint, chwr_temp, chw_flow, cdw_flow, chiller_mode]]
        )
        frame = pd.DataFrame(features.reshape(1, -1), columns=self.feature_columns_power)
        prediction = self.power_model.predict(frame)
        return prediction[0].clip(min=0)

    def get_chwr_temp(self, state, action):
        """Predict the chilled-water return temperature.

        ``state[0]`` is used as the cooling-rate feature; ``action`` is
        ``(mode, setpoint, chw_flow, cdw_flow)``.
        """
        chiller_mode, chiller_setpoint, chw_flow, cdw_flow = action
        features = np.concatenate(
            [[state[0]], [chiller_setpoint], [chw_flow, cdw_flow, chiller_mode]]
        )
        frame = pd.DataFrame(features.reshape(1, -1), columns=self.feature_columns_chwrt)
        return self.chwrt_model.predict(frame)[0]

    def reset_status(self):
        """Zero the on/off switch counters (``mode`` is left untouched)."""
        self.count_on = self.count_off = 0
    
class Pump:
    """Power model for the plant's pumps, loaded from 'pumps_power_model.pkl'."""

    def __init__(self):
        self.power_model = joblib.load('pumps_power_model.pkl')

        # Input columns, in the order the model receives them
        self.feature_columns = [
            'plant_cooling_rate',
            'chilled_water_loop_flow_rate',
            'condenser_water_loop_flow_rate',
        ]

    def get_power_consumption(self, state, action):
        """Predict pump power (clipped at zero).

        ``state[0]`` is the plant cooling rate; ``action`` is a pair of
        per-chiller flow lists ``(chw_flows, condw_flows)`` that are summed
        into the two loop flow rates.
        """
        chw_flows, condw_flows = action
        loop_flows = [sum(chw_flows), sum(condw_flows)]
        features = np.concatenate([[state[0]], loop_flows]).reshape(1, -1)
        frame = pd.DataFrame(features, columns=self.feature_columns)
        return self.power_model.predict(frame)[0].clip(min=0)

class CoolingTower:
    """Power model for the cooling towers, loaded from 'cts_power_model.pkl'."""

    def __init__(self):
        self.power_model = joblib.load('cts_power_model.pkl')

        # Input columns, in the order the model receives them: the first three
        # state entries followed by the two loop flow rates.
        self.feature_columns = [
            'plant_cooling_rate',
            'outdoor_weather_station_wetbulb_temperature',
            'outdoor_weather_station_relative_humidity',
            'chilled_water_loop_flow_rate',
            'condenser_water_loop_flow_rate',
        ]

    def get_power_consumption(self, state, action):
        """Predict cooling-tower power (clipped at zero).

        Uses ``state[0:3]`` plus the summed chilled-water and condenser-water
        flows taken from ``action = (chw_flows, condw_flows)``.
        """
        chw_flows, condw_flows = action
        loop_flows = [sum(chw_flows), sum(condw_flows)]
        features = np.concatenate([state[0:3], loop_flows]).reshape(1, -1)
        frame = pd.DataFrame(features, columns=self.feature_columns)
        return self.power_model.predict(frame)[0].clip(min=0)

In [None]:
class ChillerPlantEnvironment:
    """Step-by-step chiller plant simulator that replays the rows of ``data``.

    The state at step ``t`` is built from row ``t`` of ``data``. An action is a
    20-element vector: 5 on/off values, 5 setpoints, 5 chilled-water flows and
    5 condenser-water flows, each continuous value expected in [-1, 1]. The
    reward is the negative predicted plant power minus a penalty that scales
    with the number of consecutive constraint violations.
    """

    def __init__(self, data):
        self.data = data
        self.current_step = 0
        self.max_steps = len(data)

        # Define action and state spaces
        self.n_chillers = 5  # Excluding standby chiller
        self.action_space = 20  # 5 for chiller on/off, 5 for setpoints, 5 for chw_flow, 5 for condw_flow
        self.state_space = 5

        self.chillers = [Chiller(i) for i in range(1, self.n_chillers + 1)]
        self.pumps = Pump()
        self.cooling_towers = CoolingTower()
        self.consecutive_violations = 0

    def reset(self):
        """Rewind to the first row, clear per-episode bookkeeping, return the first state."""
        self.current_step = 0
        self.consecutive_violations = 0
        # Chiller modes and switch counters must not leak into the next episode
        for chiller in self.chillers:
            chiller.mode = 0
            chiller.reset_status()
        return self._get_state()

    @staticmethod
    def _first_value(prediction):
        """Return the first element of a model output as a float.

        Accepts a scalar, a 1-D or a 2-D prediction so that the environment
        works regardless of how the underlying model was fitted.
        """
        return float(np.ravel(prediction)[0])

    def step(self, action):
        """Apply ``action`` to the current row.

        Returns:
            ``(next_state, reward, done)``. When the episode ends (``done`` is
            True) there is no next row, so the state of the final row is
            returned as the terminal observation.
        """
        # Decode action
        chiller_modes, chiller_setpoints, chw_flows, condw_flows = self._decode_action(action)

        # Get current state
        state = self._get_state()

        # Update chillers status
        operation_penalty = self._update_chillers_status(chiller_modes)

        # Cooling rate per chiller: spread the plant load evenly over running chillers
        running = sum(chiller_modes)
        Crc = state[0] / running if running != 0 else 0

        # Chilled water return temperature for each chiller.
        # NOTE(review): get_chwr_temp reads state[0] (the plant cooling rate) as its
        # cooling-rate feature; the per-chiller Crc may have been intended — confirm.
        chwr_temps = [
            self._first_value(chiller.get_chwr_temp(state, (mode, setpoint, chw_flow, cdw_flow)))
            for chiller, mode, setpoint, chw_flow, cdw_flow
            in zip(self.chillers, chiller_modes, chiller_setpoints, chw_flows, condw_flows)
        ]

        # Power consumption of each chiller, the pumps and the cooling towers
        chiller_powers = [
            self._first_value(
                chiller.get_power_consumption(Crc, (mode, setpoint, chwr_temp, chw_flow, cdw_flow))
            )
            for chiller, mode, setpoint, chwr_temp, chw_flow, cdw_flow
            in zip(self.chillers, chiller_modes, chiller_setpoints, chwr_temps, chw_flows, condw_flows)
        ]
        pump_power = self._first_value(self.pumps.get_power_consumption(state, (chw_flows, condw_flows)))
        ct_power = self._first_value(
            self.cooling_towers.get_power_consumption(state, (chw_flows, condw_flows))
        )

        total_power_consumption = sum(chiller_powers) + pump_power + ct_power

        # Check constraints
        constraints_violated, penalty = self._check_constraints(
            state, chiller_modes, chiller_setpoints, operation_penalty, Crc, chwr_temps
        )
        # Update the violation streak: +1 per violating step, -10 per clean step,
        # never below zero (a negative streak would turn the penalty into a bonus).
        if constraints_violated:
            self.consecutive_violations += 1
        else:
            self.consecutive_violations = max(0, self.consecutive_violations - 10)

        # Reward: negative power, minus a penalty that grows with the streak
        scaled_penalty = penalty * self.consecutive_violations
        reward = -total_power_consumption - scaled_penalty

        # Reset On/Off count on new day
        if state[4]:
            for chiller in self.chillers:
                chiller.reset_status()

        # Move to next step
        self.current_step += 1
        done = self.current_step >= self.max_steps

        # Past the last row there is nothing to read; reuse the final state
        next_state = state if done else self._get_state()
        return next_state, reward, done

    def _get_state(self):
        """Build the 5-element state vector from the row at ``current_step``."""
        row = self.data.iloc[self.current_step]
        return np.array([
            row['plant_cooling_rate'],
            row['outdoor_weather_station_wetbulb_temperature'],
            row['outdoor_weather_station_relative_humidity'],
            row['plant_target_chw_setpoint'],
            row['new_day']
        ])

    def _decode_action(self, action):
        """Map a 20-element action vector onto physical commands."""
        # Decode chiller on/off states (first 5 elements of action)
        chiller_modes = [int(action[i] > 0) for i in range(5)]

        # Decode setpoint temperatures for each chiller: [-1, 1] -> [32, 55]
        chiller_setpoints = [43.5 + 11.5 * action[i+5] for i in range(5)]

        # Decode chilled/condenser water flow rate for each chiller
        chw_flows = [1450 + 1050 * action[i+10] for i in range(5)]  # [-1, 1] -> [400, 2500]
        condw_flows = [1750 + 1250 * action[i+15] for i in range(5)]  # [-1, 1] -> [500, 3000]

        return chiller_modes, chiller_setpoints, chw_flows, condw_flows

    def _check_constraints(self, state, chiller_modes, chiller_setpoints, operation_penalty, Crc, chwr_temps):
        """Return ``(violated, penalty_unit)``.

        A step violates the constraints when the plant load exceeds 800 per
        running chiller, the per-chiller load exceeds 800, any predicted return
        temperature exceeds 59, or a chiller was switched more than once since
        the counters were last reset. The penalty unit is always 1; ``step``
        scales it by the violation streak.
        """
        plant_cooling_rate = state[0]
        chwrt_penalty = sum(1 for chwr_temp in chwr_temps if chwr_temp > 59)
        constraints = [
            plant_cooling_rate > sum(chiller_modes) * 800,
            Crc > 800,
            chwrt_penalty,
            operation_penalty
        ]
        constraints_violated = any(constraints)
        return constraints_violated, 1

    def _update_chillers_status(self, action):
        """Record on/off transitions and return the switching penalty.

        ``action`` is the list of requested modes (0/1), one per chiller. A
        transition increments the matching counter; every transition after the
        first adds the counter value to the penalty. The chiller's ``mode`` is
        then updated so the next step compares against the new mode.
        """
        penalty = 0
        for chiller, new_mode in zip(self.chillers, action):
            if chiller.mode == 0 and new_mode == 1:
                chiller.count_on += 1
                if chiller.count_on > 1:
                    penalty += chiller.count_on
            elif chiller.mode == 1 and new_mode == 0:
                chiller.count_off += 1
                if chiller.count_off > 1:
                    penalty += chiller.count_off
            chiller.mode = new_mode
        return penalty

In [None]:
class DQN(nn.Module):
    """Three-layer fully connected network with a tanh-bounded output.

    Two hidden layers of 64 ReLU units map a state vector to ``action_size``
    outputs in [-1, 1].
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return torch.tanh(self.fc3(hidden))

class DQNAgent:
    """Epsilon-greedy agent that trains a ``DQN`` network from a replay buffer.

    Actions are vectors of ``action_space`` values in [-1, 1]; the network's
    output is used directly as the action when not exploring.
    """

    def __init__(self, state_space, action_space):
        self.state_space = state_space
        self.action_space = action_space
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99  # discount rate
        self.epsilon = 0.8  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.99
        self.learning_rate = 0.0001
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DQN(state_space, action_space).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer (reward stored as a float)."""
        # Ensure reward is a scalar
        if isinstance(reward, (list, np.ndarray)):
            reward = reward[0] if len(reward) > 0 else 0.0
        self.memory.append((np.array(state), np.array(action), float(reward), np.array(next_state), done))

    def act(self, state):
        """Return an action as a list of ``action_space`` floats.

        With probability ``epsilon`` the action is sampled uniformly from
        [-1, 1]; otherwise it is the network output for ``state``.
        """
        if np.random.rand() <= self.epsilon:
            # Sample as many values as the agent was configured with
            return np.random.uniform(-1, 1, self.action_space).tolist()
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            act_values = self.model(state)
        return act_values.cpu().data.numpy()[0].tolist()

    def replay(self, batch_size):
        """Run one optimisation step on a random mini-batch and decay epsilon.

        Does nothing when the buffer holds fewer than ``batch_size``
        transitions (``random.sample`` would raise ``ValueError`` otherwise).
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        # Convert the batch to numpy arrays
        states = np.array([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch])
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        next_states = np.array([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=np.float32)

        # Convert numpy arrays to tensors
        states = torch.FloatTensor(states).to(self.device)
        actions = torch.FloatTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        # Compute Q values for current states
        current_q_values = self.model(states)

        # Compute Q values for next states
        with torch.no_grad():
            next_q_values = self.model(next_states)
            max_next_q_values = next_q_values.max(1)[0]

        # Compute target Q values
        target_q_values = rewards + (1 - dones) * self.gamma * max_next_q_values

        # Loss: the stored action vector blends the bootstrap target with the
        # current prediction, element by element
        loss = self.criterion(current_q_values, actions * target_q_values.unsqueeze(1) + (1 - actions) * current_q_values)

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Exploration decays once per optimisation step, down to epsilon_min
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [None]:

def train_agent(episodes, batch_size):
    """Train a ``DQNAgent`` on the first 60 % of the preprocessed data.

    Every episode replays the training rows once. A replay step runs after
    each environment step as soon as the buffer holds more than ``batch_size``
    transitions. The network weights are saved every 100th episode
    (episodes 0, 100, 200, ...). Returns the trained agent.
    """
    full_data = pd.read_parquet('chiller_data_pre.parquet')
    train_rows = int(0.6 * len(full_data))

    env = ChillerPlantEnvironment(full_data[:train_rows])
    agent = DQNAgent(env.state_space, env.action_space)

    for e in range(episodes):
        state = env.reset()
        total_reward = 0

        for step_idx in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            total_reward += reward
            state = next_state
            print(f"Timestep: {step_idx}, Action: {action}, Reward: {reward}")

            if done:
                print(f"Episode: {e+1}/{episodes}, Total Reward: {total_reward}")
                break

            # Learn only once the buffer can supply a full mini-batch
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        # Save the model periodically
        is_checkpoint_episode = e % 100 == 0
        if is_checkpoint_episode:
            torch.save(agent.model.state_dict(), f'chiller_dqn_model_{e}.pth')

    return agent

def evaluate_agent(agent, episodes):
    """Run ``agent`` greedily on the last 40 % of the preprocessed data.

    Side effect: ``agent.epsilon`` is set to 0 and is not restored afterwards.

    Args:
        agent: Object providing ``act(state)`` and an ``epsilon`` attribute.
        episodes: Number of evaluation passes over the held-out rows.

    Returns:
        The list of actions taken during the last episode, or an empty list
        when ``episodes`` is 0.
    """
    data = pd.read_parquet('chiller_data_pre.parquet')
    data = data[int(0.6*len(data)):]

    env = ChillerPlantEnvironment(data)
    agent.epsilon = 0  # No exploration during evaluation

    total_rewards = []
    actions = []  # defined up front so that episodes == 0 still returns a list
    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        actions = []

        for _ in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            state = next_state
            total_reward += reward
            actions.append(action)

            if done:
                break

        total_rewards.append(total_reward)
        print(f"Evaluation Episode: {e+1}/{episodes}, Total Reward: {total_reward}")

    # Averaging an empty list would only produce nan and a warning
    if total_rewards:
        print(f"Average Reward over {episodes} episodes: {np.mean(total_rewards)}")
    return actions


In [None]:

# Train the agent: 1000 episodes, replay mini-batches of 32 transitions
trained_agent = train_agent(episodes=1000, batch_size=32)

# Evaluate the trained agent over 10 episodes and keep the action list it returns
optimal_actions = evaluate_agent(trained_agent, episodes=10)

# Double DQN

In [None]:
class Chiller:
    """One chiller, backed by two pre-trained regression models loaded from disk.

    ``power_model`` predicts the chiller's power and ``chwrt_model`` predicts its
    chilled-water return temperature. ``count_on`` / ``count_off`` / ``mode`` are
    bookkeeping fields that the environment reads and updates.
    """

    def __init__(self, chiller_id):
        self.chiller_id = chiller_id
        self.power_model = joblib.load(f'chiller_{chiller_id}_power_lr_model.pkl')
        self.chwrt_model = joblib.load(f'chiller_{chiller_id}_chwrt_lr_model.pkl')
        self.count_on = 0
        self.count_off = 0
        self.mode = 0

        # Column names, in the order the two models receive their inputs
        prefix = f'chiller_{chiller_id}_'
        power_fields = (
            'cooling_rate',
            'evap_leaving_water_temperature',
            'evap_entering_water_temperature',
            'evap_water_flow_rate',
            'cond_water_flow_rate',
            'status_read',
        )
        chwrt_fields = (
            'cooling_rate',
            'evap_leaving_water_temperature',
            'evap_water_flow_rate',
            'cond_water_flow_rate',
            'status_read',
        )
        self.feature_columns_power = [prefix + field for field in power_fields]
        self.feature_columns_chwrt = [prefix + field for field in chwrt_fields]

    def get_power_consumption(self, state, action):
        """Predict this chiller's power, clipped at zero.

        ``state`` is the cooling rate assigned to the chiller; ``action`` is
        ``(mode, setpoint, chwr_temp, chw_flow, cdw_flow)``.
        """
        chiller_mode, chiller_setpoint, chwr_temp, chw_flow, cdw_flow = action
        features = np.concatenate(
            [[state], [chiller_setpoint, chwr_temp, chw_flow, cdw_flow, chiller_mode]]
        )
        frame = pd.DataFrame(features.reshape(1, -1), columns=self.feature_columns_power)
        prediction = self.power_model.predict(frame)
        return prediction[0].clip(min=0)

    def get_chwr_temp(self, state, action):
        """Predict the chilled-water return temperature.

        ``state[0]`` is used as the cooling-rate feature; ``action`` is
        ``(mode, setpoint, chw_flow, cdw_flow)``.
        """
        chiller_mode, chiller_setpoint, chw_flow, cdw_flow = action
        features = np.concatenate(
            [[state[0]], [chiller_setpoint], [chw_flow, cdw_flow, chiller_mode]]
        )
        frame = pd.DataFrame(features.reshape(1, -1), columns=self.feature_columns_chwrt)
        return self.chwrt_model.predict(frame)[0]

    def reset_status(self):
        """Zero the on/off switch counters (``mode`` is left untouched)."""
        self.count_on = self.count_off = 0
    
class Pump:
    """Power model for the plant's pumps, loaded from 'pumps_power_model.pkl'."""

    def __init__(self):
        self.power_model = joblib.load('pumps_power_model.pkl')

        # Input columns, in the order the model receives them
        self.feature_columns = [
            'plant_cooling_rate',
            'chilled_water_loop_flow_rate',
            'condenser_water_loop_flow_rate',
        ]

    def get_power_consumption(self, state, action):
        """Predict pump power (clipped at zero).

        ``state[0]`` is the plant cooling rate; ``action`` is a pair of
        per-chiller flow lists ``(chw_flows, condw_flows)`` that are summed
        into the two loop flow rates.
        """
        chw_flows, condw_flows = action
        loop_flows = [sum(chw_flows), sum(condw_flows)]
        features = np.concatenate([[state[0]], loop_flows]).reshape(1, -1)
        frame = pd.DataFrame(features, columns=self.feature_columns)
        return self.power_model.predict(frame)[0].clip(min=0)

class CoolingTower:
    """Power model for the cooling towers, loaded from 'cts_power_model.pkl'."""

    def __init__(self):
        self.power_model = joblib.load('cts_power_model.pkl')

        # Input columns, in the order the model receives them: the first three
        # state entries followed by the two loop flow rates.
        self.feature_columns = [
            'plant_cooling_rate',
            'outdoor_weather_station_wetbulb_temperature',
            'outdoor_weather_station_relative_humidity',
            'chilled_water_loop_flow_rate',
            'condenser_water_loop_flow_rate',
        ]

    def get_power_consumption(self, state, action):
        """Predict cooling-tower power (clipped at zero).

        Uses ``state[0:3]`` plus the summed chilled-water and condenser-water
        flows taken from ``action = (chw_flows, condw_flows)``.
        """
        chw_flows, condw_flows = action
        loop_flows = [sum(chw_flows), sum(condw_flows)]
        features = np.concatenate([state[0:3], loop_flows]).reshape(1, -1)
        frame = pd.DataFrame(features, columns=self.feature_columns)
        return self.power_model.predict(frame)[0].clip(min=0)

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

class ChillerPlantEnvironment:
    """Chiller plant simulator with binned actions and min-max normalised states.

    The state at step ``t`` comes from row ``t`` of ``data`` and is returned to
    the agent after scaling with a ``MinMaxScaler`` fitted on the same data.
    An action is a 20-element vector whose entries are used as indices into
    ``action_bins``. The reward is the negative predicted plant power minus a
    streak-scaled penalty, divided by 1000.
    """

    def __init__(self, data):
        self.data = data
        self.current_step = 0
        self.max_steps = len(data)
        self.n_chillers = 5
        self.state_space = 5
        self.action_space = 20  # Keep the same number of actions
        self.action_bins = {
            'chiller_modes': [0, 1],  # Binary
            'chiller_setpoints': np.linspace(43.5, 55, 5),  # 5 discrete values
            'chw_flows': np.linspace(400, 2500, 5),  # 5 discrete values
            'condw_flows': np.linspace(500, 3000, 5)  # 5 discrete values
        }

        # Add state normalization
        self.state_scaler = MinMaxScaler()
        self.state_scaler.fit(self.data[['plant_cooling_rate', 'outdoor_weather_station_wetbulb_temperature',
                                         'outdoor_weather_station_relative_humidity', 'plant_target_chw_setpoint', 'new_day']])

        self.chillers = [Chiller(i) for i in range(1, self.n_chillers + 1)]
        self.pumps = Pump()
        self.cooling_towers = CoolingTower()
        self.consecutive_violations = 0
        self.max_consecutive_violations = 50

    def reset(self):
        """Rewind to the first row, clear per-episode bookkeeping, return the scaled first state."""
        self.current_step = 0
        self.consecutive_violations = 0
        # Chiller modes and switch counters must not leak into the next episode
        for chiller in self.chillers:
            chiller.mode = 0
            chiller.reset_status()
        return self._normalize_state(self._get_state())

    @staticmethod
    def _first_value(prediction):
        """Return the first element of a model output as a float.

        Accepts a scalar, a 1-D or a 2-D prediction so that the environment
        works regardless of how the underlying model was fitted.
        """
        return float(np.ravel(prediction)[0])

    def step(self, action):
        """Apply ``action`` to the current row.

        Returns:
            ``(next_state, reward, done)`` with ``next_state`` normalised. When
            the episode ends (``done`` is True) there is no next row, so the
            state of the final row is returned as the terminal observation.
        """
        # Decode action
        chiller_modes, chiller_setpoints, chw_flows, condw_flows = self._decode_action(action)

        # Get current state (raw, un-normalised values are used for the physics)
        state = self._get_state()

        # Update chillers status
        operation_penalty = self._update_chillers_status(chiller_modes)

        # Cooling rate per chiller: spread the plant load evenly over running chillers
        running = sum(chiller_modes)
        Crc = state[0] / running if running != 0 else 0

        # Chilled water return temperature for each chiller.
        # NOTE(review): get_chwr_temp reads state[0] (the plant cooling rate) as its
        # cooling-rate feature; the per-chiller Crc may have been intended — confirm.
        chwr_temps = [
            self._first_value(chiller.get_chwr_temp(state, (mode, setpoint, chw_flow, cdw_flow)))
            for chiller, mode, setpoint, chw_flow, cdw_flow
            in zip(self.chillers, chiller_modes, chiller_setpoints, chw_flows, condw_flows)
        ]

        # Power consumption of each chiller, the pumps and the cooling towers
        chiller_powers = [
            self._first_value(
                chiller.get_power_consumption(Crc, (mode, setpoint, chwr_temp, chw_flow, cdw_flow))
            )
            for chiller, mode, setpoint, chwr_temp, chw_flow, cdw_flow
            in zip(self.chillers, chiller_modes, chiller_setpoints, chwr_temps, chw_flows, condw_flows)
        ]
        pump_power = self._first_value(self.pumps.get_power_consumption(state, (chw_flows, condw_flows)))
        ct_power = self._first_value(
            self.cooling_towers.get_power_consumption(state, (chw_flows, condw_flows))
        )

        total_power_consumption = sum(chiller_powers) + pump_power + ct_power

        # Check constraints
        constraints_violated, penalty = self._check_constraints(
            state, chiller_modes, chiller_setpoints, operation_penalty, Crc, chwr_temps
        )
        # Update the violation streak: +1 per violating step, -10 per clean step,
        # never below zero (a negative streak would turn the penalty into a bonus).
        if constraints_violated:
            self.consecutive_violations += 1
        else:
            self.consecutive_violations = max(0, self.consecutive_violations - 10)

        # Reward: negative power, minus a penalty that grows with the streak
        scaled_penalty = penalty * self.consecutive_violations
        reward = -total_power_consumption - scaled_penalty

        # Scale reward
        reward = reward / 1000  # Assuming typical power consumption is in the thousands

        # Reset On/Off count on new day
        if state[4]:
            for chiller in self.chillers:
                chiller.reset_status()

        # Move to next step
        self.current_step += 1
        done = self.current_step >= self.max_steps

        # Past the last row there is nothing to read; reuse the final state
        next_state = state if done else self._get_state()
        return self._normalize_state(next_state), reward, done

    def _normalize_state(self, state):
        """Scale all five state entries with the fitted scaler and return a flat array."""
        return self.state_scaler.transform(state.reshape(1, -1)).flatten()

    def _decode_action(self, action):
        """Look up each action entry in ``action_bins``.

        NOTE(review): entries are truncated with ``int()`` and used as list
        indices, so every value strictly between -1 and 1 maps to bin 0.
        Confirm that the agent emits integer bin indices for entries 5-19.
        """
        chiller_modes = [self.action_bins['chiller_modes'][int(action[i])] for i in range(5)]
        chiller_setpoints = [self.action_bins['chiller_setpoints'][int(action[i+5])] for i in range(5)]
        chw_flows = [self.action_bins['chw_flows'][int(action[i+10])] for i in range(5)]
        condw_flows = [self.action_bins['condw_flows'][int(action[i+15])] for i in range(5)]

        return chiller_modes, chiller_setpoints, chw_flows, condw_flows

    def _get_state(self):
        """Build the raw 5-element state vector from the row at ``current_step``."""
        row = self.data.iloc[self.current_step]
        return np.array([
            row['plant_cooling_rate'],
            row['outdoor_weather_station_wetbulb_temperature'],
            row['outdoor_weather_station_relative_humidity'],
            row['plant_target_chw_setpoint'],
            row['new_day']
        ])

    def _check_constraints(self, state, chiller_modes, chiller_setpoints, operation_penalty, Crc, chwr_temps):
        """Return ``(violated, penalty_unit)``.

        A step violates the constraints when the plant load exceeds 800 per
        running chiller, the per-chiller load exceeds 800, any predicted return
        temperature exceeds 59, or a chiller was switched more than once since
        the counters were last reset. The penalty unit is always 1; ``step``
        scales it by the violation streak.
        """
        plant_cooling_rate = state[0]
        chwrt_penalty = sum(1 for chwr_temp in chwr_temps if chwr_temp > 59)
        constraints = [
            plant_cooling_rate > sum(chiller_modes) * 800,
            Crc > 800,
            chwrt_penalty,
            operation_penalty]
        constraints_violated = any(constraints)
        return constraints_violated, 1

    def _update_chillers_status(self, action):
        """Record on/off transitions and return the switching penalty.

        ``action`` is the list of requested modes (0/1), one per chiller. A
        transition increments the matching counter; every transition after the
        first adds the counter value to the penalty. The chiller's ``mode`` is
        then updated so the next step compares against the new mode.
        """
        penalty = 0
        for chiller, new_mode in zip(self.chillers, action):
            if chiller.mode == 0 and new_mode == 1:
                chiller.count_on += 1
                if chiller.count_on > 1:
                    penalty += chiller.count_on
            elif chiller.mode == 1 and new_mode == 0:
                chiller.count_off += 1
                if chiller.count_off > 1:
                    penalty += chiller.count_off
            chiller.mode = new_mode
        return penalty

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random

class DQN(nn.Module):
    """Three-layer fully connected network with an unbounded linear output.

    Two hidden layers of 64 ReLU units map a state vector to ``action_size``
    raw output values.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

class DoubleDQNAgent:
    """Double-DQN style agent over a mixed on/off + continuous action vector.

    The first ``N_DISCRETE`` components of an action are binary switches and
    the remaining components are continuous values in ``[-1, 1]``. The online
    network's outputs are used both as the greedy action (after thresholding
    and clipping) and as per-component value estimates in ``replay``.

    Args:
        state_space: Size of the observation vector.
        action_space: Size of the action vector.
    """

    # Number of leading action components treated as binary switches.
    N_DISCRETE = 5

    def __init__(self, state_space, action_space):
        self.state_space = state_space
        self.action_space = action_space
        self.memory = deque(maxlen=10000)
        self.gamma = 0.99  # discount rate
        self.epsilon = 0.99  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.tau = 0.001  # for soft update of target parameters
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = DQN(state_space, action_space).to(self.device)
        self.target_model = DQN(state_space, action_space).to(self.device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.target_model.eval()

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def _to_tensor(self, values):
        """Convert a sequence of numbers/arrays to one float32 tensor on the agent's device."""
        # Going through a single contiguous ndarray avoids the slow
        # element-by-element path torch uses for sequences of ndarrays.
        return torch.as_tensor(np.asarray(values, dtype=np.float32), device=self.device)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with epsilon-greedy exploration.

        Returns:
            A 1-D numpy array of length ``action_space``: the first
            ``N_DISCRETE`` entries are 0 or 1, the rest lie in ``[-1, 1]``.
        """
        n_discrete = self.N_DISCRETE
        if np.random.rand() <= self.epsilon:
            # Exploration: random switches plus uniform continuous values.
            action = np.zeros(self.action_space)
            action[:n_discrete] = np.random.randint(0, 2, n_discrete)
            action[n_discrete:] = np.random.uniform(-1, 1, self.action_space - n_discrete)
            return action

        state_tensor = self._to_tensor(state).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        action = q_values.cpu().numpy()[0]
        # Threshold the switch outputs at zero and clip the continuous ones.
        action[:n_discrete] = (action[:n_discrete] > 0).astype(int)
        action[n_discrete:] = np.clip(action[n_discrete:], -1, 1)
        return action

    def replay(self, batch_size):
        """Run one optimisation step on a random minibatch from memory.

        Does nothing when fewer than ``batch_size`` transitions are stored.
        After the gradient step the target network is soft-updated and
        epsilon is decayed once (down to ``epsilon_min``).
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = self._to_tensor(states)
        actions = self._to_tensor(actions)
        rewards = self._to_tensor(rewards)
        next_states = self._to_tensor(next_states)
        dones = self._to_tensor(dones)

        n_discrete = self.N_DISCRETE

        # Value of the taken action: network outputs weighted by the action
        # components, summed over the discrete and continuous parts.
        current_q_values = self.model(states)
        discrete_action_q_values = torch.sum(
            current_q_values[:, :n_discrete] * actions[:, :n_discrete], dim=1)
        continuous_action_q_values = torch.sum(
            current_q_values[:, n_discrete:] * actions[:, n_discrete:], dim=1)
        taken_action_q_values = discrete_action_q_values + continuous_action_q_values

        # Double DQN: the online network selects, the target network evaluates.
        with torch.no_grad():
            online_next_q_values = self.model(next_states)
            best_discrete_actions = torch.argmax(online_next_q_values[:, :n_discrete], dim=1)
            best_continuous_actions = online_next_q_values[:, n_discrete:]

            target_next_q_values = self.target_model(next_states)
            best_discrete_q_values = target_next_q_values[:, :n_discrete].gather(
                1, best_discrete_actions.unsqueeze(1)).squeeze(1)
            best_continuous_q_values = torch.sum(
                target_next_q_values[:, n_discrete:] * best_continuous_actions, dim=1)
            best_q_values = best_discrete_q_values + best_continuous_q_values

            # Terminal transitions (done == 1) bootstrap nothing.
            target_q_values = rewards + (1 - dones) * self.gamma * best_q_values

        loss = self.criterion(taken_action_q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.model, self.target_model)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def soft_update(self, local_model, target_model):
        """Blend ``local_model`` weights into ``target_model`` with factor ``tau``."""
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)

    def save(self, filename):
        """Write the online network's state dict to ``filename``."""
        torch.save(self.model.state_dict(), filename)

    def load(self, filename):
        """Load the online network from ``filename`` and copy it to the target network."""
        # map_location lets a checkpoint saved on GPU load on a CPU-only
        # machine (and vice versa) instead of failing during deserialisation.
        self.model.load_state_dict(torch.load(filename, map_location=self.device))
        self.target_model.load_state_dict(self.model.state_dict())

In [None]:
import pandas as pd
import torch
import numpy as np

def train_agent(episodes, batch_size):
    """Train a DoubleDQNAgent on the first 60% of the preprocessed dataset.

    Each episode walks the environment from reset until it reports ``done``
    (or ``env.max_steps`` steps have run), storing every transition and
    calling ``agent.replay`` once the memory holds more than ``batch_size``
    entries. A line is printed per timestep and per episode, and the model
    is saved every 100th episode.

    Args:
        episodes: Number of training episodes.
        batch_size: Minibatch size passed to ``agent.replay``.

    Returns:
        The trained agent.
    """
    full_data = pd.read_parquet('chiller_data_pre.parquet')
    train_rows = int(0.6*len(full_data))

    env = ChillerPlantEnvironment(full_data[:train_rows])
    agent = DoubleDQNAgent(env.state_space, env.action_space)

    for episode in range(1, episodes + 1):
        state = env.reset()
        total_reward = 0

        for step_idx in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            print(f"Episode: {episode}, Timestep: {step_idx}, Action: {action}, Reward: {reward}")

            # Learn only once the memory is larger than one minibatch.
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

            if done:
                break

        print(f"Episode: {episode}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.4f}")

        # Periodic checkpoint.
        if episode % 100 == 0:
            agent.save(f'double_dqn_model_{episode}.pth')

    return agent

def evaluate_agent(agent, episodes):
    """Run the agent greedily on the last 40% of the preprocessed dataset.

    Sets ``agent.epsilon`` to 0 (this is not restored afterwards), then
    plays ``episodes`` episodes, printing each episode's total reward and
    finally the average over all episodes.

    Args:
        agent: Agent exposing ``act(state)`` and an ``epsilon`` attribute.
        episodes: Number of evaluation episodes.

    Returns:
        A list with one entry per episode, each being the list of actions
        taken in that episode.
    """
    full_data = pd.read_parquet('chiller_data_pre.parquet')
    holdout_start = int(0.6*len(full_data))

    env = ChillerPlantEnvironment(full_data[holdout_start:])
    agent.epsilon = 0  # No exploration during evaluation

    total_rewards, all_actions = [], []

    for episode in range(1, episodes + 1):
        state = env.reset()
        total_reward = 0
        episode_actions = []

        for _ in range(env.max_steps):
            action = agent.act(state)
            state, reward, done = env.step(action)
            total_reward += reward
            episode_actions.append(action)

            if done:
                break

        total_rewards.append(total_reward)
        all_actions.append(episode_actions)
        print(f"Evaluation Episode: {episode}/{episodes}, Total Reward: {total_reward}")

    print(f"Average Reward over {episodes} episodes: {np.mean(total_rewards)}")
    return all_actions

# Run configuration for the Double DQN experiment
episodes = 1000
batch_size = 64

# Fit the agent on the training portion of the dataset
trained_agent = train_agent(episodes=episodes, batch_size=batch_size)

# Play 10 greedy episodes on the held-out portion and keep the actions taken
evaluation_actions = evaluate_agent(agent=trained_agent, episodes=10)

# PPO

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import joblib
from sklearn.ensemble import RandomForestRegressor

class ChillerPlantEnvironment:
    """Replay-style environment that steps through rows of a chiller dataset.

    The observation at each step is taken from the current data row; the
    reward is the negative of the power predicted by a pre-trained model for
    that observation combined with the chosen controls, or ``-1000`` when a
    constraint is violated.

    Args:
        data: DataFrame containing at least the columns listed in
            ``feature_columns``; one row per timestep.
    """

    # Columns that make up the observation vector, in order.
    STATE_COLUMNS = (
        'plant_cooling_rate',
        'outdoor_weather_station_wetbulb_temperature',
        'outdoor_weather_station_relative_humidity',
        'chilled_water_loop_return_water_temperature',
        'chilled_water_loop_supply_water_temperature',
    )

    def __init__(self, data):
        self.current_step = 0
        self.max_steps = len(data)

        self.n_chillers = 5
        self.action_space = 12
        self.state_space = 5

        # NOTE(review): joblib.load unpickles the file; only point this at a
        # model file from a trusted source.
        self.power_model = joblib.load('random_forest_model.pkl')

        # Feature order expected by the power model: state, chiller on/off
        # flags, chiller setpoints, then the two loop flow rates.
        self.feature_columns = list(self.STATE_COLUMNS) + \
            [f'chiller_{i}_status_read' for i in range(1, 6)] + \
            [f'chiller_{i}_setpoint_read' for i in range(1, 6)] + \
            ['chilled_water_loop_flow_rate',
             'condenser_water_loop_flow_rate']
        self.data = data[self.feature_columns]

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_step = 0
        return self._get_state()

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Args:
            action: Sequence of 12 numbers decoded by ``_decode_action``.

        Returns:
            ``(next_state, reward, done)``. When ``done`` is True there is no
            further row, so ``next_state`` is the observation of the final
            row rather than a read past the end of the data.
        """
        chiller_modes, chiller_setpoints, chw_flow, condw_flow = self._decode_action(action)

        state = self._get_state()

        input_data = np.concatenate([state, chiller_modes, chiller_setpoints, [chw_flow], [condw_flow]]).reshape(1, -1)
        input_data_df = pd.DataFrame(input_data, columns=self.feature_columns)
        power = self.power_model.predict(input_data_df)[0]

        # Constraints are evaluated against the row the action was taken on.
        constraints_violated = self._check_constraints(chiller_modes, chw_flow, condw_flow)

        reward = -power if not constraints_violated else -1000

        self.current_step += 1
        done = self.current_step >= self.max_steps

        # After the last row current_step == max_steps, which is out of
        # bounds for iloc; reuse the final observation instead of raising.
        next_state = state if done else self._get_state()
        return next_state, reward, done

    def _get_state(self):
        """Return the observation (``STATE_COLUMNS`` values) of the current row."""
        row = self.data.iloc[self.current_step]
        return np.array([row[column] for column in self.STATE_COLUMNS])

    def _decode_action(self, action):
        """Map a raw 12-component action to plant controls.

        Returns:
            ``(chiller_modes, chiller_setpoints, chw_flow, condw_flow)``:
            modes are 1 where the component is positive else 0; setpoints are
            ``42 + 9 * a``; flows are ``400 + 2100 * a`` and
            ``500 + 2500 * a`` respectively.
        """
        n = self.n_chillers
        chiller_modes = [int(action[i] > 0) for i in range(n)]
        chiller_setpoints = [42 + 9 * action[i + n] for i in range(n)]
        chw_flow = 400 + 2100 * action[2 * n]
        condw_flow = 500 + 2500 * action[2 * n + 1]

        return chiller_modes, chiller_setpoints, chw_flow, condw_flow

    def _calculate_cooling_capacity(self, chiller_modes):
        """Return 800 times the number of running chillers."""
        return sum(chiller_modes) * 800

    def _check_constraints(self, chiller_modes, chw_flow, condw_flow):
        """Return True when the controls violate any limit at the current row.

        Checks: running capacity below the row's cooling load, chilled-water
        flow outside ``[400, 2500]``, condenser-water flow outside
        ``[500, 3000]``, or the row's return-water temperature above 59.
        """
        row = self.data.iloc[self.current_step]
        cooling_load = row['plant_cooling_rate']
        chwr_temp = row['chilled_water_loop_return_water_temperature']
        cooling_capacity = self._calculate_cooling_capacity(chiller_modes)
        constraints = [
            cooling_capacity < cooling_load,
            chw_flow < 400 or chw_flow > 2500,
            condw_flow < 500 or condw_flow > 3000,
            chwr_temp > 59,
        ]
        return any(constraints)
    
class DQN(nn.Module):
    """Small multilayer perceptron: input -> 64 -> 64 -> output with ReLU between layers.

    Args:
        state_size: Width of the input vector.
        action_size: Width of the output vector.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        width = 64
        # Attribute names and creation order are kept (fc1, fc2, fc3) so
        # state-dict keys and weight initialisation order do not change.
        self.fc1 = nn.Linear(state_size, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, action_size)

    def forward(self, x):
        """Compute the network output for ``x``; no activation on the last layer."""
        activations = torch.relu(self.fc1(x))
        activations = torch.relu(self.fc2(activations))
        return self.fc3(activations)

class DQNAgent:
    """DQN agent with a periodically synchronised target network.

    The network's raw outputs are returned as the action vector; during
    learning, the position of the largest action component is the output
    whose value is regressed towards the bootstrapped target.

    Args:
        state_space: Size of the observation vector.
        action_space: Size of the action vector.
    """

    def __init__(self, state_space, action_space):
        self.state_space = state_space
        self.action_space = action_space
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DQN(state_space, action_space).to(self.device)
        self.target_model = DQN(state_space, action_space).to(self.device)
        self.update_target_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def _as_batch(self, vector):
        """Return ``vector`` as a float32 tensor of shape (1, len) on the agent's device."""
        return torch.as_tensor(np.asarray(vector, dtype=np.float32), device=self.device).unsqueeze(0)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with epsilon-greedy exploration.

        Returns:
            A 1-D numpy array. When exploring it has 12 entries: 5 drawn
            uniformly from ``[-1, 1)`` followed by 7 drawn from ``[0, 1)``.
            Otherwise it is the online network's raw output for ``state``.
        """
        if np.random.rand() <= self.epsilon:
            # Same draws as before, but returned as an ndarray so both
            # branches hand back the same type.
            return np.concatenate([np.random.uniform(-1, 1, 5), np.random.uniform(0, 1, 7)])
        with torch.no_grad():
            act_values = self.model(self._as_batch(state))
        return act_values.cpu().numpy()[0]

    def replay(self, batch_size):
        """Fit the online network on ``batch_size`` sampled transitions, one at a time.

        Does nothing when fewer than ``batch_size`` transitions are stored.
        Epsilon is decayed once per call (down to ``epsilon_min``).
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # Take the index from the stored action (list/ndarray) rather
            # than from a tensor: np.argmax cannot read a CUDA tensor.
            action_index = int(np.argmax(action))

            target = reward
            if not done:
                with torch.no_grad():
                    target = reward + self.gamma * torch.max(self.target_model(self._as_batch(next_state)))

            prediction = self.model(self._as_batch(state))
            # The regression target equals the prediction everywhere except
            # at the chosen index, so only that output receives gradient.
            target_f = prediction.detach().clone()
            target_f[0, action_index] = target

            self.optimizer.zero_grad()
            loss = self.criterion(prediction, target_f)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def train_agent(episodes, batch_size):
    """Train a DQNAgent on the first 60% of the preprocessed dataset.

    Every transition is stored; after each non-terminal step the agent
    replays a minibatch once the memory holds more than ``batch_size``
    entries. The target network is synchronised at the end of every episode
    and the online weights are saved on episodes 0, 100, 200, ...

    Args:
        episodes: Number of training episodes.
        batch_size: Minibatch size passed to ``agent.replay``.

    Returns:
        The trained agent.
    """
    frame = pd.read_parquet('chiller_data_pre.parquet')
    cutoff = int(0.6*len(frame))

    env = ChillerPlantEnvironment(frame[:cutoff])
    agent = DQNAgent(env.state_space, env.action_space)

    for episode in range(episodes):
        state = env.reset()
        total_reward = 0

        for _ in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            # The terminal step reports the episode total and skips replay.
            if done:
                print(f"Episode: {episode+1}/{episodes}, Total Reward: {total_reward}")
                break

            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        agent.update_target_model()

        if episode % 100 == 0:
            torch.save(agent.model.state_dict(), f'chiller_dqn_model_{episode}.pth')

    return agent

def evaluate_agent(agent, episodes):
    """Run the agent greedily on the last 40% of the preprocessed dataset.

    Sets ``agent.epsilon`` to 0 (this is not restored afterwards), plays
    ``episodes`` episodes, and prints each episode's total reward followed
    by the average over all episodes.

    Args:
        agent: Agent exposing ``act(state)`` and an ``epsilon`` attribute.
        episodes: Number of evaluation episodes.

    Returns:
        The list of actions taken during the last episode, or an empty list
        when ``episodes`` is 0.
    """
    data = pd.read_parquet('chiller_data_pre.parquet')
    data = data[int(0.6*len(data)):]

    env = ChillerPlantEnvironment(data)
    agent.epsilon = 0

    total_rewards = []
    # Bound before the loop so the return below is valid even when no
    # episode runs (previously an UnboundLocalError for episodes == 0).
    actions = []
    for e in range(episodes):
        state = env.reset()
        total_reward = 0
        actions = []

        for _ in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            state = next_state
            total_reward += reward
            actions.append(action)

            if done:
                break

        total_rewards.append(total_reward)
        print(f"Evaluation Episode: {e+1}/{episodes}, Total Reward: {total_reward}")

    print(f"Average Reward over {episodes} episodes: {np.mean(total_rewards)}")
    return actions

# Fit the DQN agent: 1000 episodes with minibatches of 32 transitions
trained_agent = train_agent(1000, 32)

# Play 10 greedy episodes on the held-out data and keep the returned actions
optimal_actions = evaluate_agent(trained_agent, 10)
