In [None]:
import pandas as pd
import json
import plotly.express as px
import numpy as np

# Prepare data

In [None]:
# Load chiller_plant_dataset.parquet
raw_dataset_path = '../data/chiller_plant_dataset.parquet'
chiller_data = pd.read_parquet(raw_dataset_path)

# Column metadata; its entries are iterated further down to decide which
# columns of the raw dataset are kept.
columns_info_path = '../data/columns_info.json'
with open(columns_info_path) as columns_file:
    columns_info = json.load(columns_file)

In [None]:
# Drop columns
import ast

column_list = chiller_data.columns.to_list()

# Each entry of columns_info is the string form of a column label
# (presumably a tuple such as "('group', 'name')" -- confirm against the JSON);
# literal_eval parses it back into a Python literal without executing code.
columns_info_list = [ast.literal_eval(key) for key in columns_info]

# A set makes the membership test O(1) per column instead of scanning the list.
columns_to_keep = set(columns_info_list)
columns_to_drop = [col for col in column_list if col not in columns_to_keep]

# ('chiller_6', 'power') is dropped regardless of columns_info. It is only
# added when it is still present and not already listed, so re-running this
# cell does not raise a KeyError for a column that is already gone.
extra_drop = ('chiller_6', 'power')
if extra_drop in column_list and extra_drop not in columns_to_drop:
    columns_to_drop.append(extra_drop)

chiller_data = chiller_data.drop(columns=columns_to_drop)
chiller_data.shape

In [None]:
# Remove NaN
# Carry the last valid observation forward, then back-fill whatever is still
# missing at the start of a column.
chiller_data = chiller_data.ffill().bfill()
# Number of NaN cells left in the whole frame (shown as the cell output).
chiller_data.isna().sum().sum()

In [None]:
# Remove negative values
# NOTE(review): every column is treated the same way here, so a signed
# quantity (e.g. a temperature below zero) would be blanked as well --
# confirm that no column is expected to be legitimately negative.

# Locate the negative cells once and derive the affected columns from that mask.
negative_mask = chiller_data.lt(0)
negative_columns = chiller_data.columns[negative_mask.any()]

# Total number of negative cells across the affected columns.
negative_counts = negative_mask[negative_columns].sum().sum()

print("negative values:")
print(negative_counts)

# Blank out the negative cells (they become NaN) ...
affected = chiller_data[negative_columns]
chiller_data[negative_columns] = affected.where(~affected.lt(0))
# ... then fill the gaps from neighbouring rows: forward first, backward for
# anything left at the start of a column.
chiller_data = chiller_data.ffill().bfill()

# Re-count to confirm that no negative cell is left in those columns.
negative_counts_after_fill = chiller_data[negative_columns].lt(0).sum().sum()

print("negative values after forward filling:")
print(negative_counts_after_fill)

In [None]:
# Flatten MultiIndex column labels into plain strings: tuple labels are joined
# with '_' and any label that is not a tuple is kept unchanged.
flat_names = []
for label in chiller_data.columns:
    if isinstance(label, tuple):
        flat_names.append('_'.join(label))
    else:
        flat_names.append(label)
chiller_data.columns = flat_names

# Load pre-processed data

In [None]:
# Replace the in-memory frame with the pre-processed dataset stored next to
# the notebook.
# NOTE(review): no visible cell writes this file -- confirm where it is
# produced so the notebook can be re-run from a fresh checkout.
preprocessed_path = 'chiller_data_pre.parquet'
chiller_data = pd.read_parquet(preprocessed_path)

In [None]:
# Summary statistics of the two condenser-water flow columns, side by side.
flow_columns = ['plant_individual_cdw_flow', 'condenser_water_loop_flow_rate']
chiller_data.loc[:, flow_columns].describe()

# EDA

# Chiller model (Cooling rate)

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Exogenous inputs: condenser/evaporator water flow rate and status of chillers 1-5.
chillers_feature = [f'chiller_{i}_cond_water_flow_rate' for i in range(1, 6)] + \
                [f'chiller_{i}_evap_water_flow_rate' for i in range(1, 6)] + \
                [f'chiller_{i}_status_read' for i in range(1, 6)]

# One cooling-rate target per chiller.
chillers_target = [f'chiller_{i}_cooling_rate' for i in range(1, 6)]

X = chiller_data[chillers_feature]
y = chiller_data[chillers_target]

# Ordered split: the first 60% of the rows train the models, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


# SARIMAX only accepts a univariate endog, so the five cooling-rate targets
# cannot be passed together: fit one model per target, all sharing the same
# exogenous matrix.
sarimax_predictions = {}
for target in chillers_target:
    # Train the SARIMAX model
    sarimax_model = SARIMAX(y_train[target], exog=X_train, order=(2, 1, 3), seasonal_order=(0, 0, 0, 0))
    sarimax_result = sarimax_model.fit()

    # Predict
    y_pred_sarimax = sarimax_result.forecast(steps=len(X_test), exog=X_test)
    # Work on the raw values (the forecast index may differ from y_test's)
    # and clip negative predictions to zero.
    y_pred_sarimax = np.maximum(np.asarray(y_pred_sarimax), 0)
    sarimax_predictions[target] = y_pred_sarimax

    # Performance Metrics for SARIMAX
    mse_sarimax = mean_squared_error(y_test[target], y_pred_sarimax)
    mae_sarimax = mean_absolute_error(y_test[target], y_pred_sarimax)
    r2_sarimax = r2_score(y_test[target], y_pred_sarimax)
    cvrmse_sarimax = cvrmse(y_test[target], y_pred_sarimax)

    # Print metrics for SARIMAX, preceded by the target they belong to
    print(target)
    print(f"SARIMAX MSE: {mse_sarimax}")
    print(f"SARIMAX MAE: {mae_sarimax}")
    print(f"SARIMAX R²: {r2_sarimax}")
    print(f"SARIMAX CvRMSE: {cvrmse_sarimax}")

# Plotting: one figure per chiller, prediction in red with the measured series on top.
y_pred_sarimax_df = pd.DataFrame(sarimax_predictions, index=y_test.index)
for target in chillers_target:
    fig = px.line(x=X_test.index, y=y_pred_sarimax_df[target], title=f'{target} (SARIMAX)')
    fig.update_traces(line_color='red', name=f'{target} (SARIMAX)', showlegend=True)
    actual_trace = px.line(x=X_test.index, y=y_test[target]).data[0]
    # Name the measured series so the two lines can be told apart in the legend.
    actual_trace.update(name=f'{target} (actual)', showlegend=True)
    fig.add_trace(actual_trace)
    fig.show()

# Cdps model (Condenser water flow)

## Ridge regression

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Assuming chiller_data is already loaded

# Inputs: plant_cooling_rate plus the condenser water flow rate of chillers 1-5.
cdps_feature = ['plant_cooling_rate'] + \
                [f'chiller_{i}_cond_water_flow_rate' for i in range(1, 6)]

cdps_target = ['plant_power_all_cdps']

X = chiller_data[cdps_feature]
y = chiller_data[cdps_target]

# Ordered split: the first 60% of the rows train the model, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

# Train the Ridge model
ridge_model = Ridge(alpha=1.0)  # You can adjust the regularization parameter alpha as needed
ridge_model.fit(X_train, y_train)

# Predict; negative predictions are floored at zero.
y_pred_ridge = ridge_model.predict(X_test)
y_pred_ridge = y_pred_ridge.clip(min=0)

# Performance Metrics for Ridge Regression
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


cvrmse_ridge = cvrmse(y_test, y_pred_ridge)

# Print metrics for Ridge Regression
print(f"Ridge Regression MSE: {mse_ridge}")
print(f"Ridge Regression MAE: {mae_ridge}")
print(f"Ridge Regression R²: {r2_ridge}")
print(f"Ridge Regression CvRMSE: {cvrmse_ridge}")

# Plotting: prediction in red with the measured series on top.
y_pred_ridge_df = pd.DataFrame(y_pred_ridge, index=y_test.index, columns=y_test.columns)
fig = px.line(x=X_test.index, y=y_pred_ridge_df[cdps_target[0]], title=f'{cdps_target[0]} (Ridge Regression)')
fig.update_traces(line_color='red', name=f'{cdps_target[0]} (Ridge)', showlegend=True)
actual_trace = px.line(x=X_test.index, y=y_test[cdps_target[0]]).data[0]
# Name the measured series so the two lines can be told apart in the legend.
actual_trace.update(name=f'{cdps_target[0]} (actual)', showlegend=True)
fig.add_trace(actual_trace)
fig.show()

## SARIMAX

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Inputs: plant_cooling_rate plus the condenser water flow rate of chillers 1-5.
cdps_feature = ['plant_cooling_rate'] + \
                [f'chiller_{i}_cond_water_flow_rate' for i in range(1, 6)]

cdps_target = ['plant_power_all_cdps']

X = chiller_data[cdps_feature]
y = chiller_data[cdps_target]

# Ordered split: the first 60% of the rows train the model, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

# Train the SARIMAX model (single target column, features as exogenous regressors)
sarimax_model = SARIMAX(y_train, exog=X_train, order=(2, 1, 3), seasonal_order=(0, 0, 0, 0))
sarimax_result = sarimax_model.fit()

# Predict
y_pred_sarimax = sarimax_result.forecast(steps=len(X_test), exog=X_test)
y_pred_sarimax = np.maximum(y_pred_sarimax, 0)  # Clip negative predictions to zero

# Performance Metrics for SARIMAX
mse_sarimax = mean_squared_error(y_test, y_pred_sarimax)
mae_sarimax = mean_absolute_error(y_test, y_pred_sarimax)
r2_sarimax = r2_score(y_test, y_pred_sarimax)


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


cvrmse_sarimax = cvrmse(y_test, y_pred_sarimax)

# Print metrics for SARIMAX
print(f"SARIMAX MSE: {mse_sarimax}")
print(f"SARIMAX MAE: {mae_sarimax}")
print(f"SARIMAX R²: {r2_sarimax}")
print(f"SARIMAX CvRMSE: {cvrmse_sarimax}")

# Plotting: prediction in red with the measured series on top.
# .values discards the forecast's own index so the frame can be aligned to y_test.
y_pred_sarimax_df = pd.DataFrame(y_pred_sarimax.values, index=y_test.index, columns=[cdps_target[0]])
fig = px.line(x=X_test.index, y=y_pred_sarimax_df[cdps_target[0]], title=f'{cdps_target[0]} (SARIMAX)')
fig.update_traces(line_color='red', name=f'{cdps_target[0]} (SARIMAX)', showlegend=True)
actual_trace = px.line(x=X_test.index, y=y_test[cdps_target[0]]).data[0]
# Name the measured series so the two lines can be told apart in the legend.
actual_trace.update(name=f'{cdps_target[0]} (actual)', showlegend=True)
fig.add_trace(actual_trace)
fig.show()

# Chps model

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Assuming chiller_data is already loaded

# Inputs: plant_cooling_rate plus the evaporator water flow rate of chillers 1-5.
chps_feature = ['plant_cooling_rate'] + \
                [f'chiller_{i}_evap_water_flow_rate' for i in range(1, 6)]

chps_target = ['plant_power_all_chps']

X = chiller_data[chps_feature]
y = chiller_data[chps_target]

# Ordered split: the first 60% of the rows train the model, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

# Train the Ridge model
ridge_model = Ridge(alpha=1)  # You can adjust the regularization parameter alpha as needed
ridge_model.fit(X_train, y_train)

# Predict; negative predictions are floored at zero.
y_pred_ridge = ridge_model.predict(X_test)
y_pred_ridge = y_pred_ridge.clip(min=0)

# Performance Metrics for Ridge Regression
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


cvrmse_ridge = cvrmse(y_test, y_pred_ridge)

# Print metrics for Ridge Regression
print(f"Ridge Regression MSE: {mse_ridge}")
print(f"Ridge Regression MAE: {mae_ridge}")
print(f"Ridge Regression R²: {r2_ridge}")
print(f"Ridge Regression CvRMSE: {cvrmse_ridge}")

# Plotting: prediction in red with the measured series on top.
y_pred_ridge_df = pd.DataFrame(y_pred_ridge, index=y_test.index, columns=y_test.columns)
fig = px.line(x=X_test.index, y=y_pred_ridge_df[chps_target[0]], title=f'{chps_target[0]} (Ridge Regression)')
fig.update_traces(line_color='red', name=f'{chps_target[0]} (Ridge)', showlegend=True)
actual_trace = px.line(x=X_test.index, y=y_test[chps_target[0]]).data[0]
# Name the measured series so the two lines can be told apart in the legend.
actual_trace.update(name=f'{chps_target[0]} (actual)', showlegend=True)
fig.add_trace(actual_trace)
fig.show()

# Linear models for DQN

## Pumps

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Inputs: plant cooling rate and the two loop flow rates.
pumps_feature = ['plant_cooling_rate',
                'chilled_water_loop_flow_rate',
                'condenser_water_loop_flow_rate']

pumps_target = ['plant_power_all_pumps']

X = chiller_data[pumps_feature]
y = chiller_data[pumps_target]

# Ordered split: the first 60% of the rows train the model, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

# Train the model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Save the model (written to the current working directory)
joblib.dump(lr_model, 'pumps_power_model.pkl')

# Predict; negative predictions are floored at zero.
y_pred_lr = lr_model.predict(X_test)
y_pred_lr = y_pred_lr.clip(min=0)

# Performance Metrics
mse_lr = mean_squared_error(y_test, y_pred_lr)
mae_lr = mean_absolute_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


cvrmse_lr = cvrmse(y_test, y_pred_lr)

# Print metrics
print(f"Linear Regression MSE: {mse_lr}")
print(f"Linear Regression MAE: {mae_lr}")
print(f"Linear Regression R²: {r2_lr}")
print(f"Linear Regression CvRMSE: {cvrmse_lr}")

# Plotting: prediction in red with the measured series on top.
y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
fig = px.line(x=X_test.index, y=y_pred_lr_df[pumps_target[0]], title=f'{pumps_target[0]}')
fig.update_traces(line_color='red', name=f'{pumps_target[0]}', showlegend=True)
actual_trace = px.line(x=X_test.index, y=y_test[pumps_target[0]]).data[0]
# Name the measured series so the two lines can be told apart in the legend.
actual_trace.update(name=f'{pumps_target[0]} (actual)', showlegend=True)
fig.add_trace(actual_trace)
fig.show()

## CTs

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Inputs: plant cooling rate, outdoor wet-bulb temperature and relative
# humidity, and the two loop flow rates. The chilled-water return/supply
# temperatures are deliberately left out (kept commented for reference).
cts_feature = ['plant_cooling_rate',
            'outdoor_weather_station_wetbulb_temperature',
            'outdoor_weather_station_relative_humidity',
            # 'chilled_water_loop_return_water_temperature',
            # 'chilled_water_loop_supply_water_temperature',
            'chilled_water_loop_flow_rate',
            'condenser_water_loop_flow_rate']

cts_target = ['plant_power_all_cts']

X = chiller_data[cts_feature]
y = chiller_data[cts_target]

# Ordered split: the first 60% of the rows train the model, the rest is held out.
train_size = int(0.6 * len(X))
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

# Train the model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Save the model (written to the current working directory)
joblib.dump(lr_model, 'cts_power_model.pkl')

# Predict; negative predictions are floored at zero.
y_pred_lr = lr_model.predict(X_test)
y_pred_lr = y_pred_lr.clip(min=0)

# Performance Metrics
mse_lr = mean_squared_error(y_test, y_pred_lr)
mae_lr = mean_absolute_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)


def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


cvrmse_lr = cvrmse(y_test, y_pred_lr)

# Print metrics
print(f"Linear Regression MSE: {mse_lr}")
print(f"Linear Regression MAE: {mae_lr}")
print(f"Linear Regression R²: {r2_lr}")
print(f"Linear Regression CvRMSE: {cvrmse_lr}")

# Plotting: prediction in red with the measured series on top.
y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
fig = px.line(x=X_test.index, y=y_pred_lr_df[cts_target[0]], title=f'{cts_target[0]}')
fig.update_traces(line_color='red', name=f'{cts_target[0]}', showlegend=True)
actual_trace = px.line(x=X_test.index, y=y_test[cts_target[0]]).data[0]
# Name the measured series so the two lines can be told apart in the legend.
actual_trace.update(name=f'{cts_target[0]} (actual)', showlegend=True)
fig.add_trace(actual_trace)
fig.show()

## Chillers (return temp and power)

### Return temperature

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


# One linear model per chiller (1-5), predicting the evaporator entering
# water temperature from that chiller's own signals.
for chiller_id in range(1, 6):
    chillers_feature = [f'chiller_{chiller_id}_cooling_rate',
            f'chiller_{chiller_id}_evap_leaving_water_temperature',
            f'chiller_{chiller_id}_evap_water_flow_rate',
            f'chiller_{chiller_id}_cond_water_flow_rate',
            f'chiller_{chiller_id}_status_read']

    chillers_target = [f'chiller_{chiller_id}_evap_entering_water_temperature']

    X = chiller_data[chillers_feature]
    y = chiller_data[chillers_target]

    # Ordered split: the first 60% of the rows train the model, the rest is held out.
    train_size = int(0.6 * len(X))
    X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

    # Train the model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)

    # Save the model (one file per chiller, written to the current working directory)
    joblib.dump(lr_model, f'chiller_{chiller_id}_chwrt_lr_model.pkl')

    # Predict; negative predictions are floored at zero.
    y_pred_lr = lr_model.predict(X_test)
    y_pred_lr = y_pred_lr.clip(min=0)

    # Performance Metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)
    cvrmse_lr = cvrmse(y_test, y_pred_lr)

    # Print metrics, preceded by the target they belong to
    print(chillers_target[0])
    print(f"Linear Regression MSE: {mse_lr}")
    print(f"Linear Regression MAE: {mae_lr}")
    print(f"Linear Regression R²: {r2_lr}")
    print(f"Linear Regression CvRMSE: {cvrmse_lr}")

    # Plotting: prediction in red with the measured series on top.
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = px.line(x=X_test.index, y=y_pred_lr_df[chillers_target[0]], title=f'{chillers_target[0]}')
    fig.update_traces(line_color='red', name=f'{chillers_target[0]}', showlegend=True)
    actual_trace = px.line(x=X_test.index, y=y_test[chillers_target[0]]).data[0]
    # Name the measured series so the two lines can be told apart in the legend.
    actual_trace.update(name=f'{chillers_target[0]} (actual)', showlegend=True)
    fig.add_trace(actual_trace)
    fig.show()

### Power

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

def cvrmse(y_true, y_pred):
    """Return the coefficient of variation of the RMSE: RMSE / mean(y_true).

    y_true is converted to a NumPy array before averaging so the result is
    always a single float; calling np.mean directly on a DataFrame yields a
    per-column Series or a scalar depending on the pandas version.
    The result is inf/nan when the mean of y_true is zero.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return float(rmse / np.mean(np.asarray(y_true, dtype=float)))


# One linear model per chiller (1-5), predicting that chiller's power from
# its own cooling rate, evaporator temperatures, flow rates and status.
for chiller_id in range(1, 6):
    chillers_feature = [f'chiller_{chiller_id}_cooling_rate',
            f'chiller_{chiller_id}_evap_leaving_water_temperature',
            f'chiller_{chiller_id}_evap_entering_water_temperature',
            f'chiller_{chiller_id}_evap_water_flow_rate',
            f'chiller_{chiller_id}_cond_water_flow_rate',
            f'chiller_{chiller_id}_status_read']

    chillers_target = [f'chiller_{chiller_id}_power']

    X = chiller_data[chillers_feature]
    y = chiller_data[chillers_target]

    # Ordered split: the first 60% of the rows train the model, the rest is held out.
    train_size = int(0.6 * len(X))
    X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

    # Train the model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)

    # Save the model (one file per chiller, written to the current working directory)
    joblib.dump(lr_model, f'chiller_{chiller_id}_power_lr_model.pkl')

    # Predict; negative predictions are floored at zero.
    y_pred_lr = lr_model.predict(X_test)
    y_pred_lr = y_pred_lr.clip(min=0)

    # Performance Metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)
    cvrmse_lr = cvrmse(y_test, y_pred_lr)

    # Print metrics, preceded by the target they belong to
    print(chillers_target[0])
    print(f"Linear Regression MSE: {mse_lr}")
    print(f"Linear Regression MAE: {mae_lr}")
    print(f"Linear Regression R²: {r2_lr}")
    print(f"Linear Regression CvRMSE: {cvrmse_lr}")

    # Plotting: prediction in red with the measured series on top.
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = px.line(x=X_test.index, y=y_pred_lr_df[chillers_target[0]], title=f'{chillers_target[0]}')
    fig.update_traces(line_color='red', name=f'{chillers_target[0]}', showlegend=True)
    actual_trace = px.line(x=X_test.index, y=y_test[chillers_target[0]]).data[0]
    # Name the measured series so the two lines can be told apart in the legend.
    actual_trace.update(name=f'{chillers_target[0]} (actual)', showlegend=True)
    fig.add_trace(actual_trace)
    fig.show()