In [None]:
import pandas as pd
import json
import plotly.express as px
import numpy as np

In [None]:
# Load the chiller dataset from 'chiller_data_pre.parquet'
# (the path is relative to the notebook's working directory).
chiller_data = pd.read_parquet('chiller_data_pre.parquet')

In [None]:
# Print the frame's column names, dtypes and non-null counts.
chiller_data.info()

In [None]:
# Display summary statistics for the 'new_day' column.
chiller_data['new_day'].describe()

In [None]:
# Overlay the plant chilled-water setpoint and the loop supply water
# temperature against the frame's index.
fig = px.line(
    chiller_data,
    x=chiller_data.index,
    y=[
        'plant_target_chw_setpoint',
        'chilled_water_loop_supply_water_temperature',
    ],
    title='Setpoint vs CHWS Temperature',
    labels={'variable': 'Temperature Type', 'value': 'Temperature (°C)'},
)
fig.show()

# Create predictive model

## Chiller cooling rate

### EDA

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Pairwise correlation matrix of the numeric columns, reused by the EDA cells below.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is False and
# corr() raises if the frame holds any column that cannot be converted to float.
corr_metrix = chiller_data.corr(numeric_only=True)

#### Chiller_cooling_rate

In [None]:
# For each chiller, rank the features most correlated (by absolute value) with
# its cooling rate and show/print the top 20.
for chiller_id in range(1, 6):
    main_feature = f'chiller_{chiller_id}_cooling_rate'
    # Substrings of column names to leave out: every other chiller, plus any
    # name containing 'cdp_', 'chp_' or 'ct'.
    excluded_keywords = [f'chiller_{i}' for i in range(1, 6) if i != chiller_id] + \
                        ['cdp_', 'chp_', 'ct']

    # Filter on the correlation matrix's own index instead of chiller_data.columns:
    # corr() can omit non-numeric columns, and Series.drop() raises KeyError for
    # any label that is not present.
    candidates = corr_metrix[main_feature]
    keep_mask = [
        name != main_feature and not any(keyword in name for keyword in excluded_keywords)
        for name in candidates.index
    ]
    correlations = candidates.loc[keep_mask]

    # The sign is discarded here: the ranking is by correlation strength only.
    top_20_correlations = correlations.abs().sort_values(ascending=False).head(20)
    fig = px.bar(top_20_correlations, x=top_20_correlations.index, y=top_20_correlations.values, title=f'Top 20 Correlations with {main_feature}', labels={'x':'Features', 'y':'Correlation'})
    fig.update_layout(xaxis_tickangle=-45)
    fig.show()
    # Print the top 20 correlations for reference
    print(f"Top 20 Correlations with {main_feature}:")
    print(top_20_correlations)

#### Chiller_cond_leaving_temp

In [None]:
# For each chiller, rank the features most correlated (by absolute value) with
# its condenser leaving water temperature and show/print the top 20.
for chiller_id in range(1, 6):
    main_feature = f'chiller_{chiller_id}_cond_leaving_water_temperature'
    # Substrings of column names to leave out: every other chiller, plus any
    # name containing 'cdp_', 'chp_' or 'ct'.
    excluded_keywords = [f'chiller_{i}' for i in range(1, 6) if i != chiller_id] + \
                        ['cdp_', 'chp_', 'ct']

    # Filter on the correlation matrix's own index instead of chiller_data.columns:
    # corr() can omit non-numeric columns, and Series.drop() raises KeyError for
    # any label that is not present.
    candidates = corr_metrix[main_feature]
    keep_mask = [
        name != main_feature and not any(keyword in name for keyword in excluded_keywords)
        for name in candidates.index
    ]
    correlations = candidates.loc[keep_mask]

    # The sign is discarded here: the ranking is by correlation strength only.
    top_20_correlations = correlations.abs().sort_values(ascending=False).head(20)
    fig = px.bar(top_20_correlations, x=top_20_correlations.index, y=top_20_correlations.values, title=f'Top 20 Correlations with {main_feature}', labels={'x':'Features', 'y':'Correlation'})
    fig.update_layout(xaxis_tickangle=-45)
    fig.show()
    # Print the top 20 correlations for reference
    print(f"Top 20 Correlations with {main_feature}:")
    print(top_20_correlations)

### Model

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Build a chronological (positional) 60/40 train/test split per chiller.
# Only the split is prepared here; no model is fitted in this cell, and the
# variables hold the last chiller's split once the loop ends.
for chiller_id in range(1, 6):
    status_col = f'chiller_{chiller_id}_status_read'
    # Drop rows where status_read == 0. The original cell copied the frame but
    # never applied this filter and built X/y from the unfiltered data.
    chiller_data_i = chiller_data[chiller_data[status_col] != 0].copy()

    chillers_feature = [f'chiller_{chiller_id}_evap_leaving_water_temperature',
            f'chiller_{chiller_id}_evap_water_flow_rate',
            f'chiller_{chiller_id}_cond_water_flow_rate',
            status_col,
            'outdoor_weather_station_wetbulb_temperature',
            'outdoor_weather_station_drybulb_temperature',
            'outdoor_weather_station_relative_humidity']

    chillers_target = [f'chiller_{chiller_id}_cooling_rate']

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    # First 60% of the rows (by position) train, the remainder tests.
    train_size = int(0.6 * len(X))
    X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X.iloc[train_size:], y.iloc[train_size:]

In [None]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance

# Function to train and evaluate MLP model
def train_and_evaluate_mlp(chiller_id, data, save_path):
    """Train and evaluate an MLP regressor for one chiller's cooling rate.

    Steps: add three engineered columns, keep rows where the chiller's
    ``status_read`` is non-zero and no feature/target equals 0, split the
    rows 60/40 by position, standardize, fit an ``MLPRegressor``, print
    MSE/MAE/R² on the test part, and write a prediction plot (HTML) and a
    permutation-importance bar chart (PNG).

    NOTE: a later cell in this notebook re-defines ``train_and_evaluate_mlp``
    for the power model; re-run this cell before calling this version again.

    Parameters
    ----------
    chiller_id : int
        Chiller number used to build the ``chiller_{id}_...`` column names.
    data : pandas.DataFrame
        Source frame. The three engineered columns are written onto it in
        place (existing behaviour, kept for backward compatibility).
    save_path : str
        Base output directory; ``cooling_rate_model/`` and
        ``feature_importance/`` are created beneath it if missing.

    Raises
    ------
    ValueError
        If too few rows survive the filtering to form a training set.
    """
    prefix = f'chiller_{chiller_id}'
    chst_col = f'{prefix}_evap_leaving_water_temperature'
    chwf_col = f'{prefix}_evap_water_flow_rate'
    cdwf_col = f'{prefix}_cond_water_flow_rate'
    chst_sq_col = f'{chst_col}_squared'
    chst_cdwf_col = f'{chst_col}_cond_flow'
    cdwf_sq_col = f'{cdwf_col}_squared'

    # Add new features to the dataset (mutates the caller's frame)
    data[chst_sq_col] = data[chst_col] ** 2
    data[chst_cdwf_col] = data[chst_col] * data[cdwf_col]
    data[cdwf_sq_col] = data[cdwf_col] ** 2

    chillers_feature = [chst_col, chwf_col, cdwf_col, chst_sq_col, chst_cdwf_col, cdwf_sq_col]
    chillers_target = [f'{prefix}_cooling_rate']
    target_col = chillers_target[0]

    # Filter data to drop rows where any feature or target is 0
    chiller_data_i = data[data[f'{prefix}_status_read'] != 0].copy()
    chiller_data_i = chiller_data_i[(chiller_data_i[chillers_feature + chillers_target] != 0).all(axis=1)]

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError(
            f"Chiller {chiller_id}: only {len(X)} usable row(s) after filtering; "
            "cannot build a train/test split"
        )

    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only, so test-set statistics do not
    # leak into preprocessing; then apply the same transform to both parts.
    scaler = StandardScaler().fit(X_train_raw)
    X_train = pd.DataFrame(scaler.transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the MLP model
    mlp_model = MLPRegressor(hidden_layer_sizes=(200,), max_iter=2000, random_state=42)
    mlp_model.fit(X_train, y_train.values.ravel())

    # Predict
    y_pred_mlp = mlp_model.predict(X_test)

    # Performance Metrics
    mse_mlp = mean_squared_error(y_test, y_pred_mlp)
    mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
    r2_mlp = r2_score(y_test, y_pred_mlp)

    # Print metrics
    print(f"Chiller {chiller_id} - MLP Regression MSE: {mse_mlp}")
    print(f"Chiller {chiller_id} - MLP Regression MAE: {mae_mlp}")
    print(f"Chiller {chiller_id} - MLP Regression R²: {r2_mlp}")

    # Plotting: prediction (red) vs. actual over the test rows
    y_pred_mlp_df = pd.DataFrame(y_pred_mlp, index=y_test.index, columns=y_test.columns)
    fig = px.line(x=X_test.index, y=y_pred_mlp_df[target_col], title=f'Chiller {chiller_id} - {target_col}')
    # Force both traces into the legend so the two lines can be told apart.
    fig.update_traces(line_color='red', name=f'{target_col} Prediction', showlegend=True)
    actual_trace = px.line(x=X_test.index, y=y_test[target_col]).data[0]
    actual_trace.update(name=f'{target_col} Actual', showlegend=True)
    fig.add_trace(actual_trace)
    fig.update_layout(
        xaxis_title='Time',
        yaxis_title=f'{target_col}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/cooling_rate_model/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/cooling_rate_model/chiller_{chiller_id}_{target_col}_prediction.html")

    # Permutation Feature Importance
    perm_importance = permutation_importance(mlp_model, X_test, y_test, n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()

    # Plot Permutation Importance
    importance_df = pd.DataFrame({
        'Feature': X_test.columns[sorted_idx],
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title=f'Chiller {chiller_id} - Feature Importance')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    # The image directory was never created before; write_image fails on a
    # missing parent directory.
    os.makedirs(f"{save_path}/feature_importance/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature_importance/chiller_{chiller_id}_feature_importance.png")

# Run the cooling-rate MLP analysis once per chiller (ids 1 through 5).
save_path = "../visualizations"  # Base directory for generated plots
for chiller_id in range(1, 6):
    train_and_evaluate_mlp(
        chiller_id=chiller_id,
        data=chiller_data,
        save_path=save_path,
    )


In [None]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance

# Function to train and evaluate Linear Regression model
def train_and_evaluate_linear_regression(chiller_id, data, save_path):
    """Train and evaluate a linear regression for one chiller's cooling rate.

    Steps: add three engineered columns, keep rows where the chiller's
    ``status_read`` is non-zero and no feature/target equals 0, split the
    rows 60/40 by position, standardize, fit ``LinearRegression``, print
    MSE/MAE/R² on the test part, and write a prediction plot (HTML) and a
    permutation-importance bar chart (PNG).

    NOTE: a later cell in this notebook re-defines
    ``train_and_evaluate_linear_regression`` for the power model; re-run this
    cell before calling this version again.

    Parameters
    ----------
    chiller_id : int
        Chiller number used to build the ``chiller_{id}_...`` column names.
    data : pandas.DataFrame
        Source frame. The three engineered columns are written onto it in
        place (existing behaviour, kept for backward compatibility).
    save_path : str
        Base output directory; ``cooling_rate_model_lr/`` and
        ``feature_importance/`` are created beneath it if missing.

    Raises
    ------
    ValueError
        If too few rows survive the filtering to form a training set.
    """
    prefix = f'chiller_{chiller_id}'
    chst_col = f'{prefix}_evap_leaving_water_temperature'
    chwf_col = f'{prefix}_evap_water_flow_rate'
    cdwf_col = f'{prefix}_cond_water_flow_rate'
    chst_sq_col = f'{chst_col}_squared'
    chst_cdwf_col = f'{chst_col}_cond_flow'
    cdwf_sq_col = f'{cdwf_col}_squared'

    # Add new features to the dataset (mutates the caller's frame)
    data[chst_sq_col] = data[chst_col] ** 2
    data[chst_cdwf_col] = data[chst_col] * data[cdwf_col]
    data[cdwf_sq_col] = data[cdwf_col] ** 2

    chillers_feature = [chst_col, chwf_col, cdwf_col, chst_sq_col, chst_cdwf_col, cdwf_sq_col]
    chillers_target = [f'{prefix}_cooling_rate']
    target_col = chillers_target[0]

    # Filter data to drop rows where any feature or target is 0
    chiller_data_i = data[data[f'{prefix}_status_read'] != 0].copy()
    chiller_data_i = chiller_data_i[(chiller_data_i[chillers_feature + chillers_target] != 0).all(axis=1)]

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError(
            f"Chiller {chiller_id}: only {len(X)} usable row(s) after filtering; "
            "cannot build a train/test split"
        )

    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only, so test-set statistics do not
    # leak into preprocessing; then apply the same transform to both parts.
    scaler = StandardScaler().fit(X_train_raw)
    X_train = pd.DataFrame(scaler.transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the Linear Regression model (1-D target, like the other model cells)
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train.values.ravel())

    # Predict
    y_pred_lr = lr_model.predict(X_test)

    # Performance Metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    # Print metrics
    print(f"Chiller {chiller_id} - Linear Regression MSE: {mse_lr}")
    print(f"Chiller {chiller_id} - Linear Regression MAE: {mae_lr}")
    print(f"Chiller {chiller_id} - Linear Regression R²: {r2_lr}")

    # Plotting: prediction (red) vs. actual over the test rows
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = px.line(x=X_test.index, y=y_pred_lr_df[target_col], title=f'Chiller {chiller_id} - {target_col}')
    # Force both traces into the legend so the two lines can be told apart.
    fig.update_traces(line_color='red', name=f'{target_col} Prediction', showlegend=True)
    actual_trace = px.line(x=X_test.index, y=y_test[target_col]).data[0]
    actual_trace.update(name=f'{target_col} Actual', showlegend=True)
    fig.add_trace(actual_trace)
    fig.update_layout(
        xaxis_title='Time',
        yaxis_title=f'{target_col}',
        legend_title='Legend'
    )
    # Ensure the directory that is actually written to exists. The original
    # created "cooling_rate_model/" but saved into "cooling_rate_model_lr/".
    os.makedirs(f"{save_path}/cooling_rate_model_lr/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/cooling_rate_model_lr/chiller_{chiller_id}_{target_col}_prediction.html")

    # Permutation Feature Importance
    perm_importance = permutation_importance(lr_model, X_test, y_test, n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()

    # Plot Permutation Importance
    importance_df = pd.DataFrame({
        'Feature': X_test.columns[sorted_idx],
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title=f'Chiller {chiller_id} - Feature Importance')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    # The image directory was never created before; write_image fails on a
    # missing parent directory.
    # NOTE(review): this is the same file path the cooling-rate MLP cell writes
    # when both use the same save_path, so one image overwrites the other.
    os.makedirs(f"{save_path}/feature_importance/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature_importance/chiller_{chiller_id}_feature_importance.png")

# Run the cooling-rate linear-regression analysis once per chiller (ids 1 through 5).
save_path = "../visualizations"  # Base directory for generated plots
for chiller_id in range(1, 6):
    train_and_evaluate_linear_regression(
        chiller_id=chiller_id,
        data=chiller_data,
        save_path=save_path,
    )


## Chiller power

### Linear Regression

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px
import plotly.graph_objects as go

# Function to train and evaluate Linear Regression model
def train_and_evaluate_linear_regression(chiller_id, data, save_path):
    """Train, persist and evaluate a linear regression for one chiller's power.

    Steps: add six engineered columns, keep rows where the chiller's
    ``status_read`` is non-zero and no feature/target equals 0, split the
    rows 60/40 by position, standardize, fit ``LinearRegression``, dump the
    model and scaler with joblib, print MSE/MAE/R² on the test part, and
    write a prediction plot (HTML) and a feature/target correlation bar
    chart (PNG).

    NOTE: this re-uses the name of the cooling-rate function defined in an
    earlier cell and replaces it once this cell has run.

    Parameters
    ----------
    chiller_id : int
        Chiller number used to build the ``chiller_{id}_...`` column names.
    data : pandas.DataFrame
        Source frame. The engineered columns are written onto it in place
        (existing behaviour, kept for backward compatibility).
    save_path : str
        Base output directory; ``models/``, ``scalers/``, ``plot/`` and
        ``feature/`` are created beneath it if missing.

    Raises
    ------
    ValueError
        If too few rows survive the filtering to form a training set.
    """
    prefix = f'chiller_{chiller_id}'
    chst_col = f'{prefix}_evap_leaving_water_temperature'
    chwf_col = f'{prefix}_evap_water_flow_rate'
    cdwf_col = f'{prefix}_cond_water_flow_rate'
    cr_col = f'{prefix}_cooling_rate'

    chst_sq_col = f'{chst_col}_squared'
    chst_cdwf_col = f'{chst_col}_cond_flow'
    cdwf_sq_col = f'{cdwf_col}_squared'
    chwf_sq_col = f'{chwf_col}_squared'
    chst_chwf_col = f'{chst_col}_evap_water_flow_rate'
    cr_sq_col = f'{cr_col}_squared'

    # Short labels used on the correlation chart
    feature_mapping = {
        chst_col: 'CHST',
        chwf_col: 'CHWF',
        cdwf_col: 'CDWF',
        cr_col: 'CR',
        chst_sq_col: 'CHST²',
        chst_cdwf_col: 'CHST*CDWF',
        cdwf_sq_col: 'CDWF²',
        chwf_sq_col: 'CHWF²',
        chst_chwf_col: 'CHST*CHWF',
        cr_sq_col: 'CR²'
    }

    # Add new features to the dataset (mutates the caller's frame)
    data[chst_sq_col] = data[chst_col] ** 2
    data[chst_cdwf_col] = data[chst_col] * data[cdwf_col]
    data[cdwf_sq_col] = data[cdwf_col] ** 2
    data[chwf_sq_col] = data[chwf_col] ** 2
    data[chst_chwf_col] = data[chst_col] * data[chwf_col]
    data[cr_sq_col] = data[cr_col] ** 2

    chillers_feature = [
        chst_col, chwf_col, cdwf_col, cr_col,
        chst_sq_col, chst_cdwf_col, cdwf_sq_col,
        chwf_sq_col, chst_chwf_col, cr_sq_col,
    ]
    chillers_target = [f'{prefix}_power']
    target_col = chillers_target[0]

    # Filter data to drop rows where any feature or target is 0
    chiller_data_i = data[data[f'{prefix}_status_read'] != 0].copy()
    chiller_data_i = chiller_data_i[(chiller_data_i[chillers_feature + chillers_target] != 0).all(axis=1)]

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError(
            f"Chiller {chiller_id}: only {len(X)} usable row(s) after filtering; "
            "cannot build a train/test split"
        )

    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only, so test-set statistics do not
    # leak into preprocessing (or into the scaler that is persisted below).
    scaler = StandardScaler().fit(X_train_raw)
    X_train = pd.DataFrame(scaler.transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train.values.ravel())

    # Save the trained model together with the scaler it was trained with
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(lr_model, f"{save_path}/models/chiller_{chiller_id}_linear_regression_model.pkl")
    joblib.dump(scaler, f"{save_path}/scalers/chiller_{chiller_id}_linear_regression_scaler.pkl")

    # Predict
    y_pred_lr = lr_model.predict(X_test)

    # Performance Metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    # Print metrics
    print(f"Chiller {chiller_id} - Linear Regression MSE: {mse_lr}")
    print(f"Chiller {chiller_id} - Linear Regression MAE: {mae_lr}")
    print(f"Chiller {chiller_id} - Linear Regression R²: {r2_lr}")

    # Plotting
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[target_col], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_lr_df[target_col], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'Chiller {chiller_id} - {target_col} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{target_col}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/chiller_{chiller_id}_{target_col}_prediction.html")

    # Correlation Coefficients, computed on the filtered rows the model uses
    # (the original used the unfiltered frame, unlike the Random Forest and
    # MLP power cells, which correlate on the filtered frame).
    correlation_matrix = chiller_data_i[chillers_feature + chillers_target].corr()
    correlation_with_target = correlation_matrix[target_col].drop(target_col).sort_values(ascending=True)

    # Plot Correlation Coefficients
    # Map long feature names to shorter names
    correlation_with_target.index = [feature_mapping.get(feat, feat) for feat in correlation_with_target.index]

    fig_corr = px.bar(x=correlation_with_target.values, y=correlation_with_target.index, orientation='h',
                      title=f'Chiller {chiller_id} - Correlation Coefficients')
    fig_corr.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_corr.write_image(f"{save_path}/feature/chiller_{chiller_id}_feature_importance.png")

# Run the power linear-regression analysis once per chiller (ids 1 through 5).
save_path = "../visualizations/power_model_lr"  # Base directory for models and plots
# Make sure the model directory is present before any training starts
os.makedirs(f"{save_path}/models/", exist_ok=True)
for chiller_id in range(1, 6):
    train_and_evaluate_linear_regression(
        chiller_id=chiller_id,
        data=chiller_data,
        save_path=save_path,
    )


### Random Forest

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px

# Function to train and evaluate Random Forest model
def train_and_evaluate_random_forest(chiller_id, data, save_path):
    """Train, persist and evaluate a random forest for one chiller's power.

    Steps: keep rows where the chiller's ``status_read`` is non-zero and no
    feature/target equals 0, split the rows 60/40 by position, standardize,
    fit a ``RandomForestRegressor``, dump the model and scaler with joblib,
    print MSE/MAE/R² on the test part, and write a prediction plot (HTML),
    a feature-importance chart (PNG) and a feature/target correlation chart
    (PNG).

    Parameters
    ----------
    chiller_id : int
        Chiller number used to build the ``chiller_{id}_...`` column names.
    data : pandas.DataFrame
        Source frame; it is only read here.
    save_path : str
        Base output directory; ``models/``, ``scalers/``, ``plot/``,
        ``feature/`` and ``correlation/`` are created beneath it if missing.

    Raises
    ------
    ValueError
        If too few rows survive the filtering to form a training set.
    """
    # Imported locally because this cell's import list omits it; the function
    # otherwise only works if an earlier cell has already imported ``go``.
    import plotly.graph_objects as go

    prefix = f'chiller_{chiller_id}'
    chst_col = f'{prefix}_evap_leaving_water_temperature'
    chwf_col = f'{prefix}_evap_water_flow_rate'
    cdwf_col = f'{prefix}_cond_water_flow_rate'
    cr_col = f'{prefix}_cooling_rate'

    chillers_feature = [chst_col, chwf_col, cdwf_col, cr_col]
    chillers_target = [f'{prefix}_power']
    target_col = chillers_target[0]

    # Short labels used on the charts
    feature_mapping = {
        chst_col: 'CHST',
        chwf_col: 'CHWF',
        cdwf_col: 'CDWF',
        cr_col: 'CR',
    }

    # Filter data to drop rows where any feature or target is 0
    chiller_data_i = data[data[f'{prefix}_status_read'] != 0].copy()
    chiller_data_i = chiller_data_i[(chiller_data_i[chillers_feature + chillers_target] != 0).all(axis=1)]

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError(
            f"Chiller {chiller_id}: only {len(X)} usable row(s) after filtering; "
            "cannot build a train/test split"
        )

    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only, so test-set statistics do not
    # leak into preprocessing (or into the scaler that is persisted below).
    scaler = StandardScaler().fit(X_train_raw)
    X_train = pd.DataFrame(scaler.transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the Random Forest model; the seed makes re-runs reproducible
    rf_model = RandomForestRegressor(random_state=42)
    rf_model.fit(X_train, y_train.values.ravel())

    # Save the trained model together with the scaler it was trained with
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    joblib.dump(rf_model, f"{save_path}/models/chiller_{chiller_id}_random_forest_model.pkl")
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/chiller_{chiller_id}_random_forest_scaler.pkl")

    # Predict
    y_pred_rf = rf_model.predict(X_test)

    # Performance Metrics
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)

    # Print metrics
    print(f"Chiller {chiller_id} - Random Forest MSE: {mse_rf}")
    print(f"Chiller {chiller_id} - Random Forest MAE: {mae_rf}")
    print(f"Chiller {chiller_id} - Random Forest R²: {r2_rf}")

    # Plotting
    y_pred_rf_df = pd.DataFrame(y_pred_rf, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[target_col], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_rf_df[target_col], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'Chiller {chiller_id} - {target_col} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{target_col}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/chiller_{chiller_id}_{target_col}_prediction.html")

    # Feature Importances (impurity-based, as exposed by the fitted forest)
    feature_importances = pd.Series(rf_model.feature_importances_, index=X_train.columns)
    feature_importances = feature_importances.sort_values(ascending=True)  # Ascending order

    feature_importances.index = feature_importances.index.map(feature_mapping)

    fig_corr = px.bar(x=feature_importances.values, y=feature_importances.index, orientation='h',
                      title=f'Chiller {chiller_id} - Feature Importances')
    fig_corr.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_corr.write_image(f"{save_path}/feature/chiller_{chiller_id}_feature_importance.png")

    # Correlation coefficients on the filtered rows
    corr_matrix = chiller_data_i[chillers_feature + chillers_target].corr()
    corr_target = corr_matrix[target_col].drop(target_col)  # Drop the target's correlation with itself
    corr_target = corr_target.sort_values(ascending=True)  # Ascending order

    corr_target.index = corr_target.index.map(feature_mapping)

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'Chiller {chiller_id} - Feature Correlation with {target_col}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/chiller_{chiller_id}_correlation_coefficient.png")

# Run the power random-forest analysis once per chiller (ids 1 through 5).
save_path = "../visualizations/power_model_rf"  # Base directory for models and plots
# Make sure the model directory is present before any training starts
os.makedirs(f"{save_path}/models/", exist_ok=True)
for chiller_id in range(1, 6):
    train_and_evaluate_random_forest(
        chiller_id=chiller_id,
        data=chiller_data,
        save_path=save_path,
    )


### MLP

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px

# Function to train and evaluate MLP model
def train_and_evaluate_mlp(chiller_id, data, save_path):
    """Train, persist and evaluate an MLP regressor for one chiller's power.

    Steps: keep rows where the chiller's ``status_read`` is non-zero and no
    feature/target equals 0, split the rows 60/40 by position, standardize,
    fit an ``MLPRegressor``, dump the model and scaler with joblib, print
    MSE/MAE/R² on the test part, and write a prediction plot (HTML), a
    permutation-importance chart (PNG) and a feature/target correlation
    chart (PNG).

    NOTE: this re-uses the name of the cooling-rate function defined in an
    earlier cell and replaces it once this cell has run.

    Parameters
    ----------
    chiller_id : int
        Chiller number used to build the ``chiller_{id}_...`` column names.
    data : pandas.DataFrame
        Source frame; it is only read here.
    save_path : str
        Base output directory; ``models/``, ``scalers/``, ``plot/``,
        ``feature/`` and ``correlation/`` are created beneath it if missing.

    Raises
    ------
    ValueError
        If too few rows survive the filtering to form a training set.
    """
    # Imported locally because this cell's import list omits them; ``go``
    # otherwise only resolves if an earlier cell has already imported it.
    import plotly.graph_objects as go
    from sklearn.inspection import permutation_importance

    prefix = f'chiller_{chiller_id}'
    chst_col = f'{prefix}_evap_leaving_water_temperature'
    chwf_col = f'{prefix}_evap_water_flow_rate'
    cdwf_col = f'{prefix}_cond_water_flow_rate'
    cr_col = f'{prefix}_cooling_rate'

    chillers_feature = [chst_col, chwf_col, cdwf_col, cr_col]
    chillers_target = [f'{prefix}_power']
    target_col = chillers_target[0]

    # Short labels used on the charts
    feature_mapping = {
        chst_col: 'CHST',
        chwf_col: 'CHWF',
        cdwf_col: 'CDWF',
        cr_col: 'CR',
    }

    # Filter data to drop rows where any feature or target is 0
    chiller_data_i = data[data[f'{prefix}_status_read'] != 0].copy()
    chiller_data_i = chiller_data_i[(chiller_data_i[chillers_feature + chillers_target] != 0).all(axis=1)]

    X = chiller_data_i[chillers_feature]
    y = chiller_data_i[chillers_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError(
            f"Chiller {chiller_id}: only {len(X)} usable row(s) after filtering; "
            "cannot build a train/test split"
        )

    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only, so test-set statistics do not
    # leak into preprocessing (or into the scaler that is persisted below).
    scaler = StandardScaler().fit(X_train_raw)
    X_train = pd.DataFrame(scaler.transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the MLP model; the seed makes weight initialisation reproducible
    mlp_model = MLPRegressor(hidden_layer_sizes=(100,100), max_iter=500, random_state=42)
    mlp_model.fit(X_train, y_train.values.ravel())

    # Save the trained model together with the scaler it was trained with
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    joblib.dump(mlp_model, f"{save_path}/models/chiller_{chiller_id}_mlp_model.pkl")
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/chiller_{chiller_id}_mlp_scaler.pkl")

    # Predict
    y_pred_mlp = mlp_model.predict(X_test)

    # Performance Metrics
    mse_mlp = mean_squared_error(y_test, y_pred_mlp)
    mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
    r2_mlp = r2_score(y_test, y_pred_mlp)

    # Print metrics
    print(f"Chiller {chiller_id} - MLP MSE: {mse_mlp}")
    print(f"Chiller {chiller_id} - MLP MAE: {mae_mlp}")
    print(f"Chiller {chiller_id} - MLP R²: {r2_mlp}")

    # Plotting
    y_pred_mlp_df = pd.DataFrame(y_pred_mlp, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[target_col], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_mlp_df[target_col], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'Chiller {chiller_id} - {target_col} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{target_col}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/chiller_{chiller_id}_{target_col}_prediction.html")

    # Feature Importances (For MLP, we can use permutation importance as a proxy for feature importance)
    perm_importance = permutation_importance(mlp_model, X_test, y_test, n_repeats=10, random_state=42)
    feature_importances = pd.Series(perm_importance.importances_mean, index=X_train.columns)
    feature_importances = feature_importances.sort_values(ascending=True)
    feature_importances.index = feature_importances.index.map(feature_mapping)
    fig_corr = px.bar(x=feature_importances.values, y=feature_importances.index, orientation='h',
                      title=f'Chiller {chiller_id} - Feature Importances')
    fig_corr.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_corr.write_image(f"{save_path}/feature/chiller_{chiller_id}_feature_importance.png")

    # Correlation coefficients on the filtered rows
    corr_matrix = chiller_data_i[chillers_feature + chillers_target].corr()
    corr_target = corr_matrix[target_col].drop(target_col)  # Drop the target's correlation with itself
    corr_target = corr_target.sort_values(ascending=True)

    corr_target.index = corr_target.index.map(feature_mapping)
    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'Chiller {chiller_id} - Feature Correlation with {target_col}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/chiller_{chiller_id}_correlation_coefficient.png")

# Run the power MLP analysis once per chiller (ids 1 through 5).
save_path = "../visualizations/power_model_mlp"  # Base directory for models and plots
# Make sure the model directory is present before any training starts
os.makedirs(f"{save_path}/models/", exist_ok=True)
for chiller_id in range(1, 6):
    train_and_evaluate_mlp(
        chiller_id=chiller_id,
        data=chiller_data,
        save_path=save_path,
    )


### ARIMAX

## CHP

### Linear Regression

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px

# Function to train and evaluate Linear Regression model for CHP
def train_and_evaluate_linear_regression_chp(chiller_data, save_path):
    """Fit, persist and evaluate a linear-regression model of total CHP power.

    The model predicts ``plant_power_all_chps`` from ``plant_cooling_rate`` and
    ``chilled_water_loop_flow_rate`` plus the square and cube of the flow rate.
    Rows where ``plant_number_of_running_chps`` is 0, or where any feature or
    the target equals 0, are excluded. The first 60 % of the remaining rows
    (by position) form the training split and the rest the test split.

    Side effects:
        * Adds ``chilled_water_loop_flow_rate_squared`` and
          ``chilled_water_loop_flow_rate_cubed`` to ``chiller_data`` in place.
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          PNG importance / correlation charts below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    # Polynomial flow terms. NOTE: written onto the caller's DataFrame.
    chiller_data['chilled_water_loop_flow_rate_squared'] = chiller_data['chilled_water_loop_flow_rate'] ** 2
    chiller_data['chilled_water_loop_flow_rate_cubed'] = chiller_data['chilled_water_loop_flow_rate'] ** 3

    chp_features = [
        'plant_cooling_rate',
        'chilled_water_loop_flow_rate',
        'chilled_water_loop_flow_rate_squared',
        'chilled_water_loop_flow_rate_cubed',
    ]
    chp_target = ['plant_power_all_chps']

    # Short axis labels for the charts.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'chilled_water_loop_flow_rate': 'PCHWF',
        'chilled_water_loop_flow_rate_squared': 'PCHWF²',
        'chilled_water_loop_flow_rate_cubed': 'PCHWF³',
    }

    # Drop rows where no CHP is running ...
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_chps'] != 0]
    # ... and rows where any feature or the target is exactly 0.
    chp_data_filtered = chiller_data_filtered[(chiller_data_filtered[chp_features + chp_target] != 0).all(axis=1)].copy()

    X = chp_data_filtered[chp_features]
    y = chp_data_filtered[chp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CHP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "chp_linear_regression_model.pkl")
    joblib.dump(lr_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/chp_linear_regression_scaler.pkl")

    # Predict on the held-out rows
    y_pred_lr = lr_model.predict(X_test)

    # Performance metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    print("CHP - Linear Regression MSE:", mse_lr)
    print("CHP - Linear Regression MAE:", mae_lr)
    print("CHP - Linear Regression R²:", r2_lr)

    # Actual vs. predicted plot, saved as HTML
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[chp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_lr_df[chp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CHP - {chp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{chp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/chp_{chp_target[0]}_prediction.html")

    # Feature importance. LinearRegression exposes no `feature_importances_`,
    # so permutation importance (10 shuffles per feature) is used instead.
    perm_importance = permutation_importance(lr_model, X_test, y_test.values.ravel(),
                                             n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()
    importance_df = pd.DataFrame({
        'Feature': X_test.columns[sorted_idx].map(feature_mapping),
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title='CHP - Feature Importances')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature/chp_feature_importance.png")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = chp_data_filtered[chp_features + chp_target].corr()
    corr_target = corr_matrix[chp_target[0]].drop(chp_target[0])  # Drop the target's correlation with itself
    corr_target = corr_target.sort_values(ascending=True)  # Ascending order

    corr_target.index = corr_target.index.map(feature_mapping)

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CHP - Feature Correlation with {chp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/chp_correlation_coefficient.png")

# Root folder for the CHP linear-regression artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/chp_power_model_lr"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the Linear Regression model for CHP power prediction
train_and_evaluate_linear_regression_chp(chiller_data, save_path)


### Random forest

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px

# Function to train and evaluate Random Forest model for CHP
def train_and_evaluate_random_forest_chp(chiller_data, save_path):
    """Fit, persist and evaluate a random-forest model of total CHP power.

    The model predicts ``plant_power_all_chps`` from ``plant_cooling_rate`` and
    ``chilled_water_loop_flow_rate`` plus the square and cube of the flow rate.
    Rows where ``plant_number_of_running_chps`` is 0, or where any feature or
    the target equals 0, are excluded. The first 60 % of the remaining rows
    (by position) form the training split and the rest the test split.

    Side effects:
        * Adds ``chilled_water_loop_flow_rate_squared`` and
          ``chilled_water_loop_flow_rate_cubed`` to ``chiller_data`` in place.
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          PNG importance / correlation charts below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    # Polynomial flow terms. NOTE: written onto the caller's DataFrame.
    chiller_data['chilled_water_loop_flow_rate_squared'] = chiller_data['chilled_water_loop_flow_rate'] ** 2
    chiller_data['chilled_water_loop_flow_rate_cubed'] = chiller_data['chilled_water_loop_flow_rate'] ** 3

    chp_features = [
        'plant_cooling_rate',
        'chilled_water_loop_flow_rate',
        'chilled_water_loop_flow_rate_squared',
        'chilled_water_loop_flow_rate_cubed',
    ]
    chp_target = ['plant_power_all_chps']

    # Short axis labels for the charts.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'chilled_water_loop_flow_rate': 'PCHWF',
        'chilled_water_loop_flow_rate_squared': 'PCHWF²',
        'chilled_water_loop_flow_rate_cubed': 'PCHWF³',
    }

    # Drop rows where no CHP is running ...
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_chps'] != 0]
    # ... and rows where any feature or the target is exactly 0.
    chp_data_filtered = chiller_data_filtered[(chiller_data_filtered[chp_features + chp_target] != 0).all(axis=1)].copy()

    X = chp_data_filtered[chp_features]
    y = chp_data_filtered[chp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CHP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the Random Forest model (fixed seed for reproducible trees)
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "chp_random_forest_model.pkl")
    joblib.dump(rf_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/chp_random_forest_scaler.pkl")

    # Predict on the held-out rows
    y_pred_rf = rf_model.predict(X_test)

    # Performance metrics
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)

    print("CHP - Random Forest MSE:", mse_rf)
    print("CHP - Random Forest MAE:", mae_rf)
    print("CHP - Random Forest R²:", r2_rf)

    # Actual vs. predicted plot, saved as HTML
    y_pred_rf_df = pd.DataFrame(y_pred_rf, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[chp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_rf_df[chp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CHP - {chp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{chp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/chp_{chp_target[0]}_prediction.html")

    # Impurity-based feature importances reported by the fitted forest
    feature_importances = pd.Series(rf_model.feature_importances_, index=X_train.columns)
    feature_importances = feature_importances.sort_values(ascending=True)  # Ascending order
    feature_importances.index = feature_importances.index.map(feature_mapping)

    fig_corr = px.bar(x=feature_importances.values, y=feature_importances.index, orientation='h',
                      title='CHP - Feature Importances')
    fig_corr.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_corr.write_image(f"{save_path}/feature/chp_feature_importance.png")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = chp_data_filtered[chp_features + chp_target].corr()
    corr_target = corr_matrix[chp_target[0]].drop(chp_target[0])  # Drop the target's correlation with itself
    corr_target = corr_target.sort_values(ascending=True)  # Ascending order

    corr_target.index = corr_target.index.map(feature_mapping)

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CHP - Feature Correlation with {chp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/chp_correlation_coefficient.png")

# Root folder for the CHP random-forest artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/chp_power_model_rf"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the Random Forest model for CHP power prediction
train_and_evaluate_random_forest_chp(chiller_data, save_path)


### MLP

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px

# Function to train and evaluate MLP model for CHP
def train_and_evaluate_mlp_chp(chiller_data, save_path):
    """Fit, persist and evaluate an MLP model of total CHP power.

    The model predicts ``plant_power_all_chps`` from ``plant_cooling_rate`` and
    ``chilled_water_loop_flow_rate``. Rows where
    ``plant_number_of_running_chps`` is 0 are excluded. The first 60 % of the
    remaining rows (by position) form the training split and the rest the test
    split.

    Side effects:
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          a PNG correlation chart below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    chp_features = [
        'plant_cooling_rate',
        'chilled_water_loop_flow_rate'
    ]

    chp_target = ['plant_power_all_chps']

    # Drop rows where no CHP is running.
    # NOTE(review): unlike the other CHP/CDP models in this notebook, rows with
    # a zero feature or target are NOT removed here — confirm this is intended.
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_chps'] != 0]

    X = chiller_data_filtered[chp_features]
    y = chiller_data_filtered[chp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CHP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the MLP model (two hidden layers of 100 units, fixed seed)
    mlp_model = MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42)
    mlp_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "chp_mlp_model.pkl")
    joblib.dump(mlp_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/chp_mlp_scaler.pkl")

    # Predict on the held-out rows
    y_pred_mlp = mlp_model.predict(X_test)

    # Performance metrics
    mse_mlp = mean_squared_error(y_test, y_pred_mlp)
    mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
    r2_mlp = r2_score(y_test, y_pred_mlp)

    print("CHP - MLP MSE:", mse_mlp)
    print("CHP - MLP MAE:", mae_mlp)
    print("CHP - MLP R²:", r2_mlp)

    # Actual vs. predicted plot, saved as HTML
    y_pred_mlp_df = pd.DataFrame(y_pred_mlp, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[chp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_mlp_df[chp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CHP - {chp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{chp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/chp_{chp_target[0]}_prediction.html")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = chiller_data_filtered[chp_features + chp_target].corr()
    corr_target = corr_matrix[chp_target[0]].drop(chp_target[0])  # Drop the target's correlation with itself

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CHP - Feature Correlation with {chp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/chp_correlation_coefficient.png")

# Root folder for the CHP MLP artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/chp_power_model_mlp"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the MLP model for CHP power prediction
train_and_evaluate_mlp_chp(chiller_data, save_path)


## CDP

### Linear Regression

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px

# Function to train and evaluate Linear Regression model for CDP
def train_and_evaluate_linear_regression_cdp(chiller_data, save_path):
    """Fit, persist and evaluate a linear-regression model of total CDP power.

    The model predicts ``plant_power_all_cdps`` from ``plant_cooling_rate`` and
    ``condenser_water_loop_flow_rate`` plus the square and cube of the flow
    rate. Rows where ``plant_number_of_running_cdps`` is 0, or where any
    feature or the target equals 0, are excluded. The first 60 % of the
    remaining rows (by position) form the training split and the rest the test
    split.

    Side effects:
        * Adds ``condenser_water_loop_flow_rate_squared`` and
          ``condenser_water_loop_flow_rate_cubed`` to ``chiller_data`` in place.
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          PNG importance / correlation charts below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    # Polynomial flow terms. NOTE: written onto the caller's DataFrame.
    chiller_data['condenser_water_loop_flow_rate_squared'] = chiller_data['condenser_water_loop_flow_rate'] ** 2
    chiller_data['condenser_water_loop_flow_rate_cubed'] = chiller_data['condenser_water_loop_flow_rate'] ** 3

    cdp_features = [
        'plant_cooling_rate',
        'condenser_water_loop_flow_rate',
        'condenser_water_loop_flow_rate_squared',
        'condenser_water_loop_flow_rate_cubed',
    ]
    cdp_target = ['plant_power_all_cdps']

    # Short axis labels for the charts.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'condenser_water_loop_flow_rate': 'PCDWF',
        'condenser_water_loop_flow_rate_squared': 'PCDWF²',
        'condenser_water_loop_flow_rate_cubed': 'PCDWF³',
    }

    # Drop rows where no CDP is running ...
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_cdps'] != 0]
    # ... and rows where any feature or the target is exactly 0.
    cdp_data_filtered = chiller_data_filtered[(chiller_data_filtered[cdp_features + cdp_target] != 0).all(axis=1)].copy()

    X = cdp_data_filtered[cdp_features]
    y = cdp_data_filtered[cdp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CDP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "cdp_linear_regression_model.pkl")
    joblib.dump(lr_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/cdp_linear_regression_scaler.pkl")

    # Predict on the held-out rows
    y_pred_lr = lr_model.predict(X_test)

    # Performance metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    print("CDP - Linear Regression MSE:", mse_lr)
    print("CDP - Linear Regression MAE:", mae_lr)
    print("CDP - Linear Regression R²:", r2_lr)

    # Actual vs. predicted plot, saved as HTML
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[cdp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_lr_df[cdp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CDP - {cdp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{cdp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/cdp_{cdp_target[0]}_prediction.html")

    # Permutation feature importance on the test split (10 shuffles per feature)
    perm_importance = permutation_importance(lr_model, X_test, y_test, n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()

    # Use the short labels from `feature_mapping` on the chart, as the other
    # CHP/CDP charts in this notebook do.
    importance_df = pd.DataFrame({
        'Feature': X_test.columns[sorted_idx].map(feature_mapping),
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title='CDP - Feature Importance')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature/cdp_feature_importance.png")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = cdp_data_filtered[cdp_features + cdp_target].corr()
    corr_target = corr_matrix[cdp_target[0]].drop(cdp_target[0])  # Drop the target's correlation with itself
    corr_target.index = corr_target.index.map(feature_mapping)

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CDP - Feature Correlation with {cdp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/cdp_feature_correlation.png")

# Root folder for the CDP linear-regression artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/cdp_power_model_lr"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the Linear Regression model for CDP power prediction
train_and_evaluate_linear_regression_cdp(chiller_data, save_path)


### Random forest

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px

# Function to train and evaluate Random Forest model for CDP
def train_and_evaluate_random_forest_cdp(chiller_data, save_path):
    """Fit, persist and evaluate a random-forest model of total CDP power.

    The model predicts ``plant_power_all_cdps`` from ``plant_cooling_rate`` and
    ``condenser_water_loop_flow_rate`` plus the square and cube of the flow
    rate. Rows where ``plant_number_of_running_cdps`` is 0, or where any
    feature or the target equals 0, are excluded. The first 60 % of the
    remaining rows (by position) form the training split and the rest the test
    split.

    Side effects:
        * Adds ``condenser_water_loop_flow_rate_squared`` and
          ``condenser_water_loop_flow_rate_cubed`` to ``chiller_data`` in place.
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          PNG importance / correlation charts below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    # Polynomial flow terms. NOTE: written onto the caller's DataFrame.
    chiller_data['condenser_water_loop_flow_rate_squared'] = chiller_data['condenser_water_loop_flow_rate'] ** 2
    chiller_data['condenser_water_loop_flow_rate_cubed'] = chiller_data['condenser_water_loop_flow_rate'] ** 3

    cdp_features = [
        'plant_cooling_rate',
        'condenser_water_loop_flow_rate',
        'condenser_water_loop_flow_rate_squared',
        'condenser_water_loop_flow_rate_cubed',
    ]
    cdp_target = ['plant_power_all_cdps']

    # Short axis labels for the charts.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'condenser_water_loop_flow_rate': 'PCDWF',
        'condenser_water_loop_flow_rate_squared': 'PCDWF²',
        'condenser_water_loop_flow_rate_cubed': 'PCDWF³',
    }

    # Drop rows where no CDP is running ...
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_cdps'] != 0]
    # ... and rows where any feature or the target is exactly 0.
    cdp_data_filtered = chiller_data_filtered[(chiller_data_filtered[cdp_features + cdp_target] != 0).all(axis=1)].copy()

    X = cdp_data_filtered[cdp_features]
    y = cdp_data_filtered[cdp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CDP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the Random Forest model (fixed seed for reproducible trees)
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "cdp_random_forest_model.pkl")
    joblib.dump(rf_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/cdp_random_forest_scaler.pkl")

    # Predict on the held-out rows
    y_pred_rf = rf_model.predict(X_test)

    # Performance metrics
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)

    print("CDP - Random Forest MSE:", mse_rf)
    print("CDP - Random Forest MAE:", mae_rf)
    print("CDP - Random Forest R²:", r2_rf)

    # Actual vs. predicted plot, saved as HTML
    y_pred_rf_df = pd.DataFrame(y_pred_rf, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[cdp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_rf_df[cdp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CDP - {cdp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{cdp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/cdp_{cdp_target[0]}_prediction.html")

    # Impurity-based feature importances reported by the fitted forest
    feature_importances = pd.Series(rf_model.feature_importances_, index=X_train.columns)
    feature_importances = feature_importances.sort_values(ascending=True)  # Ascending order
    feature_importances.index = feature_importances.index.map(feature_mapping)

    fig_corr = px.bar(x=feature_importances.values, y=feature_importances.index, orientation='h',
                      title='CDP - Feature Importances')
    fig_corr.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    # File name uses the `cdp_` prefix like every other CDP artifact
    # (it was previously saved under a `chp_` name by mistake).
    fig_corr.write_image(f"{save_path}/feature/cdp_feature_importance.png")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = cdp_data_filtered[cdp_features + cdp_target].corr()
    corr_target = corr_matrix[cdp_target[0]].drop(cdp_target[0])  # Drop the target's correlation with itself
    corr_target = corr_target.sort_values(ascending=True)  # Ascending order

    corr_target.index = corr_target.index.map(feature_mapping)

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CDP - Feature Correlation with {cdp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/cdp_feature_correlation.png")

# Root folder for the CDP random-forest artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/cdp_power_model_rf"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the Random Forest model for CDP power prediction
train_and_evaluate_random_forest_cdp(chiller_data, save_path)


### MLP

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px

# Function to train and evaluate MLP model for CDP
def train_and_evaluate_mlp_cdp(chiller_data, save_path):
    """Fit, persist and evaluate an MLP model of total CDP power.

    The model predicts ``plant_power_all_cdps`` from ``plant_cooling_rate`` and
    ``condenser_water_loop_flow_rate``. Rows where
    ``plant_number_of_running_cdps`` is 0, or where any feature or the target
    equals 0, are excluded. The first 60 % of the remaining rows (by position)
    form the training split and the rest the test split.

    Side effects:
        * Writes the model and the scaler (joblib), an HTML prediction plot and
          PNG importance / correlation charts below ``save_path``.
        * Prints MSE, MAE and R² computed on the test split.

    Args:
        chiller_data: pandas DataFrame containing the columns named above.
        save_path: Root directory for every artifact written by this function.

    Raises:
        ValueError: If too few rows survive filtering to form a training split.
    """
    # Imported locally so this cell does not depend on an earlier cell having
    # brought `go` into the notebook namespace.
    import plotly.graph_objects as go

    cdp_features = [
        'plant_cooling_rate',
        'condenser_water_loop_flow_rate'
    ]

    cdp_target = ['plant_power_all_cdps']

    # Drop rows where no CDP is running ...
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_cdps'] != 0]
    # ... and rows where any feature or the target is exactly 0.
    cdp_data_filtered = chiller_data_filtered[(chiller_data_filtered[cdp_features + cdp_target] != 0).all(axis=1)].copy()

    X = cdp_data_filtered[cdp_features]
    y = cdp_data_filtered[cdp_target]

    train_size = int(0.6 * len(X))
    if train_size == 0:
        raise ValueError("Not enough CDP rows left after filtering to build a training split.")

    # Fit the scaler on the training rows only so that no statistics of the
    # held-out rows leak into preprocessing; then transform every row with it.
    scaler = StandardScaler()
    scaler.fit(X.iloc[:train_size])
    X_scaled_df = pd.DataFrame(scaler.transform(X), index=X.index, columns=X.columns)

    # Positional 60/40 split; row order is preserved (no shuffling).
    X_train, y_train = X_scaled_df.iloc[:train_size], y.iloc[:train_size]
    X_test, y_test = X_scaled_df.iloc[train_size:], y.iloc[train_size:]

    # Train the MLP model (two hidden layers of 100 units, fixed seed)
    mlp_model = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', random_state=42)
    mlp_model.fit(X_train, y_train.values.ravel())

    # Persist model and scaler together so predictions can be reproduced later
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "cdp_mlp_model.pkl")
    joblib.dump(mlp_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/cdp_mlp_scaler.pkl")

    # Predict on the held-out rows
    y_pred_mlp = mlp_model.predict(X_test)

    # Performance metrics
    mse_mlp = mean_squared_error(y_test, y_pred_mlp)
    mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
    r2_mlp = r2_score(y_test, y_pred_mlp)

    print("CDP - MLP MSE:", mse_mlp)
    print("CDP - MLP MAE:", mae_mlp)
    print("CDP - MLP R²:", r2_mlp)

    # Actual vs. predicted plot, saved as HTML
    y_pred_mlp_df = pd.DataFrame(y_pred_mlp, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[cdp_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_mlp_df[cdp_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CDP - {cdp_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{cdp_target[0]}',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/plot/", exist_ok=True)
    fig.write_html(f"{save_path}/plot/cdp_{cdp_target[0]}_prediction.html")

    # Permutation feature importance on the test split (10 shuffles per feature)
    perm_importance = permutation_importance(mlp_model, X_test, y_test, n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()

    importance_df = pd.DataFrame({
        'Feature': X_test.columns[sorted_idx],
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title='CDP - Feature Importance')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature/cdp_feature_importance.png")

    # Correlation of each (unscaled) feature with the target
    corr_matrix = cdp_data_filtered[cdp_features + cdp_target].corr()
    corr_target = corr_matrix[cdp_target[0]].drop(cdp_target[0])  # Drop the target's correlation with itself

    fig_corr_coef = px.bar(x=corr_target.values, y=corr_target.index, orientation='h',
                           title=f'CDP - Feature Correlation with {cdp_target[0]}')
    fig_corr_coef.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature'
    )

    os.makedirs(f"{save_path}/correlation/", exist_ok=True)
    fig_corr_coef.write_image(f"{save_path}/correlation/cdp_feature_correlation.png")

# Root folder for the CDP MLP artifacts (relative to the notebook's working directory).
# NOTE: `save_path` is a notebook-level name that other cells also assign; run this cell before relying on it.
save_path = "../visualizations/cdp_power_model_mlp"

# Create the models folder up front; exist_ok=True keeps the cell safe to re-run.
# (The training function also creates this folder itself before saving.)
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the MLP model for CDP power prediction
train_and_evaluate_mlp_cdp(chiller_data, save_path)

## CT

### Linear Regression

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
import joblib
import plotly.express as px


# Function to train and evaluate Linear Regression model for CT
def train_and_evaluate_linear_regression_ct(chiller_data, save_path):
    """Fit, persist and report a linear-regression model of cooling-tower (CT) power.

    The model predicts ``plant_power_all_cts`` from plant cooling rate, condenser
    water flow, outdoor wet-bulb / dry-bulb temperature, relative humidity and the
    2nd-4th powers of the wet-bulb temperature. Rows where no cooling tower is
    running, or where any feature or the target equals 0, are excluded. The first
    60 % of the remaining rows (in their existing order) are used for training and
    the rest for testing.

    Parameters
    ----------
    chiller_data : pandas.DataFrame
        Must contain the five measured feature columns, ``plant_power_all_cts``
        and ``plant_number_of_running_cts``. The frame is not modified.
    save_path : str
        Output root. The function writes ``models/``, ``scalers/``, ``plot/`` and
        ``feature/`` artefacts beneath it.

    Returns
    -------
    None
        Metrics are printed; model, scaler and figures are written to disk.
    """
    # This cell does not import graph_objects itself; import it locally so the
    # function does not depend on `go` having been defined by another cell.
    import plotly.graph_objects as go

    # Short labels used on the importance / correlation plots. The key order is
    # also the feature (column) order the model and scaler are trained with.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'condenser_water_loop_flow_rate': 'PCDWF',
        'outdoor_weather_station_wetbulb_temperature': 'WBT',
        'outdoor_weather_station_drybulb_temperature': 'DBT',
        'outdoor_weather_station_relative_humidity': 'RH',
        'outdoor_weather_station_wetbulb_temperature_squared': 'WBT²',
        'outdoor_weather_station_wetbulb_temperature_cubed': 'WBT³',
        'outdoor_weather_station_wetbulb_temperature_fourth': 'WBT4'
    }

    ct_features = list(feature_mapping.keys())
    ct_target = ['plant_power_all_cts']

    wet_bulb = 'outdoor_weather_station_wetbulb_temperature'
    polynomial_terms = {
        'outdoor_weather_station_wetbulb_temperature_squared': 2,
        'outdoor_weather_station_wetbulb_temperature_cubed': 3,
        'outdoor_weather_station_wetbulb_temperature_fourth': 4,
    }
    measured_features = [name for name in ct_features if name not in polynomial_terms]

    # Build the engineered columns on a private copy of the needed columns so the
    # caller's DataFrame does not silently gain extra columns on every run.
    ct_data = chiller_data[measured_features + ct_target + ['plant_number_of_running_cts']].copy()
    for column, exponent in polynomial_terms.items():
        ct_data[column] = ct_data[wet_bulb] ** exponent

    # Drop rows where no cooling tower is running
    ct_data_running = ct_data[ct_data['plant_number_of_running_cts'] != 0]

    # Drop rows where any feature or the target is 0
    ct_data_filtered = ct_data_running[(ct_data_running[ct_features + ct_target] != 0).all(axis=1)].copy()

    X = ct_data_filtered[ct_features]
    y = ct_data_filtered[ct_target]

    # Ordered 60/40 split (no shuffling), done BEFORE scaling.
    train_size = int(0.6 * len(X))
    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only so that test-set statistics do not
    # leak into preprocessing; the test rows are transformed with the fitted scaler.
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train.values.ravel())

    # Save the trained model and the matching scaler
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "ct_linear_regression_model.pkl")
    joblib.dump(lr_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/ct_linear_regression_scaler.pkl")

    # Predict
    y_pred_lr = lr_model.predict(X_test)

    # Performance Metrics
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    # Print metrics
    print("CT - Linear Regression MSE:", mse_lr)
    print("CT - Linear Regression MAE:", mae_lr)
    print("CT - Linear Regression R²:", r2_lr)

    # Actual vs predicted over the test period
    y_pred_lr_df = pd.DataFrame(y_pred_lr, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[ct_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_lr_df[ct_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CT - {ct_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{ct_target[0]}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/ct_{ct_target[0]}_prediction.html")

    # Permutation Feature Importance on the held-out rows
    perm_importance = permutation_importance(lr_model, X_test, y_test, n_repeats=10, random_state=42)
    sorted_idx = perm_importance.importances_mean.argsort()

    # Plot Permutation Importance (ascending, so the largest bar is drawn last)
    importance_df = pd.DataFrame({
        'Feature': [feature_mapping[feature] for feature in X.columns[sorted_idx]],
        'Importance': perm_importance.importances_mean[sorted_idx]
    })

    fig_importance = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
                            title='CT - Feature Importance')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_importance.write_image(f"{save_path}/feature/ct_feature_importance.png")

    # Correlation of each (unscaled) feature with the target over the filtered rows
    corr_coef = X.corrwith(y.iloc[:, 0])
    corr_df = pd.DataFrame({
        'Feature': [feature_mapping[feature] for feature in X.columns],
        'Correlation Coefficient': corr_coef.values
    })
    corr_df = corr_df.sort_values(by='Correlation Coefficient', ascending=True)
    fig_corr = px.bar(corr_df, x='Correlation Coefficient', y='Feature', orientation='h',
                      title='CT - Correlation Coefficients')
    fig_corr.update_layout(
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature',
        legend_title='Legend'
    )
    fig_corr.write_image(f"{save_path}/feature/ct_correlation_coefficients.png")

# Output root for the CT linear-regression run. ct_power() in the MPC section
# loads the model and scaler from this same directory, so keep the two in sync.
save_path = "../visualizations/ct_power_model_lr"

# Create the models/ sub-directory up front (no-op when it already exists).
os.makedirs(f"{save_path}/models/", exist_ok=True)

# Train and evaluate the Linear Regression model for CT power prediction.
train_and_evaluate_linear_regression_ct(chiller_data, save_path)


### Random forest

In [None]:
import os
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px
import plotly.graph_objects as go

# Function to train and evaluate Random Forest model for CT
def train_and_evaluate_random_forest_ct(chiller_data, save_path):
    """Fit, persist and report a random-forest model of cooling-tower (CT) power.

    The model predicts ``plant_power_all_cts`` from plant cooling rate, condenser
    water flow, outdoor wet-bulb / dry-bulb temperature and relative humidity.
    Rows where no cooling tower is running, or where any feature or the target
    equals 0, are excluded. The first 60 % of the remaining rows (in their
    existing order) are used for training and the rest for testing.

    Parameters
    ----------
    chiller_data : pandas.DataFrame
        Must contain the five feature columns, ``plant_power_all_cts`` and
        ``plant_number_of_running_cts``. The frame is not modified.
    save_path : str
        Output root. The function writes ``models/``, ``scalers/``, ``plot/`` and
        ``feature/`` artefacts beneath it.

    Returns
    -------
    None
        Metrics are printed; model, scaler and figures are written to disk.
    """
    ct_features = [
        'plant_cooling_rate',
        'condenser_water_loop_flow_rate',
        'outdoor_weather_station_wetbulb_temperature',
        'outdoor_weather_station_drybulb_temperature',
        'outdoor_weather_station_relative_humidity'
    ]

    ct_target = ['plant_power_all_cts']

    # Short labels used on the feature-importance plot.
    feature_mapping = {
        'plant_cooling_rate': 'PCR',
        'condenser_water_loop_flow_rate': 'PCDWF',
        'outdoor_weather_station_wetbulb_temperature': 'WBT',
        'outdoor_weather_station_drybulb_temperature': 'DBT',
        'outdoor_weather_station_relative_humidity': 'RH',
    }

    # Drop rows where no cooling tower is running
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_cts'] != 0]

    # Drop rows where any feature or the target is 0
    ct_data_filtered = chiller_data_filtered[(chiller_data_filtered[ct_features + ct_target] != 0).all(axis=1)].copy()

    X = ct_data_filtered[ct_features]
    y = ct_data_filtered[ct_target]

    # Ordered 60/40 split (no shuffling), done BEFORE scaling.
    train_size = int(0.6 * len(X))
    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only so that test-set statistics do not
    # leak into preprocessing; the test rows are transformed with the fitted scaler.
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the Random Forest model
    rf_model = RandomForestRegressor(random_state=42)
    rf_model.fit(X_train, y_train.values.ravel())

    # Save the trained model and the matching scaler
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "ct_random_forest_model.pkl")
    joblib.dump(rf_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/ct_random_forest_scaler.pkl")

    # Predict
    y_pred_rf = rf_model.predict(X_test)

    # Performance Metrics
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)

    # Print metrics
    print("CT - Random Forest MSE:", mse_rf)
    print("CT - Random Forest MAE:", mae_rf)
    print("CT - Random Forest R²:", r2_rf)

    # Actual vs predicted over the test period
    y_pred_rf_df = pd.DataFrame(y_pred_rf, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[ct_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_rf_df[ct_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CT - {ct_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{ct_target[0]}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/ct_{ct_target[0]}_prediction.html")

    # Impurity-based feature importances, ascending, relabelled with short names
    feature_importances = pd.Series(rf_model.feature_importances_, index=X_train.columns)
    feature_importances = feature_importances.sort_values(ascending=True)
    feature_importances.index = feature_importances.index.map(feature_mapping)

    fig_importance = px.bar(x=feature_importances.values, y=feature_importances.index, orientation='h',
                            title=f'CT - Feature Importances')
    fig_importance.update_layout(
        xaxis_title='Importance',
        yaxis_title='Feature'
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    # Saved under a CT name (the previous "chp_" prefix was a copy-paste leftover).
    fig_importance.write_image(f"{save_path}/feature/ct_feature_importance.png")

    # Correlation with the target, computed on the same filtered rows the model
    # is built from (as the CT linear-regression function does) rather than on
    # the raw frame that still contains tower-off / zero rows.
    correlation_matrix = ct_data_filtered[ct_features + ct_target].corr()
    correlation_with_target = correlation_matrix[ct_target[0]].drop(ct_target[0]).sort_values()

    # Plot Correlation Coefficients
    fig_corr = go.Figure(go.Bar(
        x=correlation_with_target.values,
        y=correlation_with_target.index,
        orientation='h'
    ))

    fig_corr.update_layout(
        title='CT - Correlation Coefficients',
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature',
        yaxis=dict(tickmode='linear')
    )

    fig_corr.write_image(f"{save_path}/feature/ct_correlation_coefficients.png")

# Output root for the CT random-forest run (models/, scalers/, plot/, feature/).
save_path = "../visualizations/ct_power_model_rf"

# Create the models/ sub-directory up front (no-op when it already exists).
os.makedirs(f"{save_path}/models/", exist_ok=True)

# chiller_data is expected to be loaded already by an earlier cell.
# Train and evaluate the Random Forest model for CT power prediction.
train_and_evaluate_random_forest_ct(chiller_data, save_path)


### MLP

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import plotly.express as px
import plotly.graph_objects as go

# Function to train and evaluate MLP model for CT
def train_and_evaluate_mlp_ct(chiller_data, save_path):
    """Fit, persist and report an MLP regressor of cooling-tower (CT) power.

    The model predicts ``plant_power_all_cts`` from plant cooling rate, condenser
    water flow, outdoor wet-bulb / dry-bulb temperature and relative humidity.
    Rows where no cooling tower is running, or where any feature or the target
    equals 0, are excluded. The first 60 % of the remaining rows (in their
    existing order) are used for training and the rest for testing.

    Parameters
    ----------
    chiller_data : pandas.DataFrame
        Must contain the five feature columns, ``plant_power_all_cts`` and
        ``plant_number_of_running_cts``. The frame is not modified.
    save_path : str
        Output root. The function writes ``models/``, ``scalers/``, ``plot/`` and
        ``feature/`` artefacts beneath it.

    Returns
    -------
    None
        Metrics are printed; model, scaler and figures are written to disk.
    """
    ct_features = [
        'plant_cooling_rate',
        'condenser_water_loop_flow_rate',
        'outdoor_weather_station_wetbulb_temperature',
        'outdoor_weather_station_drybulb_temperature',
        'outdoor_weather_station_relative_humidity'
    ]

    ct_target = ['plant_power_all_cts']

    # Drop rows where no cooling tower is running
    chiller_data_filtered = chiller_data[chiller_data['plant_number_of_running_cts'] != 0]

    # Drop rows where any feature or the target is 0
    ct_data_filtered = chiller_data_filtered[(chiller_data_filtered[ct_features + ct_target] != 0).all(axis=1)].copy()

    X = ct_data_filtered[ct_features]
    y = ct_data_filtered[ct_target]

    # Ordered 60/40 split (no shuffling), done BEFORE scaling.
    train_size = int(0.6 * len(X))
    X_train_raw, X_test_raw = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Fit the scaler on the training rows only so that test-set statistics do not
    # leak into preprocessing; the test rows are transformed with the fitted scaler.
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), index=X_train_raw.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test_raw), index=X_test_raw.index, columns=X.columns)

    # Train the MLP model (two hidden layers of 100 units, fixed seed)
    mlp_model = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', random_state=42, max_iter=500)
    mlp_model.fit(X_train, y_train.values.ravel())

    # Save the trained model and the matching scaler
    os.makedirs(f"{save_path}/models/", exist_ok=True)
    model_save_path = os.path.join(save_path, "models", "ct_mlp_model.pkl")
    joblib.dump(mlp_model, model_save_path)
    os.makedirs(f"{save_path}/scalers/", exist_ok=True)
    joblib.dump(scaler, f"{save_path}/scalers/ct_mlp_scaler.pkl")

    # Predict
    y_pred_mlp = mlp_model.predict(X_test)

    # Performance Metrics
    mse_mlp = mean_squared_error(y_test, y_pred_mlp)
    mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
    r2_mlp = r2_score(y_test, y_pred_mlp)

    # Print metrics
    print("CT - MLP MSE:", mse_mlp)
    print("CT - MLP MAE:", mae_mlp)
    print("CT - MLP R²:", r2_mlp)

    # Actual vs predicted over the test period
    y_pred_mlp_df = pd.DataFrame(y_pred_mlp, index=y_test.index, columns=y_test.columns)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X_test.index, y=y_test[ct_target[0]], mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=X_test.index, y=y_pred_mlp_df[ct_target[0]], mode='lines', name='Prediction'))
    fig.update_layout(
        title=f'CT - {ct_target[0]} Prediction',
        xaxis_title='Time',
        yaxis_title=f'{ct_target[0]}',
        legend_title='Legend'
    )
    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/plot/", exist_ok=True)

    # Save plot as HTML
    fig.write_html(f"{save_path}/plot/ct_{ct_target[0]}_prediction.html")

    # Correlation with the target, computed on the same filtered rows the model
    # is built from (as the CT linear-regression function does) rather than on
    # the raw frame that still contains tower-off / zero rows.
    correlation_matrix = ct_data_filtered[ct_features + ct_target].corr()
    correlation_with_target = correlation_matrix[ct_target[0]].drop(ct_target[0]).sort_values()

    # Plot Correlation Coefficients
    fig_corr = go.Figure(go.Bar(
        x=correlation_with_target.values,
        y=correlation_with_target.index,
        orientation='h'
    ))

    fig_corr.update_layout(
        title='CT - Correlation Coefficients',
        xaxis_title='Correlation Coefficient',
        yaxis_title='Feature',
        yaxis=dict(tickmode='linear')
    )

    # Ensure the directory exists for saving plots
    os.makedirs(f"{save_path}/feature/", exist_ok=True)
    fig_corr.write_image(f"{save_path}/feature/ct_correlation_coefficients.png")

# Output root for the CT MLP run (models/, scalers/, plot/, feature/).
save_path = "../visualizations/ct_power_model_mlp"

# Create the models/ sub-directory up front (no-op when it already exists).
os.makedirs(f"{save_path}/models/", exist_ok=True)

# chiller_data is expected to be loaded already by an earlier cell;
# nothing is read from disk here.

# Train and evaluate the MLP model for CT power prediction.
train_and_evaluate_mlp_ct(chiller_data, save_path)


## Visualization

In [None]:
import plotly.io as pio
import chart_studio.plotly as py
import chart_studio.tools as tls


In [None]:
import os

# Chart Studio credentials come from the environment so that no secret is stored
# in the notebook. Export CHART_STUDIO_API_KEY (and optionally
# CHART_STUDIO_USERNAME) before running this cell.
# NOTE: the API key that used to be hardcoded here has been exposed in the
# notebook history and should be regenerated in Chart Studio.
username = os.environ.get('CHART_STUDIO_USERNAME', 'puminjirapu')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if not api_key:
    raise RuntimeError("Set the CHART_STUDIO_API_KEY environment variable before uploading to Chart Studio.")

# Set credentials
tls.set_credentials_file(username=username, api_key=api_key)

# Load the saved Plotly HTML file
# NOTE(review): plotly.io documents read_json/from_json for loading figures but,
# as far as I can tell, no read_html; this call presumably raises AttributeError.
# Confirm, and consider saving the figure with write_json and loading it with
# pio.read_json instead.
fig = pio.read_html("../visualizations/ct_power_model_lr/plot/ct_plant_power_all_cts_prediction.html")
# Upload figure to Chart Studio
py.plot(fig, filename='uploaded-plot', auto_open=True)


# MPC

In [None]:
import numpy as np
import math
import pandas as pd
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler
import random
import matplotlib.pyplot as plt
import random
import math
import numpy as np
from scipy.optimize import minimize
import itertools
import joblib

# Plant constants shared by the model, constraint and optimisation functions below.
N_CHILLERS = 5  # number of chillers; also fixes the layout of the MPC input vector
MIN_CHW_FLOW = 400  # GPM, lower chilled-water flow limit per running chiller
MAX_CHW_FLOW = 2500  # GPM, upper chilled-water flow limit per running chiller
MIN_CDW_FLOW = 500  # GPM, lower condenser-water flow limit per running chiller
MAX_CDW_FLOW = 3000  # GPM, upper condenser-water flow limit per running chiller
MAX_CHR_TEMP = 59  # °F, upper limit on the chilled-water return temperature
CHILLER_CAPACITY = 800  # Tons, per-chiller cooling capacity used in the constraints

# Load the pre-processed plant data and keep only the last 100 rows;
# mpc_control iterates over every row of `data`.
data = pd.read_parquet('chiller_data_pre.parquet')
data = data[-100:]

def chiller_power(chiller_mode, chiller_id, chw_setpoint, chw_flow, cdw_flow, cooling_rate):
    """Predict the power of one chiller from its saved model and scaler.

    Returns 0 without touching the disk when the mode, setpoint, either flow or
    the cooling rate is 0. Otherwise the model/scaler pair for ``chiller_id`` is
    loaded (random forest for chillers 1 and 2, MLP for every other id), the four
    operating values are scaled and the first predicted value is returned.
    """
    # Any zero operating value is treated as "chiller draws no power".
    for operating_value in (chiller_mode, chw_setpoint, chw_flow, cdw_flow, cooling_rate):
        if operating_value == 0:
            return 0

    # Pick the artefact pair that belongs to this chiller.
    if chiller_id in (1, 2):
        model_path = f"../visualizations/power_model_rf/models/chiller_{chiller_id}_random_forest_model.pkl"
        scaler_path = f"../visualizations/power_model_rf/scalers/chiller_{chiller_id}_random_forest_scaler.pkl"
    else:
        model_path = f"../visualizations/power_model_mlp/models/chiller_{chiller_id}_mlp_model.pkl"
        scaler_path = f"../visualizations/power_model_mlp/scalers/chiller_{chiller_id}_mlp_scaler.pkl"

    model = joblib.load(model_path)

    # Single-row frame; the column names follow the per-chiller naming scheme.
    features = pd.DataFrame({
        f'chiller_{chiller_id}_evap_leaving_water_temperature': [chw_setpoint],
        f'chiller_{chiller_id}_evap_water_flow_rate': [chw_flow],
        f'chiller_{chiller_id}_cond_water_flow_rate': [cdw_flow],
        f'chiller_{chiller_id}_cooling_rate': [cooling_rate]
    })

    # Apply the already-fitted scaler (transform only, no re-fitting).
    scaler = joblib.load(scaler_path)
    scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)

    return model.predict(scaled_features)[0]

def chp_power(chiller_modes, chw_flows, cooling_load):
    """Predict total chilled-water pump (CHP) power with the saved linear model.

    Parameters
    ----------
    chiller_modes : sequence of 0/1
        On/off flag per chiller; only flows of running chillers are counted.
    chw_flows : sequence of numbers
        Chilled-water flow per chiller.
    cooling_load : number
        Plant cooling rate passed to the model as ``plant_cooling_rate``.

    Returns
    -------
    number
        0 when there is no cooling load or no chilled-water flow, otherwise the
        first value predicted by the model.
    """
    total_chw_flow = sum(flow * mode for flow, mode in zip(chw_flows, chiller_modes))

    # No load or no flow -> pumps draw nothing. The flow test previously lacked
    # "== 0", which returned 0 whenever there WAS flow and ran the model when
    # there was none; this now matches cdp_power and ct_power.
    if cooling_load == 0 or total_chw_flow == 0:
        return 0

    model = joblib.load('../visualizations/chp_power_model_lr/models/chp_linear_regression_model.pkl')

    # Single-row input frame
    input_df = pd.DataFrame({
        'plant_cooling_rate': [cooling_load],
        'chilled_water_loop_flow_rate': [total_chw_flow]
    })

    # Polynomial flow terms expected by the saved model
    input_df['chilled_water_loop_flow_rate_squared'] = input_df['chilled_water_loop_flow_rate'] ** 2
    input_df['chilled_water_loop_flow_rate_cubed'] = input_df['chilled_water_loop_flow_rate'] ** 3

    # Apply the already-fitted scaler (transform only, no re-fitting)
    scaler = joblib.load(f"../visualizations/chp_power_model_lr/scalers/chp_linear_regression_scaler.pkl")
    input_scaled = scaler.transform(input_df)
    input_scaled_df = pd.DataFrame(input_scaled, columns=input_df.columns)

    # Predict CHP power
    chp_power_prediction = model.predict(input_scaled_df)

    return chp_power_prediction[0]

def cdp_power(chiller_modes, cdw_flows, cooling_load):
    """Predict total condenser-water pump (CDP) power with the saved linear model.

    Only the flows of running chillers (mode 1) are summed. Returns 0 when the
    cooling load or that total flow is 0; otherwise returns the first value
    predicted by the model for the scaled single-row input.
    """
    total_cdw_flow = sum(flow * mode for flow, mode in zip(cdw_flows, chiller_modes))

    # Guard clauses: nothing to pump -> no power.
    if cooling_load == 0:
        return 0
    if total_cdw_flow == 0:
        return 0

    model = joblib.load('../visualizations/cdp_power_model_lr/models/cdp_linear_regression_model.pkl')

    # Single-row frame with the base features, then the polynomial flow terms.
    features = pd.DataFrame({
        'plant_cooling_rate': [cooling_load],
        'condenser_water_loop_flow_rate': [total_cdw_flow]
    })
    flow_column = features['condenser_water_loop_flow_rate']
    features['condenser_water_loop_flow_rate_squared'] = flow_column ** 2
    features['condenser_water_loop_flow_rate_cubed'] = flow_column ** 3

    # Apply the already-fitted scaler (transform only, no re-fitting).
    scaler = joblib.load(f"../visualizations/cdp_power_model_lr/scalers/cdp_linear_regression_scaler.pkl")
    scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)

    return model.predict(scaled_features)[0]

def ct_power(chiller_modes, cdw_flows, cooling_load, state):
    """Predict total cooling-tower (CT) power with the saved linear model.

    Only the condenser flows of running chillers (mode 1) are summed. Returns 0
    when the cooling load or that total flow is 0. Otherwise the weather values
    are read from ``state``, the wet-bulb polynomial terms are added, the row is
    scaled and the first predicted value is returned.
    """
    total_cdw_flow = sum(flow * mode for flow, mode in zip(cdw_flows, chiller_modes))

    # Guard clauses: no load or no condenser flow -> no tower power.
    if cooling_load == 0:
        return 0
    if total_cdw_flow == 0:
        return 0

    model = joblib.load('../visualizations/ct_power_model_lr/models/ct_linear_regression_model.pkl')

    # Single-row frame; the column order matches the CT linear-regression training features.
    features = pd.DataFrame({
        'plant_cooling_rate': [cooling_load],
        'condenser_water_loop_flow_rate': [total_cdw_flow],
        'outdoor_weather_station_wetbulb_temperature': [state['outdoor_weather_station_wetbulb_temperature']],
        'outdoor_weather_station_drybulb_temperature': [state['outdoor_weather_station_drybulb_temperature']],
        'outdoor_weather_station_relative_humidity': [state['outdoor_weather_station_relative_humidity']]
    })

    # Wet-bulb polynomial terms (2nd to 4th power).
    wet_bulb = features['outdoor_weather_station_wetbulb_temperature']
    features['outdoor_weather_station_wetbulb_temperature_squared'] = wet_bulb ** 2
    features['outdoor_weather_station_wetbulb_temperature_cubed'] = wet_bulb ** 3
    features['outdoor_weather_station_wetbulb_temperature_fourth'] = wet_bulb ** 4

    # Apply the already-fitted scaler (transform only, no re-fitting).
    scaler = joblib.load(f"../visualizations/ct_power_model_lr/scalers/ct_linear_regression_scaler.pkl")
    scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)

    return model.predict(scaled_features)[0]

def chw_temp_out(state, chiller_modes, chw_setpoints, cooling_load, chw_flows):
    """Return the chilled-water return temperature implied by the load.

    The supply temperature is the flow-weighted setpoint divided by the total
    flow of running chillers; when that total is not positive the plant target
    setpoint from ``state`` is used instead. The temperature rise
    ``24 * cooling_load / total_flow`` is then added (0 when the total flow is 0).
    """
    total_chw_flow = sum(flow * mode for flow, mode in zip(chw_flows, chiller_modes))

    if total_chw_flow > 0:
        # NOTE(review): the numerator is not weighted by chiller mode, unlike the
        # denominator; callers in this file zero the flow of idle chillers.
        weighted_setpoints = sum(flow * setpoint for flow, setpoint in zip(chw_flows, chw_setpoints))
        supply_temp = weighted_setpoints / total_chw_flow
    else:
        supply_temp = state['plant_target_chw_setpoint']

    if total_chw_flow != 0:
        temp_rise = 24 * cooling_load / total_chw_flow
    else:
        temp_rise = 0

    return supply_temp + temp_rise

def chiller_cooling_rate(chiller_mode, chw_flow, chw_setpoint, chr_temp):
    """Return flow times (return temperature - setpoint) divided by 24.

    ``chiller_mode`` is accepted for signature symmetry but is not used.
    """
    temperature_rise = chr_temp - chw_setpoint
    return chw_flow * temperature_rise / 24

# Chiller change tracking
class ChillerChangeTracker:
    """Per-chiller counter of recorded on/off changes.

    A chiller may change while its counter is below 1; ``reset`` clears every
    counter.
    """

    def __init__(self, n_chillers):
        # One integer counter per chiller, all starting at 0.
        self.changes = np.zeros(n_chillers, dtype=int)
        self.n_chillers = n_chillers

    def reset(self):
        """Set every counter back to 0."""
        self.changes[:] = 0

    def can_change(self, chiller_index):
        """Return whether the chiller has recorded fewer than one change."""
        return self.changes[chiller_index] < 1

    def record_change(self, chiller_index):
        """Increase the chiller's change counter by one."""
        self.changes[chiller_index] = self.changes[chiller_index] + 1

def system_model(state, inputs, cooling_load):
    """Evaluate total plant power for one candidate control vector.

    ``inputs`` is laid out as N_CHILLERS modes, N_CHILLERS chilled-water
    setpoints, N_CHILLERS chilled-water flows and then the condenser-water flows
    (treated as all zero when absent). Modes are rounded to integers; idle
    chillers get zero setpoint/flows, running ones are clamped to setpoint
    40..50 and at least the minimum flows. Returns ``(state, total_power)`` with
    ``state`` passed through unchanged; debug lines are printed on every call.
    """
    modes_end = N_CHILLERS
    setpoints_end = 2 * N_CHILLERS
    flows_end = 3 * N_CHILLERS

    chiller_modes = [int(round(raw_mode)) for raw_mode in inputs[:modes_end]]
    chw_setpoints = inputs[modes_end:setpoints_end]
    chw_flows = inputs[setpoints_end:flows_end]
    if len(inputs) > flows_end:
        cdw_flows = inputs[flows_end:]
    else:
        cdw_flows = [0] * N_CHILLERS

    # Normalise the per-chiller values in place on the slices taken above.
    for idx in range(N_CHILLERS):
        if chiller_modes[idx] != 0:
            chw_setpoints[idx] = max(40, min(50, chw_setpoints[idx]))
            chw_flows[idx] = max(MIN_CHW_FLOW, chw_flows[idx])
            cdw_flows[idx] = max(MIN_CDW_FLOW, cdw_flows[idx])
            continue
        chw_setpoints[idx] = 0
        chw_flows[idx] = 0
        cdw_flows[idx] = 0

    print(f"Debug: chiller_modes = {chiller_modes}")
    print(f"Debug: chw_setpoints = {chw_setpoints}")
    print(f"Debug: chw_flows = {chw_flows}")
    print(f"Debug: cdw_flows = {cdw_flows}")

    chr_temp = chw_temp_out(state, chiller_modes, chw_setpoints, cooling_load, chw_flows)
    clr_chillers = [
        chiller_cooling_rate(chiller_modes[idx], chw_flows[idx], chw_setpoints[idx], chr_temp)
        for idx in range(N_CHILLERS)
    ]

    # Chiller power of every running unit (chiller ids are 1-based) ...
    total_power = sum(
        chiller_power(chiller_modes[idx], idx + 1, chw_setpoints[idx], chw_flows[idx], cdw_flows[idx], clr_chillers[idx])
        for idx in range(N_CHILLERS)
        if chiller_modes[idx]
    )
    # ... plus the chilled-water pumps, condenser pumps and cooling towers.
    total_power += chp_power(chiller_modes, chw_flows, cooling_load)
    total_power += cdp_power(chiller_modes, cdw_flows, cooling_load)
    total_power += ct_power(chiller_modes, cdw_flows, cooling_load, state)
    print(f"Debug: total_cal power = {total_power}")
    return state, total_power

def objective(inputs, current_state, cooling_load):
    """Return the total power that system_model computes for ``inputs``."""
    _state, total_power = system_model(current_state, inputs, cooling_load)
    return total_power

def constraints(inputs, current_state, cooling_load, prev_chiller_modes, chiller_tracker):
    """Return the inequality-constraint values for a full control vector.

    A value >= 0 means the constraint is satisfied. The list always holds
    6 * N_CHILLERS + 3 entries, in this order: plant capacity, per-chiller
    capacity, per-chiller switch limit, chilled-water flow window (two per
    chiller), condenser-water flow window (two per chiller), return-temperature
    limit, and supply-temperature tolerance (0.1) around the target setpoint.
    """
    def _flow_window(mode, flow, lower, upper):
        # Running chiller with flow: distance to both limits.
        if mode != 0 and flow != 0:
            return [flow - lower, upper - flow]
        # Idle chiller with no flow: trivially fine.
        if mode == 0 and flow == 0:
            return [0, 0]
        # Mode and flow disagree: mark as violated.
        return [-1, -1]

    chiller_modes = inputs[:N_CHILLERS]
    chw_setpoints = inputs[N_CHILLERS:2*N_CHILLERS]
    chw_flows = inputs[2*N_CHILLERS:3*N_CHILLERS]
    cdw_flows = inputs[3*N_CHILLERS:]
    chr_temp = chw_temp_out(current_state, chiller_modes, chw_setpoints, cooling_load, chw_flows)

    # 1. Sufficient cooling capacity
    values = [sum(chiller_modes) * CHILLER_CAPACITY - cooling_load]

    # 1.a Sufficient cooling capacity for each chiller
    values.extend(
        CHILLER_CAPACITY - chiller_cooling_rate(chiller_modes[k], chw_flows[k], chw_setpoints[k], chr_temp)
        for k in range(N_CHILLERS)
    )

    # 2. Chiller on/off limit: -1 when a mode differs from the previous one
    #    although the tracker no longer allows a change, else 0
    values.extend(
        -1 if chiller_modes[k] != prev_chiller_modes[k] and not chiller_tracker.can_change(k) else 0
        for k in range(N_CHILLERS)
    )

    # 3. Flow rate limits (chilled water first, then condenser water)
    for k in range(N_CHILLERS):
        values.extend(_flow_window(chiller_modes[k], chw_flows[k], MIN_CHW_FLOW, MAX_CHW_FLOW))
    for k in range(N_CHILLERS):
        values.extend(_flow_window(chiller_modes[k], cdw_flows[k], MIN_CDW_FLOW, MAX_CDW_FLOW))

    # 4. Returning chilled water temperature limit
    values.append(MAX_CHR_TEMP - chr_temp)

    # 5. Combined chilled water supply temperature
    target_chs = current_state['plant_target_chw_setpoint']
    total_chw_flow = sum(flow * mode for flow, mode in zip(chw_flows, chiller_modes))
    if total_chw_flow > 0:
        actual_chs = sum(flow * setpoint for flow, setpoint in zip(chw_flows, chw_setpoints)) / total_chw_flow
    else:
        actual_chs = target_chs
    values.append(0.1 - abs(actual_chs - target_chs))

    return values

def get_min_chillers_required(cooling_load):
    """Return the chiller count needed to cover ``cooling_load``, never below 1."""
    chillers_for_load = math.ceil(cooling_load / CHILLER_CAPACITY)
    return chillers_for_load if chillers_for_load > 1 else 1

def can_use_previous_modes(prev_modes, min_chillers_required):
    """Tell whether the previous modes run the required number of chillers or one more."""
    running = sum(prev_modes)
    if running < min_chillers_required:
        return False
    return running <= min_chillers_required + 1

def get_valid_chiller_combinations(min_chillers_required, prev_modes, chiller_tracker):
    """List every mode vector with exactly ``min_chillers_required`` chillers on
    that is reachable from ``prev_modes``.

    A vector is reachable when each chiller either keeps its previous mode or is
    still allowed to change according to ``chiller_tracker``.
    """
    def _reachable(modes):
        for idx in range(N_CHILLERS):
            if modes[idx] != prev_modes[idx] and not chiller_tracker.can_change(idx):
                return False
        return True

    candidates = (
        [1 if idx in running else 0 for idx in range(N_CHILLERS)]
        for running in itertools.combinations(range(N_CHILLERS), min_chillers_required)
    )
    return [modes for modes in candidates if _reachable(modes)]

def check_constraints(inputs, current_state, cooling_load, prev_chiller_modes, chiller_tracker, tolerance=1e-6):
    """Return ``(index, value)`` for every constraint value below ``-tolerance``."""
    values = constraints(inputs, current_state, cooling_load, prev_chiller_modes, chiller_tracker)
    return [(position, value) for position, value in enumerate(values) if value < -tolerance]

def optimize_continuous_vars(discrete_vars, cooling_load, current_state, prev_chiller_modes, chiller_tracker):
    """Optimise setpoint and flows of the running chillers for fixed on/off modes.

    For every chiller whose entry in ``discrete_vars`` is 1, three continuous
    variables (chilled-water setpoint, chilled-water flow, condenser-water flow)
    are optimised with SLSQP to minimise ``objective`` subject to ``constraints``
    and simple bounds. The starting point is drawn at random inside the bounds,
    so results are not reproducible unless ``random`` is seeded by the caller.

    Parameters
    ----------
    discrete_vars : list
        Per-chiller 0/1 modes. It is concatenated with lists via ``+``, so it is
        presumably expected to be a plain Python list - verify against callers.
    cooling_load, current_state, prev_chiller_modes, chiller_tracker
        Forwarded unchanged to ``objective`` / ``constraints``.

    Returns
    -------
    (full_output, best_power)
        ``full_output`` is setpoints + chilled-water flows + condenser-water flows
        (3 * N_CHILLERS values, zeros for idle chillers; the modes themselves are
        not included). ``best_power`` is the objective value at that solution.
        When SLSQP reports failure, the random initial guess and its objective
        value are returned instead.
    """
    def objective_wrapper(x):
        # Expand the reduced variable vector and evaluate total power.
        full_input = create_full_input(discrete_vars, x)
        return objective(full_input, current_state, cooling_load)

    def constraint_wrapper(x):
        # Expand the reduced variable vector and evaluate all constraint values.
        full_input = create_full_input(discrete_vars, x)
        return constraints(full_input, current_state, cooling_load, prev_chiller_modes, chiller_tracker)

    def create_full_input(discrete_vars, x):
        # x holds three consecutive values per RUNNING chiller; idle chillers
        # are filled with zeros so the result has the full 4 * N_CHILLERS layout.
        chw_setpoints = []
        chw_flows = []
        cdw_flows = []
        x_index = 0
        for i in range(N_CHILLERS):
            if discrete_vars[i] == 1:
                chw_setpoints.append(x[x_index])
                chw_flows.append(x[x_index + 1])
                cdw_flows.append(x[x_index + 2])
                x_index += 3
            else:
                chw_setpoints.append(0)
                chw_flows.append(0)
                cdw_flows.append(0)
        return discrete_vars + chw_setpoints + chw_flows + cdw_flows

    # Bounds and a random starting point, three entries per running chiller.
    bounds = []
    initial_guess = []
    for i in range(N_CHILLERS):
        if discrete_vars[i] == 1:
            chw_setpoint = random.uniform(40, 50)
            chw_flow = random.uniform(MIN_CHW_FLOW, MAX_CHW_FLOW)
            cdw_flow = random.uniform(MIN_CDW_FLOW, MAX_CDW_FLOW)
            
            initial_guess.extend([chw_setpoint, chw_flow, cdw_flow])
            bounds.extend([(40, 50), (MIN_CHW_FLOW, MAX_CHW_FLOW), (MIN_CDW_FLOW, MAX_CDW_FLOW)])

    # All constraint values are passed as one vector-valued inequality (each >= 0).
    result = minimize(
        objective_wrapper,
        initial_guess,
        method='SLSQP',
        bounds=bounds,
        constraints={'type': 'ineq', 'fun': constraint_wrapper},
        options={'maxiter': 100, 'ftol': 1e-6}
    )

    if result.success:
        best_solution = result.x
        best_power = result.fun
    else:
        # Fallback: keep the random starting point (it is not checked for feasibility).
        print("Optimization failed. Using initial guess.")
        best_solution = initial_guess
        best_power = objective_wrapper(initial_guess)

    # Scatter the reduced solution back into per-chiller lists (zeros when idle).
    chw_setpoints = [0] * N_CHILLERS
    chw_flows = [0] * N_CHILLERS
    cdw_flows = [0] * N_CHILLERS

    var_index = 0
    for i in range(N_CHILLERS):
        if discrete_vars[i] == 1:
            chw_setpoints[i] = best_solution[var_index]
            chw_flows[i] = best_solution[var_index + 1]
            cdw_flows[i] = best_solution[var_index + 2]
            var_index += 3

    full_output = chw_setpoints + chw_flows + cdw_flows

    print(f"Best solution found - Power: {best_power}")
    print("Debug: Best solution chiller modes =", discrete_vars)
    print("Debug: Best solution chw_setpoints =", chw_setpoints)
    print("Debug: Best solution chw_flows =", chw_flows)
    print("Debug: Best solution cdw_flows =", cdw_flows)
    return full_output, best_power

def mpc_control(initial_state):
    """Run the step-by-step chiller plant control loop over every row of ``data``.

    For each row ``t`` of the module-level ``data`` frame the loop:

    1. copies that row's weather readings, plant setpoint and ``new_day`` flag
       into the working state (the tracker is reset when ``new_day == 1``);
    2. chooses the on/off mode of each chiller, either by adjusting the
       previous modes or by comparing the combinations returned by
       ``get_valid_chiller_combinations``;
    3. obtains per-chiller CHW setpoints, CHW flows and CDW flows from
       ``optimize_continuous_vars``;
    4. evaluates the result with ``constraints`` and ``system_model`` and
       appends it to the trajectory.

    Args:
        initial_state: Mapping with the state keys listed in step 1. It is
            shallow-copied, so the caller's object is not modified.

    Returns:
        list: One ``(optimal_inputs, actual_power)`` tuple per row of ``data``,
        where ``optimal_inputs`` is ``discrete_vars + continuous_vars``.
    """
    current_state = initial_state.copy()
    optimal_trajectory = []
    prev_chiller_modes = [0] * N_CHILLERS
    chiller_tracker = ChillerChangeTracker(N_CHILLERS)

    # Columns copied from the current data row into the working state each step.
    state_columns = (
        'outdoor_weather_station_wetbulb_temperature',
        'outdoor_weather_station_drybulb_temperature',
        'outdoor_weather_station_relative_humidity',
        'plant_target_chw_setpoint',
        'new_day',
    )

    for t in range(len(data)):
        row = data.iloc[t]  # single row lookup instead of one per column
        cooling_load = row['plant_cooling_rate']
        for column in state_columns:
            current_state[column] = row[column]

        if current_state['new_day'] == 1:
            chiller_tracker.reset()

        print(f"\nTime step: {t}")
        print(f"Cooling load: {cooling_load}")
        print(f"Previous chiller modes: {prev_chiller_modes}")
        print(f"Current state: {current_state}")

        min_chillers_required = get_min_chillers_required(cooling_load)

        # Filled in early only when the combination search below has already
        # optimized the winning combination; otherwise computed after selection.
        continuous_vars = None
        optimal_power = None

        if can_use_previous_modes(prev_chiller_modes, min_chillers_required):
            discrete_vars = prev_chiller_modes.copy()

            # Check if we need to turn on or off chillers
            active_chillers = sum(discrete_vars)
            if active_chillers > min_chillers_required:
                # Consider turning off excess chillers
                while sum(discrete_vars) > min_chillers_required:
                    running = [i for i in range(N_CHILLERS) if discrete_vars[i] == 1]
                    if not running:
                        # Nothing left to switch off; avoids random.choice([]).
                        break
                    chiller_to_turn_off = random.choice(running)
                    if chiller_tracker.can_change(chiller_to_turn_off):
                        discrete_vars[chiller_to_turn_off] = 0
                    else:
                        break
            elif active_chillers < min_chillers_required:
                # Consider turning on additional chillers
                while sum(discrete_vars) < min_chillers_required:
                    idle = [i for i in range(N_CHILLERS) if discrete_vars[i] == 0]
                    if not idle:
                        # Every chiller is already on; avoids random.choice([]).
                        break
                    chiller_to_turn_on = random.choice(idle)
                    if chiller_tracker.can_change(chiller_to_turn_on):
                        discrete_vars[chiller_to_turn_on] = 1
                    else:
                        break
            print("Using modified previous chiller modes")
        else:
            valid_combinations = get_valid_chiller_combinations(min_chillers_required, prev_chiller_modes, chiller_tracker)
            if not valid_combinations:
                print("No valid combinations found. Using random combination.")
                discrete_vars = [0] * N_CHILLERS
                active_indices = random.sample(range(N_CHILLERS), min_chillers_required)
                for idx in active_indices:
                    discrete_vars[idx] = 1
            else:
                best_combination = None
                best_power = float('inf')
                for combo in valid_combinations:
                    candidate_vars, power = optimize_continuous_vars(combo, cooling_load, current_state, prev_chiller_modes, chiller_tracker)
                    print(f"Testing combination {combo} - Power: {power}")
                    # Always accept the first candidate so a selection exists
                    # even when no power compares below infinity (inf/NaN).
                    if best_combination is None or power < best_power:
                        best_power = power
                        best_combination = combo
                        continuous_vars = candidate_vars
                discrete_vars = best_combination
                # Reuse the solution that won the comparison: the optimizer
                # starts from a random guess, so re-running it could report a
                # different power than the one used to select this combination.
                optimal_power = best_power
            print(f"Selected discrete vars: {discrete_vars}")

        if continuous_vars is None:
            continuous_vars, optimal_power = optimize_continuous_vars(discrete_vars, cooling_load, current_state, prev_chiller_modes, chiller_tracker)
        optimal_inputs = discrete_vars + continuous_vars
        constraints_values = constraints(optimal_inputs, current_state, cooling_load, prev_chiller_modes, chiller_tracker)
        print("Debug: Constraints:", constraints_values)
        if any(c < 0 for c in constraints_values):
            print("Warning: Final solution violates constraints!")
            for i, c in enumerate(constraints_values):
                if c < 0:
                    print(f"Constraint {i} violated: {c}")
        print("\nDebug: Optimal inputs:", optimal_inputs)
        print(f"Optimal power: {optimal_power}")
        print(f"Final chiller modes: {discrete_vars}")
        print(f"Final CHW setpoints: {continuous_vars[:N_CHILLERS]}")
        print(f"Final CHW flows: {continuous_vars[N_CHILLERS:2*N_CHILLERS]}")
        print(f"Final CDW flows: {continuous_vars[2*N_CHILLERS:]}")

        # next_state is currently unused: the state is rebuilt from `data` each
        # step rather than propagated from the model.
        next_state, actual_power = system_model(current_state, optimal_inputs, cooling_load)

        # NOTE(review): mode switches are recorded only when at least one
        # chiller reports can_change(i) == 0. That guard looks inverted (it may
        # have been meant to skip recording once no chiller can change), but
        # ChillerChangeTracker is not visible here - confirm before changing.
        any_unchangeable = False
        for i in range(N_CHILLERS):
            if chiller_tracker.can_change(i) == 0:
                any_unchangeable = True
        if any_unchangeable:
            for i in range(N_CHILLERS):
                if discrete_vars[i] != prev_chiller_modes[i]:
                    if chiller_tracker.can_change(i):
                        chiller_tracker.record_change(i)

        optimal_trajectory.append((optimal_inputs, actual_power))
        prev_chiller_modes = discrete_vars.copy()  # copy so later edits do not alias

    return optimal_trajectory

# Seed the controller state from the first data row, then run the MPC loop.
initial_state = {
    state_key: data.iloc[0][state_key]
    for state_key in (
        'outdoor_weather_station_wetbulb_temperature',
        'outdoor_weather_station_drybulb_temperature',
        'outdoor_weather_station_relative_humidity',
        'plant_target_chw_setpoint',
    )
}
# The first step is never flagged as a new day in the initial state.
initial_state['new_day'] = 0

optimal_trajectory = mpc_control(initial_state)


In [None]:
import cProfile

# Profile one full MPC run. The result of this run is discarded; it exists only
# to locate hotspots. Sorting by cumulative time lists the most expensive call
# chains first instead of the default ordering by function name.
cProfile.run('mpc_control(initial_state)', sort='cumulative')

#### Normal plot

In [None]:
# Compare measured plant power against the power reported by the MPC run.
actual_power = data['plant_power'].values
predicted_power = [step[1] for step in optimal_trajectory]

plt.figure(figsize=(12, 6))
for power_series, power_label in (
    (actual_power, 'Actual Power'),
    (predicted_power, 'Optimized Power'),
):
    plt.plot(power_series, label=power_label)
plt.legend()
plt.title('Actual vs Optimized Power Consumption')
plt.xlabel('Time')
plt.ylabel('Power (kW)')
plt.show()

# Relative reduction of total power over the whole horizon, in percent.
improvement = (sum(actual_power) - sum(predicted_power)) / sum(actual_power) * 100
print(f"Total power consumption reduction: {improvement:.2f}%")

# On/off mode of every chiller: the first N_CHILLERS entries of each input vector.
chiller_modes = np.array([step[0][:N_CHILLERS] for step in optimal_trajectory])
plt.figure(figsize=(12, 6))
for i in range(N_CHILLERS):
    plt.plot(chiller_modes[:, i], label=f'Chiller {i+1}')
plt.legend()
plt.title('Chiller Operation Modes')
plt.xlabel('Time')
plt.ylabel('On/Off')
plt.yticks([0, 1])
plt.show()

# The remaining input entries are laid out as setpoints, CHW flows, CDW flows,
# each N_CHILLERS wide.
setpoints = np.array([step[0][N_CHILLERS:2*N_CHILLERS] for step in optimal_trajectory])
chw_flows = np.array([step[0][2*N_CHILLERS:3*N_CHILLERS] for step in optimal_trajectory])
cdw_flows = np.array([step[0][3*N_CHILLERS:] for step in optimal_trajectory])

# One figure per quantity, one line per chiller.
for per_chiller_values, label_suffix, plot_title, y_label in (
    (setpoints, 'Setpoint', 'Chiller Setpoints', 'Temperature (°F)'),
    (chw_flows, 'CHW Flow', 'Chilled Water Flow Rates', 'Flow Rate (GPM)'),
    (cdw_flows, 'CDW Flow', 'Condenser Water Flow Rates', 'Flow Rate (GPM)'),
):
    plt.figure(figsize=(12, 6))
    for i in range(N_CHILLERS):
        plt.plot(per_chiller_values[:, i], label=f'Chiller {i+1} {label_suffix}')
    plt.legend()
    plt.title(plot_title)
    plt.xlabel('Time')
    plt.ylabel(y_label)
    plt.show()

#### Plotly plot

In [None]:
import plotly.graph_objects as go

# Interactive version of the actual-vs-optimized power comparison.
fig1 = go.Figure()

# One line trace per power series, plotted against the time-step index.
for power_series, trace_name in (
    (actual_power, 'Actual Power'),
    (predicted_power, 'Optimized Power'),
):
    fig1.add_trace(
        go.Scatter(
            x=np.arange(len(power_series)),
            y=power_series,
            mode='lines',
            name=trace_name,
        )
    )

# Legend anchored at the top-left corner of the plot area.
fig1.update_layout(
    title='Actual vs Optimized Power Consumption',
    xaxis_title='Time',
    yaxis_title='Power (kW)',
    legend={'x': 0, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig1.show()


In [None]:
# Percentage reduction of total power: (actual - optimized) / actual * 100.
total_actual_power = sum(actual_power)
total_optimized_power = sum(predicted_power)
improvement = (total_actual_power - total_optimized_power) / total_actual_power * 100

print(f"Total power consumption reduction: {improvement:.2f}%")


In [None]:
# On/off mode of each chiller over time (interactive).
fig2 = go.Figure()

mode_time_axis = np.arange(len(chiller_modes))
for chiller_idx in range(N_CHILLERS):
    mode_trace = go.Scatter(
        x=mode_time_axis,
        y=chiller_modes[:, chiller_idx],
        mode='lines',
        name=f'Chiller {chiller_idx + 1}',
    )
    fig2.add_trace(mode_trace)

# Only 0 and 1 are meaningful on the y axis, so restrict the ticks to those.
fig2.update_layout(
    title='Chiller Operation Modes',
    xaxis_title='Time',
    yaxis_title='On/Off',
    legend={'x': 0, 'y': 1, 'traceorder': 'normal'},
    yaxis={'tickvals': [0, 1]},
    height=600,
    width=1200,
)

fig2.show()

In [None]:
# Per-chiller chilled-water setpoints over time (interactive).
fig3 = go.Figure()

setpoint_time_axis = np.arange(len(setpoints))
for chiller_idx in range(N_CHILLERS):
    setpoint_trace = go.Scatter(
        x=setpoint_time_axis,
        y=setpoints[:, chiller_idx],
        mode='lines',
        name=f'Chiller {chiller_idx + 1} Setpoint',
    )
    fig3.add_trace(setpoint_trace)

# Legend anchored at the top-right corner.
fig3.update_layout(
    title='Chiller Setpoints',
    xaxis_title='Time',
    yaxis_title='Temperature (°F)',
    legend={'x': 1, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig3.show()


In [None]:
import plotly.graph_objects as go
import numpy as np

# Flow-weighted average of the per-chiller setpoints at each time step.
# The per-step total flow is floored at 1e-6 so steps with no flow divide
# safely (their weighted setpoint comes out as 0).
total_chw_flow = np.sum(chw_flows, axis=1, keepdims=True)
flow_denominator = np.maximum(total_chw_flow, 1e-6)
total_setpoint_weighted = np.sum(setpoints * chw_flows / flow_denominator, axis=1)

# NOTE(review): this series is labelled "Actual CHWS" but is read from the
# plant *target* setpoint column - confirm whether a measured supply
# temperature column should be used instead.
total_actual_chws = data['plant_target_chw_setpoint']

fig = go.Figure()

# Optimized (flow-weighted) setpoint against the recorded plant setpoint.
for chws_series, trace_name in (
    (total_setpoint_weighted, 'Total Setpoint Weighted'),
    (total_actual_chws, 'Total Actual CHWS'),
):
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(chws_series)),
            y=chws_series,
            mode='lines',
            name=trace_name,
        )
    )

# Legend anchored at the top-right corner.
fig.update_layout(
    title='Total Target Setpoint (Weighted by Flow) vs Actual CHWS',
    xaxis_title='Time',
    yaxis_title='Temperature (°F)',
    legend={'x': 1, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig.show()


In [None]:
import plotly.graph_objects as go
import numpy as np

# Flow-weighted average setpoint per time step; total flow is floored at 1e-6
# so zero-flow steps do not divide by zero.
flow_denominator = np.maximum(np.sum(chw_flows, axis=1, keepdims=True), 1e-6)
total_setpoint_weighted = np.sum(setpoints * chw_flows / flow_denominator, axis=1)

# Return temperature = supply setpoint + temperature rise across the load.
# The rise is computed as cooling_rate * 24 / total_flow; zero total flow is
# replaced by 1e-6 before dividing.
# NOTE(review): the factor 24 presumably comes from tons = GPM * dT / 24 -
# confirm the units of plant_cooling_rate and the flows.
chw_flows_sum = np.sum(chw_flows, axis=1)
safe_flows_sum = np.where(chw_flows_sum != 0, chw_flows_sum, 1e-6)
chw_temperature_rise = (data['plant_cooling_rate'] * 24) / safe_flows_sum
optimized_chwr = chw_temperature_rise + total_setpoint_weighted

fig = go.Figure()

chwr_trace = go.Scatter(
    x=np.arange(len(optimized_chwr)),
    y=optimized_chwr,
    mode='lines',
    name='Optimized CHWR',
)
fig.add_trace(chwr_trace)

# Legend anchored at the top-right corner.
fig.update_layout(
    title='Optimized CHWR',
    xaxis_title='Time',
    yaxis_title='Temperature (°F)',
    legend={'x': 1, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig.show()


In [None]:
# Per-chiller chilled-water flow rates over time (interactive).
fig4 = go.Figure()

chw_time_axis = np.arange(len(chw_flows))
for chiller_idx in range(N_CHILLERS):
    chw_trace = go.Scatter(
        x=chw_time_axis,
        y=chw_flows[:, chiller_idx],
        mode='lines',
        name=f'Chiller {chiller_idx + 1} CHW Flow',
    )
    fig4.add_trace(chw_trace)

# Legend anchored at the top-right corner.
fig4.update_layout(
    title='Chilled Water Flow Rates',
    xaxis_title='Time',
    yaxis_title='Flow Rate (GPM)',
    legend={'x': 1, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig4.show()


In [None]:
# Per-chiller condenser-water flow rates over time (interactive).
fig5 = go.Figure()

cdw_time_axis = np.arange(len(cdw_flows))
for chiller_idx in range(N_CHILLERS):
    cdw_trace = go.Scatter(
        x=cdw_time_axis,
        y=cdw_flows[:, chiller_idx],
        mode='lines',
        name=f'Chiller {chiller_idx + 1} CDW Flow',
    )
    fig5.add_trace(cdw_trace)

# Legend anchored at the top-right corner.
fig5.update_layout(
    title='Condenser Water Flow Rates',
    xaxis_title='Time',
    yaxis_title='Flow Rate (GPM)',
    legend={'x': 1, 'y': 1, 'traceorder': 'normal'},
    height=600,
    width=1200,
)

fig5.show()


In [None]:
import numpy as np
import plotly.graph_objects as go

# Plant efficiency (power per unit of cooling delivered) for the recorded
# operation and for the MPC result, both against the recorded cooling rate.
actual_power = data['plant_power'].values
predicted_power = [t[1] for t in optimal_trajectory]  # second tuple element is the model power
actual_cooling_rate = data['plant_cooling_rate'].values  # Cooling rate in tons

# Efficiency is undefined when no cooling is delivered. Mask zero-load steps
# with NaN so they appear as gaps in the plot instead of inf/NaN values
# accompanied by divide-by-zero RuntimeWarnings.
nonzero_cooling_rate = np.where(actual_cooling_rate != 0, actual_cooling_rate, np.nan)

# Calculate efficiency (kW/ton)
actual_efficiency = actual_power / nonzero_cooling_rate
optimized_efficiency = np.array(predicted_power) / nonzero_cooling_rate

# Plant Efficiency Plot
fig_efficiency = go.Figure()

# Add actual efficiency trace
fig_efficiency.add_trace(go.Scatter(x=np.arange(len(actual_efficiency)), y=actual_efficiency, mode='lines', name='Actual Efficiency'))

# Add optimized efficiency trace
fig_efficiency.add_trace(go.Scatter(x=np.arange(len(optimized_efficiency)), y=optimized_efficiency, mode='lines', name='Optimized Efficiency'))

# Update layout (legend anchored at the top-left corner)
fig_efficiency.update_layout(
    title='Plant Efficiency: Actual vs Optimized',
    xaxis_title='Time',
    yaxis_title='Efficiency (kW/ton)',
    legend=dict(x=0, y=1, traceorder='normal'),
    height=600,
    width=1200
)

fig_efficiency.show()

