In [24]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.layers import Input, Dense, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse
import optuna

# Read the chiller dataset and keep only the sensor features plus the label.
# NOTE(review): the 'ï»¿' prefix on the first column name looks like a
# byte-order mark as rendered by the 'unicode_escape' codec -- confirm before
# changing either the encoding or the column name.
data = pd.read_csv(
    '/Users/hsiaopingni/Desktop/Hsiao-Ping PhD/Data/paper- Chiller/Anomaly_Detection_Results_VAE.csv',
    encoding='unicode_escape',
)
data = data[[
    'ï»¿Discharge Temp (F)',
    'Input % full load amps (Motor) (%)',
    'Condenser liq temp IN (F)',
    'Condenser liq temp OUT (F)',
    'Chilled liq temp IN (F)',
    'Chilled liq temp OUT (F)',
    'Condenser saturation (F)',
    'Evaporator saturation (F)',
    'Evaporator pressure (PSIG)',
    'Condenser pressure (PSIG)',
    'Oil sump temp (F)',
    'Oil pressure (PSIG)',
    'Anomaly',
]]

# Fit the min-max scaler on the feature columns of every row (label excluded)
# and keep the scaled feature matrix for training.
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data.drop(columns=['Anomaly']))

# Partition the rows by label: 1 is treated as anomalous, 0 as normal.
anomalies = data.loc[data['Anomaly'] == 1]
normal_data = data.loc[data['Anomaly'] == 0]

# Scale each partition with the scaler fitted above so that both subsets share
# the same feature ranges as `data_scaled`.
anomalies_scaled = scaler.transform(anomalies.drop(columns=['Anomaly']))
normal_data_scaled = scaler.transform(normal_data.drop(columns=['Anomaly']))

# Define the Sampling layer for VAE
class Sampling(Layer):
    """Draw a latent vector from (mean, log-variance) via reparameterization."""

    def call(self, inputs):
        """Return ``mean + exp(0.5 * log_var) * eps`` with ``eps`` standard normal.

        ``inputs`` is unpacked into two tensors, the mean and the log-variance.
        The noise tensor is shaped from the first two dimensions of the mean.
        """
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        noise = tf.keras.backend.random_normal(
            shape=(mean_shape[0], mean_shape[1])
        )
        # exp(0.5 * log_var) converts the log-variance into a standard deviation.
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * noise

# Define VAE architecture
def build_vae(input_dim, hidden_dims, latent_dim):
    """Assemble a dense variational autoencoder with its loss attached.

    Args:
        input_dim: Number of features in each input row.
        hidden_dims: Widths of the hidden Dense layers; applied in order in
            the encoder and in reverse order in the decoder.
        latent_dim: Size of the latent vector.

    Returns:
        A ``(vae, encoder, decoder)`` tuple of Keras models. The ``vae`` model
        already carries its loss through ``add_loss``, so it is compiled
        without a separate loss argument.
    """
    # --- Encoder: inputs -> hidden stack -> (mean, log_var, sampled z) ---
    inputs = Input(shape=(input_dim,))
    hidden = inputs
    for width in hidden_dims:
        hidden = Dense(width, activation='relu')(hidden)
    mean = Dense(latent_dim)(hidden)
    log_var = Dense(latent_dim)(hidden)
    z = Sampling()([mean, log_var])
    encoder = Model(inputs, [mean, log_var, z], name="encoder")

    # --- Decoder: latent vector -> mirrored hidden stack -> reconstruction ---
    latent_inputs = Input(shape=(latent_dim,))
    hidden = latent_inputs
    for width in reversed(hidden_dims):
        hidden = Dense(width, activation='relu')(hidden)
    # Sigmoid keeps every reconstructed feature inside (0, 1).
    outputs = Dense(input_dim, activation='sigmoid')(hidden)
    decoder = Model(latent_inputs, outputs, name="decoder")

    # --- End-to-end model: decode the sampled latent vector ---
    _, _, z_sampled = encoder(inputs)
    reconstructed = decoder(z_sampled)
    vae = Model(inputs, reconstructed, name="vae")

    # --- Loss: per-sample reconstruction error plus KL divergence ---
    # mse() averages over the feature axis; multiplying by input_dim turns the
    # mean back into a sum of squared errors per sample.
    reconstruction_loss = mse(inputs, reconstructed) * input_dim
    kl_loss = tf.reduce_sum(
        1 + log_var - tf.square(mean) - tf.exp(log_var), axis=-1
    ) * -0.5
    vae.add_loss(tf.reduce_mean(reconstruction_loss + kl_loss))

    return vae, encoder, decoder

# Define the objective function for Optuna
def objective(trial):
    """Optuna objective: train a VAE with sampled hyperparameters.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The lowest validation loss seen across the training epochs.
    """
    # Drop Keras global state left over from earlier trials so that models
    # built by previous trials do not pile up over the course of the study.
    tf.keras.backend.clear_session()

    # Suggest hyperparameters using TPE
    hidden_dim1 = trial.suggest_int('hidden_dim1', 32, 128, step=16)
    hidden_dim2 = trial.suggest_int('hidden_dim2', 16, 64, step=16)
    latent_dim = trial.suggest_int('latent_dim', 2, 16, step=2)
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # the parameter name is unchanged so study.best_params keeps the same keys.
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 64, step=16)

    hidden_dims = [hidden_dim1, hidden_dim2]

    # Split data into training and validation sets; the fixed random_state
    # gives every trial the same split.
    X_train, X_val = train_test_split(data_scaled, test_size=0.2, random_state=42)

    # Train the VAE using training and validation data
    vae, encoder, decoder = build_vae(
        input_dim=data_scaled.shape[1],
        hidden_dims=hidden_dims,
        latent_dim=latent_dim,
    )
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
    history = vae.fit(
        X_train,
        X_train,
        validation_data=(X_val, X_val),
        epochs=100,
        batch_size=batch_size,
        verbose=0,
    )

    # Score the trial by the best (lowest) validation loss over all epochs.
    return min(history.history['val_loss'])

# Run Optuna study using TPE
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=100)

# Get the best hyperparameters from Optuna
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final VAE on all scaled rows using the best hyperparameters
hidden_dims = [best_params['hidden_dim1'], best_params['hidden_dim2']]
latent_dim = best_params['latent_dim']
learning_rate = best_params['learning_rate']
batch_size = best_params['batch_size']

vae, encoder, decoder = build_vae(input_dim=data_scaled.shape[1], hidden_dims=hidden_dims, latent_dim=latent_dim)
vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
history = vae.fit(data_scaled, data_scaled, epochs=100, batch_size=batch_size, verbose=1)


def _generate_samples(model, source_scaled, target_size, feature_columns, label):
    """Build a labelled DataFrame of `target_size` rows produced by `model`.

    The model is run over `source_scaled` repeatedly until at least
    `target_size` rows have been collected; the result is then truncated to
    exactly `target_size` rows.

    Args:
        model: Object with a ``predict`` method (here the trained VAE).
        source_scaled: Scaled feature rows fed to ``model.predict``.
        target_size: Number of rows to return.
        feature_columns: Column names for the generated features.
        label: Value written to the ``label_column`` column of every row.

    Returns:
        A DataFrame with the feature columns plus ``label_column``.

    Raises:
        ValueError: If rows are requested but `source_scaled` is empty, since
            the collection loop could then never reach `target_size`.
    """
    if target_size > 0 and len(source_scaled) == 0:
        raise ValueError("Cannot generate samples from an empty source array.")

    rows = []
    while len(rows) < target_size:
        rows.extend(model.predict(source_scaled))

    frame = pd.DataFrame(rows[:target_size], columns=feature_columns)
    frame['label_column'] = label
    return frame


# NOTE(review): the generated rows are in the scaler's normalized feature
# space; nothing here applies scaler.inverse_transform, so the exported values
# are not in the physical units named in the column headers -- confirm that
# this is what downstream consumers expect.

# Augment the normal data (generate `desired_normal_size` normal samples)
desired_normal_size = 2000
augmented_normal_data = _generate_samples(
    vae, normal_data_scaled, desired_normal_size, normal_data.columns[:-1], 0
)  # Label 0 for normal data

# Augment the anomalies (generate `desired_anomaly_size` anomaly samples)
desired_anomaly_size = 500
augmented_anomaly_data = _generate_samples(
    vae, anomalies_scaled, desired_anomaly_size, anomalies.columns[:-1], 1
)  # Label 1 for anomalies

# Combine the generated normal and anomaly samples (the original rows are not
# included in the exported dataset)
augmented_data = pd.concat([augmented_normal_data, augmented_anomaly_data], axis=0)

# Shuffle the augmented data
augmented_data = shuffle(augmented_data, random_state=42)

# Check final shape of the augmented dataset
print(f"Final augmented data shape: {augmented_data.shape}")

# Save the augmented data to an Excel file
output_path = '/Users/hsiaopingni/Desktop/Final_Imbalanced_Augmented_Chiller_Data_VAE.xlsx'
augmented_data.to_excel(output_path, index=False)
print(f"Augmented balanced data saved to {output_path}.")


[I 2024-11-24 21:03:13,069] A new study created in memory with name: no-name-2523d367-af9e-4ca2-b937-112caa63a43a
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
[I 2024-11-24 21:03:14,345] Trial 0 finished with value: 0.8475634455680847 and parameters: {'hidden_dim1': 128, 'hidden_dim2': 32, 'latent_dim': 8, 'learning_rate': 0.009513095938512645, 'batch_size': 48}. Best is trial 0 with value: 0.8475634455680847.
[I 2024-11-24 21:03:15,471] Trial 1 finished with value: 0.9280211329460144 and parameters: {'hidden_dim1': 128, 'hidden_dim2': 16, 'latent_dim': 6, 'learning_rate': 0.0002790749916953631, 'batch_size': 64}. Best is trial 0 with value: 0.8475634455680847.
[I 2024-11-24 21:03:16,548] Trial 2 finished with value: 0.9541277885437012 and parameters: {'hidden_dim1': 128, 'hidden_dim2': 48, 'latent_dim': 8, 'learning_rate': 0.0003241322951655266, 'batch_size': 48}. Best is trial 0 with value: 0.8475634455680847.
[I 2024-11-24 21:03:17,661] Trial 3 finished wi

Best hyperparameters: {'hidden_dim1': 128, 'hidden_dim2': 64, 'latent_dim': 6, 'learning_rate': 0.007177297073306272, 'batch_size': 16}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Ep