In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam

# Load and preprocess the data
data = pd.read_csv('../../data/BCP.csv')

# Drop the Time column and scale the features
features = data.drop(columns=['Time'])
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Define VAE parameters
input_dim = features_scaled.shape[1]  # Number of features
latent_dim = 2  # Dimension of the latent space

# Encoder
inputs = Input(shape=(input_dim,))
h = Dense(16, activation='relu')(inputs)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=1.0)
    return z_mean + K.exp(z_log_var / 2) * epsilon

# Sampling layer
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder
decoder_h = Dense(16, activation='relu')
decoder_mean = Dense(input_dim)
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# Define VAE model
vae = Model(inputs, x_decoded_mean)

# Loss function
def vae_loss(inputs, x_decoded_mean):
    # Reconstruction loss
    reconstruction_loss = MeanSquaredError()(inputs, x_decoded_mean)
    reconstruction_loss *= input_dim  # Scale by input dimension to match overall scale

    # KL divergence loss
    kl_loss = 0.5 * K.sum(K.square(z_mean) + K.exp(z_log_var) - z_log_var - 1, axis=-1)
    
    # Combine losses
    return K.mean(reconstruction_loss + kl_loss)

# Compile the model
vae.compile(optimizer=Adam(), loss=vae_loss)

# Train the VAE model
vae.fit(features_scaled, features_scaled,
        epochs=50,  # Adjust epochs as needed
        batch_size=32,
        validation_split=0.2)

# Calculate reconstruction loss for anomaly detection
reconstructed = vae.predict(features_scaled)
reconstruction_error = np.mean(np.square(features_scaled - reconstructed), axis=1)

# Set a threshold for anomaly detection
threshold = np.percentile(reconstruction_error, 95)  # Using the 95th percentile as threshold

# Identify anomalies
anomalies = reconstruction_error > threshold

# Print results
print("Threshold for anomaly detection:", threshold)
print("Number of anomalies detected:", np.sum(anomalies))


Epoch 1/50


ValueError: Tried to convert 'x' to a tensor and failed. Error: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```
