In [170]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [171]:
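# Disabled alternative dataset: California housing is tabular, so it cannot feed the 28x28x1 CNN below.
# NOTE(review): presumably the source of the (None, 8) input in the recorded training traceback — confirm.
#(X_train, y_train), (X_test, y_test) = keras.datasets.california_housing.load_data()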

In [172]:
# Load the MNIST dataset through the Keras dataset helper. The call yields two
# (images, labels) pairs: one for training and one for testing.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()


In [173]:
# Give both image arrays an explicit channel axis so they are 4D tensors of
# shape (batch_size, 28, 28, 1), which is what the convolutional layers consume.
X_train, X_test = (
    images.reshape(-1, 28, 28, 1) for images in (X_train, X_test)
)

In [174]:
# Scale pixel values to float32 in [0, 1] by dividing by 255.
# Re-running this cell divides again, so run it exactly once per fresh kernel.
X_train, X_test = (
    images.astype('float32') / 255 for images in (X_train, X_test)
)

In [175]:
# Split the data into training and validation sets: 20% of the training images
# are held out for validation, and the fixed random_state makes the split repeatable.
# The labels are split alongside the images, although the autoencoder cells
# visible below fit on images only.
# X_train is reassigned from itself, so re-running this cell shrinks it again.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [176]:
# Define the CNN autoencoder model.
# Shape trace (unpadded 3x3 convolutions trim 2 pixels per side length, 2x2 pooling halves it):
#   encoder: 28x28x1 -> 26x26x32 -> 13x13x32 -> 11x11x64 -> 5x5x64 -> 3x3x64 -> 576 -> 128
#   decoder: 128 -> 3136 -> 7x7x64 -> 14x14x64 -> 28x28x32 -> 28x28x1
model = keras.Sequential([
    # Declare the input with an explicit Input object rather than passing
    # `input_shape=` to the first layer; the latter triggers the Keras warning
    # recorded in this cell's output.
    keras.Input(shape=(28, 28, 1)),
    # --- Encoder ---
    keras.layers.Conv2D(32, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.Flatten(),
    # 128-unit bottleneck: the compressed representation of each image.
    keras.layers.Dense(128, activation='relu'),
    # --- Decoder ---
    # Expand back to a 7x7x64 feature map so two stride-2 transposed
    # convolutions land exactly on 28x28.
    keras.layers.Dense(7*7*64, activation='relu'),
    keras.layers.Reshape((7, 7, 64)),
    keras.layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', activation='relu'),
    keras.layers.Conv2DTranspose(32, (3, 3), strides=2, padding='same', activation='relu'),
    # Sigmoid keeps reconstructed pixels in [0, 1], matching the normalized inputs.
    keras.layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [177]:
# Compile the model with the Adam optimizer and mean squared error as the loss.
# No additional metrics are registered.
model.compile(optimizer='adam', loss='mean_squared_error')

In [178]:
# Train the model as an autoencoder: the images are passed as both input and
# target, and the held-out validation images are used the same way.
# NOTE(review): the traceback recorded under this cell reports an input of shape
# (None, 8), which does not match the (28, 28, 1) tensors prepared above —
# presumably stale kernel state; re-run from a fresh kernel to confirm.
history = model.fit(X_train, X_train, epochs=10, 
                    validation_data=(X_val, X_val), 
                    verbose=2)


Epoch 1/10


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(None, 8), dtype=float32). Expected shape (None, 28, 28, 1), but input has incompatible shape (None, 8)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 8), dtype=float32)
  • training=True
  • mask=None

In [None]:
# Evaluate the model on the test set, using the test images as both input and
# target. The model was compiled with an MSE loss and no extra metrics, so the
# value formatted below is the test-set reconstruction loss.
mse = model.evaluate(X_test, X_test)
print(f'Test MSE: {mse:.3f}')


In [None]:
# Threshold-based anomaly detection on top of the trained autoencoder
def detect_anomaly(x, threshold=0.02):
    """Flag samples whose reconstruction error is above ``threshold``.

    Uses the notebook-level ``model`` to reconstruct ``x`` and averages the
    squared difference over axes 1, 2 and 3, giving one error per sample.

    Args:
        x: 4D array of inputs (first axis indexes samples).
        threshold: Error value above which a sample counts as anomalous.

    Returns:
        Boolean array with one entry per sample; True means error > threshold.
    """
    residual = x - model.predict(x)
    per_sample_mse = np.square(residual).mean(axis=(1, 2, 3))
    return per_sample_mse > threshold

In [None]:
# Calculate entropy
def calculate_entropy(labels):
    """Return the Shannon entropy (in bits) of the label distribution.

    Args:
        labels: Array-like of discrete labels (ints, bools, ...). Arrays of any
            rank are accepted; every element counts as one observation.

    Returns:
        Non-negative entropy in bits. A single-class or empty input yields
        exactly 0.0.
    """
    _, counts = np.unique(labels, return_counts=True)
    # np.unique counts over the flattened input, so normalise by the total of
    # those counts rather than len(labels), which is only the first dimension.
    probabilities = counts / counts.sum()
    entropy = -np.sum(probabilities * np.log2(probabilities))
    # Negating a zero sum produces IEEE negative zero, which would be printed
    # as "-0.000"; adding 0.0 folds it back to +0.0 without changing other values.
    return entropy + 0.0


In [None]:
# Test the anomaly detection function on a small batch
X_anomaly = X_test[:10]  # first ten test images; these are unmodified test samples, not synthetic outliers
y_pred = detect_anomaly(X_anomaly)
print(y_pred)  # one boolean per image: True where the reconstruction error exceeds the default threshold


In [None]:
# Visualize the original and reconstructed images
def visualize_reconstructions(original, reconstructed, num_images=9):
    """Draw originals and reconstructions as two grids of 28x28 grayscale images.

    One figure is created for the originals and one for the reconstructions;
    both are shown together at the end.

    Args:
        original: Sequence/array of images, each reshapeable to (28, 28).
        reconstructed: Reconstructions aligned index-by-index with ``original``.
        num_images: Maximum number of images per figure. The grid is 3x3 for up
            to nine images and grows for larger requests.
    """
    # Never index past the end of either input, and treat negative requests as zero.
    count = max(0, min(num_images, len(original), len(reconstructed)))
    # Keep the 3x3 layout for small requests; grow to a near-square grid
    # when more than nine images are requested so every subplot index is valid.
    cols = max(3, int(np.ceil(np.sqrt(count))))
    rows = max(3, int(np.ceil(count / cols)))

    for images, label in ((original, 'Original'), (reconstructed, 'Reconstructed')):
        plt.figure(figsize=(10, 10))
        for i in range(count):
            plt.subplot(rows, cols, i + 1)
            plt.imshow(images[i].reshape(28, 28), cmap='gray')
            plt.title(f'{label} {i + 1}')
            plt.axis('off')
    plt.show()


In [None]:
# Reconstruct the ten selected test images with the autoencoder for visual comparison.
reconstructed = model.predict(X_anomaly)

In [None]:
# Plot the selected originals and their reconstructions (num_images is left at its default of 9).
visualize_reconstructions(X_anomaly, reconstructed)

In [None]:
# Per-sample reconstruction error of an autoencoder
def calculate_reconstruction_errors(model, data):
    """Return the mean squared reconstruction error of each sample.

    Args:
        model: Object exposing ``predict(data)`` that returns an array with the
            same shape as ``data``.
        data: 4D array of inputs (first axis indexes samples).

    Returns:
        1D array with one error per sample: the squared difference between
        input and reconstruction, averaged over axes 1, 2 and 3.
    """
    residual = data - model.predict(data)
    return np.square(residual).mean(axis=(1, 2, 3))

In [None]:
# Calculate reconstruction errors: one mean squared error per test image.
reconstruction_errors = calculate_reconstruction_errors(model, X_test)

In [None]:
# Set a threshold for anomaly detection (for demonstration, using 95th percentile).
# Because the threshold is a percentile of these same errors, roughly 5% of the
# test images are flagged regardless of how well the model reconstructs them.
threshold = np.percentile(reconstruction_errors, 95)
# Despite its name, this mask covers every test image; a later cell reassigns
# simulated_y_pred for the 200-sample simulated subset.
simulated_y_pred = reconstruction_errors > threshold

In [None]:
# Calculate entropy before and after thresholding.
# The simulated ground truth is built here because this cell runs before the
# later cell that defines it; without it a fresh-kernel run stops with NameError.
true_labels = np.array([0] * 100 + [1] * 100)
# Score the same 200 test images the labels refer to (X_test[:200]) so both
# entropies describe the same samples, not 200 labels versus the whole test set.
simulated_y_pred = reconstruction_errors[:200] > threshold
entropy_before = calculate_entropy(true_labels)
entropy_after = calculate_entropy(simulated_y_pred)


In [None]:
# Calculate information gain, defined here as the entropy of the true labels
# minus the entropy of the thresholded predictions.
# NOTE(review): this difference of two marginal entropies is not the mutual
# information between labels and predictions, and it can be negative — confirm
# this is the intended quantity.
information_gain = entropy_before - entropy_after

In [None]:
# Detect anomalies across the whole test set: True where an image's
# reconstruction error exceeds the threshold.
y_pred = reconstruction_errors > threshold


In [None]:
# MNIST ships without anomaly labels, so a toy ground truth is fabricated:
# the first 100 test images are labelled normal (0), the next 100 anomalous (1).
# NOTE(review): both halves are unmodified test images, so these labels are
# arbitrary and the metrics computed from them are for demonstration only.
true_labels = np.repeat([0, 1], 100)
# The two consecutive 100-image slices are exactly the first 200 test images.
simulated_data = X_test[:200].copy()


In [None]:
# Calculate reconstruction errors for simulated data, then flag the samples whose
# error exceeds the threshold derived earlier from the full test set.
simulated_reconstruction_errors = calculate_reconstruction_errors(model, simulated_data)
simulated_y_pred = simulated_reconstruction_errors > threshold


In [None]:
# Score the thresholded predictions against the simulated labels with
# precision, recall and F1, in that order.
precision, recall, f1 = (
    score(true_labels, simulated_y_pred)
    for score in (precision_score, recall_score, f1_score)
)


In [None]:
# Emit the metric summary, the per-class report and the entropy figures,
# one item per line, in a single call.
print(
    f'Precision: {precision:.3f}',
    f'Recall: {recall:.3f}',
    f'F1-Score: {f1:.3f}',
    '\nClassification Report:',
    classification_report(true_labels, simulated_y_pred),
    f'Entropy before thresholding: {entropy_before:.3f}',
    f'Entropy after thresholding: {entropy_after:.3f}',
    f'Information Gain: {information_gain:.3f}',
    sep='\n',
)

In [None]:

# Confusion matrix of simulated labels (rows) versus thresholded predictions (columns)
conf_matrix = confusion_matrix(true_labels, simulated_y_pred)
print('\nConfusion Matrix:', conf_matrix, sep='\n')


In [None]:
# Render the confusion matrix as an annotated heatmap; the axes returned by
# seaborn are labelled in one call instead of through separate pyplot calls.
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap="YlGnBu", fmt="d").set(
    xlabel='Predicted',
    ylabel='Actual',
    title='Confusion Matrix Heatmap',
)
plt.show()