# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple autoencoder using a neural network trained on the normal consumption data.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Step 1: Generate sample electricity consumption data (replace with your actual data)
# Baseline signal: a constant level of 5 plus a sinusoid of amplitude 2 and
# Gaussian noise with standard deviation 0.5, one value per time step.
np.random.seed(42)
time_steps = np.arange(1000)
normal_consumption = 5 + 2 * np.sin(0.1 * time_steps) + 0.5 * np.random.randn(len(time_steps))

# Introduce some anomalies (sudden spikes or drops in consumption).
# Positions are sampled WITHOUT replacement: randint() may return the same index
# more than once, in which case two perturbations land on one point (or a
# repeated index inside one fancy-indexed update is applied only once) and fewer
# than 15 distinct anomalies end up in the series.
anomaly_indices = np.random.choice(len(normal_consumption), size=15, replace=False)
normal_consumption[anomaly_indices[:5]] += 5  # Spikes
normal_consumption[anomaly_indices[5:10]] -= 3  # Drops
# Offset depends on the position itself and lies in [-7, 7]
normal_consumption[anomaly_indices[10:]] += 7 * np.sin(0.2 * anomaly_indices[10:])  # Different pattern

df = pd.DataFrame({'Consumption': normal_consumption})

# Normalize the data
from sklearn.preprocessing import MinMaxScaler

# Chronological 80/20 split point: rows before train_size are used for
# training, the remaining rows for testing.
train_size = int(len(df) * 0.8)

# Fit the scaler on the training rows only so that the range of the test set
# does not leak into preprocessing, then apply that fitted transform to the
# whole series. Test values outside the training range map outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df[['Consumption']].iloc[:train_size])
df['Consumption_Normalized'] = scaler.transform(df[['Consumption']]).ravel()
data = df['Consumption_Normalized'].values.reshape(-1, 1)  # one feature per sample

# Split data into training and testing sets
train_data = data[:train_size]
test_data = data[train_size:]

# Step 2: Build an Autoencoder: Construct a simple autoencoder using a neural network for the normal consumption data.
# Define the Autoencoder model
def build_autoencoder(input_shape):
    """Create and compile a small fully connected autoencoder.

    The network is a Keras ``Sequential`` stack: an input layer of
    ``input_shape``, three ReLU ``Dense`` layers with 32, 16 and 32 units,
    and a linear ``Dense`` output layer with ``input_shape[0]`` units so the
    output has as many features as the input.

    Args:
        input_shape: Shape tuple of a single sample; its first entry is used
            as the number of output units.

    Returns:
        The model, compiled with the Adam optimizer and mean-squared-error
        loss.
    """
    layer_stack = [keras.layers.Input(shape=input_shape)]
    for units in (32, 16, 32):
        layer_stack.append(keras.layers.Dense(units, activation='relu'))
    # Output layer mirrors the input width; linear so values are unbounded.
    layer_stack.append(keras.layers.Dense(input_shape[0], activation='linear'))

    autoencoder_model = keras.Sequential(layer_stack)
    autoencoder_model.compile(optimizer='adam', loss='mse')
    return autoencoder_model

# Seed Python, NumPy and the Keras backend before the model is created so that
# weight initialisation and batch shuffling are repeatable between runs; the
# NumPy seed set for data generation does not cover Keras.
keras.utils.set_random_seed(42)

input_shape = (1,) # Each data point is a single value
autoencoder = build_autoencoder(input_shape)
autoencoder.summary()

# Train the Autoencoder
# Input and target are the same array (the model learns to reproduce its input);
# validation_split=0.1 holds out 10% of train_data for the validation loss.
history = autoencoder.fit(train_data, train_data, epochs=50, batch_size=32, validation_split=0.1, verbose=0)

# Step 3: Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
# Reconstruct the test data and map both versions back to original consumption units
reconstructed_data_normalized = autoencoder.predict(test_data, verbose=0)
reconstructed_data = scaler.inverse_transform(reconstructed_data_normalized)
original_test_data = scaler.inverse_transform(test_data)

# Calculate the reconstruction error (squared error per sample, averaged over the feature axis)
mse = np.mean(np.square(original_test_data - reconstructed_data), axis=1)

# Define a threshold for anomaly detection (you might need to tune this).
# The cut-off is the 95th percentile of the reconstruction error on the TRAINING
# data. Taking the percentile of the test errors themselves would flag about 5%
# of the test points on every run, whether or not any of them are unusual.
train_reconstructed = scaler.inverse_transform(autoencoder.predict(train_data, verbose=0))
train_mse = np.mean(np.square(scaler.inverse_transform(train_data) - train_reconstructed), axis=1)
threshold = np.percentile(train_mse, 95)

anomalies_df = pd.DataFrame({'Original': original_test_data.flatten(), 'Reconstructed': reconstructed_data.flatten(), 'MSE': mse})
anomalies_df['Anomaly'] = anomalies_df['MSE'] > threshold
# Row i of anomalies_df corresponds to position train_size + i of the full series
anomaly_indices_detected = anomalies_df[anomalies_df['Anomaly']].index + train_size # Adjust index to original data

print("Number of Anomalies Detected:", anomalies_df['Anomaly'].sum())
print("\nIndices of Detected Anomalies in Original Data:", anomaly_indices_detected.tolist())

# Step 4: Visualize: Plot both the actual and reconstructed data to highlight anomalies.
# Full original series, the reconstruction over the test range, and red markers
# on the points flagged as anomalous.
fig_series, ax_series = plt.subplots(figsize=(12, 6))
ax_series.plot(df['Consumption'], label='Original Consumption')
ax_series.plot(df.index[train_size:], reconstructed_data, label='Reconstructed Consumption')

flagged_positions = df.index[anomaly_indices_detected]
flagged_values = df['Consumption'].iloc[anomaly_indices_detected]
ax_series.scatter(flagged_positions, flagged_values, color='red', label='Detected Anomaly')

ax_series.set_title('Anomaly Detection using Autoencoder')
ax_series.set_xlabel('Time Steps')
ax_series.set_ylabel('Electricity Consumption')
ax_series.legend()
ax_series.grid(True)
plt.show()

# Per-sample reconstruction error on the test set, with the anomaly threshold
# drawn as a dashed horizontal line.
fig_error, ax_error = plt.subplots(figsize=(10, 4))
ax_error.plot(anomalies_df['MSE'], label='Reconstruction Error (MSE)')
threshold_label = f'Anomaly Threshold ({threshold:.2f})'
ax_error.axhline(y=threshold, color='r', linestyle='--', label=threshold_label)
ax_error.set_title('Reconstruction Error')
ax_error.set_xlabel('Test Data Points')
ax_error.set_ylabel('Mean Squared Error')
ax_error.legend()
ax_error.grid(True)
plt.show()

ModuleNotFoundError: No module named 'tensorflow'