# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple neural-network autoencoder and train it on
the normal consumption data.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [None]:
# write your code from here


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# --------- Function: Generate Data ---------
def generate_data(n_normal=500, anomalies=None, seed=42):
    """Build a synthetic consumption series with anomalies appended at the end.

    Args:
        n_normal: Number of "normal" readings to draw from N(0.5, 0.1).
        anomalies: Values appended after the normal readings. When ``None``,
            a fixed set of five out-of-range values is used.
        seed: Seed applied to NumPy's global random generator before sampling.

    Returns:
        A DataFrame with a single 'Consumption' column holding the normal
        readings followed by the anomalous values.
    """
    injected = [1.2, 1.3, 0.05, 1.4, 0.02] if anomalies is None else anomalies

    # Seeds the *global* NumPy generator, so repeated calls are reproducible.
    np.random.seed(seed)
    baseline = np.random.normal(loc=0.5, scale=0.1, size=n_normal)

    readings = np.concatenate((baseline, injected))
    return pd.DataFrame({'Consumption': readings})

# --------- Function: Build Autoencoder ---------
def build_autoencoder(input_dim):
    """Create and compile a small fully connected autoencoder.

    The layers are 8 -> 4 -> 8 ReLU units followed by a sigmoid output layer
    of width ``input_dim``. The model is compiled with the Adam optimizer
    (constructed with 0.001) and mean-squared-error loss.

    Args:
        input_dim: Number of features per sample (input and output width).

    Returns:
        The compiled ``Sequential`` model.
    """
    autoencoder = Sequential()

    # Encoder entry layer also declares the expected input shape.
    autoencoder.add(Dense(8, activation='relu', input_shape=(input_dim,)))

    # Bottleneck (4 units) followed by the first decoder layer (8 units).
    for units in (4, 8):
        autoencoder.add(Dense(units, activation='relu'))

    # Sigmoid keeps the reconstruction within (0, 1).
    autoencoder.add(Dense(input_dim, activation='sigmoid'))

    autoencoder.compile(loss='mse', optimizer=Adam(0.001))
    return autoencoder

# --------- Function: Detect Anomalies ---------
def detect_anomalies(data, model, scaler, error_threshold=None, n_normal=500):
    """Flag samples whose reconstruction error exceeds a threshold.

    Args:
        data: 2-D array-like of raw samples (one row per sample), passed to
            ``scaler.transform``.
        model: Object with a ``predict`` method that reconstructs the scaled
            samples.
        scaler: Fitted object with a ``transform`` method.
        error_threshold: Fixed threshold on the per-sample mean squared
            reconstruction error. When ``None`` it is derived from the
            baseline samples as ``mean + 3 * std`` of their errors.
        n_normal: Number of leading samples treated as the "normal" baseline
            when deriving the threshold. ``None`` uses every sample. Ignored
            when ``error_threshold`` is given. Defaults to 500.

    Returns:
        A tuple ``(errors, is_anomaly)``: the per-sample reconstruction errors
        and a boolean array that is True where the error exceeds the threshold.

    Raises:
        ValueError: If a threshold must be derived and ``n_normal`` is less
            than 1.
    """
    scaled = scaler.transform(data)
    reconstructed = model.predict(scaled)

    # Mean squared error per sample (averaged over the feature axis).
    errors = np.mean(np.square(scaled - reconstructed), axis=1)

    if error_threshold is None:
        if n_normal is None:
            baseline = errors
        elif n_normal < 1:
            # An empty baseline would give a NaN threshold and silently flag
            # nothing; a negative value would slice from the wrong end.
            raise ValueError("n_normal must be a positive integer or None")
        else:
            # Slicing past the end is harmless: it just uses all samples.
            baseline = errors[:n_normal]
        error_threshold = np.mean(baseline) + 3 * np.std(baseline)

    return errors, errors > error_threshold

# --------- Function: Visualize ---------
def visualize_anomalies(df, reconstructed, is_anomaly):
    """Plot actual and reconstructed consumption and mark the flagged points.

    Args:
        df: DataFrame with a 'Consumption' column.
        reconstructed: Reconstructed consumption values to overlay.
        is_anomaly: Boolean mask selecting the rows of ``df`` to highlight
            as red scatter points.
    """
    plt.figure(figsize=(14, 6))

    # Line traces: the observed series and the model's reconstruction.
    actual = df['Consumption']
    plt.plot(actual, label='Actual')
    plt.plot(reconstructed, alpha=0.7, label='Reconstructed')

    # Overlay the flagged samples at their original index positions.
    flagged_index = df.index[is_anomaly]
    flagged_values = df['Consumption'][is_anomaly]
    plt.scatter(flagged_index, flagged_values, color='red', label='Anomalies')

    plt.xlabel("Time Index")
    plt.ylabel("Electricity Consumption")
    plt.title("Autoencoder Anomaly Detection")
    plt.grid(True)
    plt.legend()
    plt.show()

# --------- Main Pipeline ---------
def main():
    """Run the full demo: generate data, train, detect anomalies, and plot.

    NOTE: both the scaler and the autoencoder are fit on the complete
    generated dataset, i.e. including the injected anomalous values.
    """
    df = generate_data()

    # Sanity checks on the generated frame before any modelling.
    assert not df.isnull().values.any(), "Missing values in data"
    assert 'Consumption' in df.columns, "'Consumption' column missing"

    # Scale the single feature column into [0, 1] for the sigmoid output.
    features = df[['Consumption']]
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(features)

    # The autoencoder learns to reproduce its own (scaled) input.
    model = build_autoencoder(input_dim=1)
    model.fit(scaled_data, scaled_data, epochs=50, batch_size=32, verbose=0)

    # Score every sample and store the results next to the raw readings.
    errors, flags = detect_anomalies(features, model, scaler)
    df['ReconstructionError'], df['Anomaly'] = errors, flags

    # Map the reconstruction back to original units for plotting.
    reconstruction = model.predict(scaled_data)
    reconstructed_unscaled = scaler.inverse_transform(reconstruction)
    visualize_anomalies(df, reconstructed_unscaled, df['Anomaly'])

    print(f"Total Anomalies Detected: {df['Anomaly'].sum()}")

# Run the modular pipeline only when executed directly (script or notebook
# cell), so importing this module does not train a model or open a plot.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'tensorflow'