# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple neural-network autoencoder and train it on the
normal consumption data only.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [None]:
# write your code from here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Step 1: Create synthetic electricity consumption dataset
# Seed NumPy's global generator so the same readings are drawn on every run.
np.random.seed(42)
# 300 "normal" readings drawn from a Gaussian with mean 500 and std 50.
normal_data = np.random.normal(500, 50, 300)
# Three hand-picked extreme readings, placed after the normal ones.
anomalies = [1000, 1050, 1100]  # Injected anomalies
data = np.append(normal_data, anomalies)
# Single-column frame; the default integer index doubles as the time axis.
df = pd.Series(data, name='consumption').to_frame()

# Step 2: Preprocess and scale the data
# Step 1 places the normal readings first and the injected anomalies after
# them, so the first len(normal_data) rows of df are the normal ones.
n_normal = len(normal_data)
normal_rows = df[['consumption']].iloc[:n_normal]

# Fit the scaler on the normal readings only. Fitting on every row would let
# the injected outliers define the [0, 1] range and squash normal consumption
# into a narrow band. Anomalous rows therefore scale to values above 1.
scaler = MinMaxScaler()
scaler.fit(normal_rows)
scaled_data = scaler.transform(df[['consumption']])  # all rows, anomalies included
scaled_normal = scaled_data[:n_normal]

# Build Autoencoder
# NOTE(review): with a single input feature the hidden layers (16/8/16 units)
# are all wider than the input, so there is no real bottleneck. Consider
# feeding sliding windows of readings if a compressing model is wanted.
model = Sequential([
    Dense(16, activation='relu', input_shape=(1,)),
    Dense(8, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
# Train on normal consumption only, so the model never learns to reproduce
# the anomalies it is later asked to detect.
model.fit(scaled_normal, scaled_normal, epochs=50, batch_size=16, verbose=0)

# Step 3: Reconstruct and calculate reconstruction error
# Reconstruct every row (normal and anomalous); verbose=0 matches fit() above.
reconstructed = model.predict(scaled_data, verbose=0)
# Per-row squared error, averaged over the (single) feature axis.
mse = np.mean(np.square(scaled_data - reconstructed), axis=1)
# Derive the threshold from the normal rows' errors only; a percentile over
# all rows would flag a fixed share of the data whether or not it is
# anomalous. By construction about 5% of the normal rows still exceed this
# value; raise the percentile to get fewer false alarms.
threshold = np.percentile(mse[:n_normal], 95)  # Dynamic threshold
df['reconstruction_error'] = mse
df['anomaly'] = df['reconstruction_error'] > threshold

# Step 4: Visualize actual vs reconstructed and anomalies
# Rows whose reconstruction error exceeded the threshold.
flagged = df.loc[df['anomaly']]
# Bring the model output back to consumption units for a like-for-like plot.
reconstructed_consumption = scaler.inverse_transform(reconstructed)

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df.index, df['consumption'], marker='o', label='Actual Consumption')
ax.plot(df.index, reconstructed_consumption, linestyle='dashed', label='Reconstructed')
# zorder=5 draws the red markers on top of both lines.
ax.scatter(flagged.index, flagged['consumption'], color='red', label='Anomalies', zorder=5)
ax.set_title("Electricity Consumption - Autoencoder Anomaly Detection")
ax.set_xlabel("Time Index")
ax.set_ylabel("Consumption")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

print(f"Total anomalies detected: {df['anomaly'].sum()}")
