# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple neural-network autoencoder and train it on the normal consumption data.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [1]:
# write your code from here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

# Step 1: Generate Synthetic Electricity Consumption Data
# Simulate 1000 days of consumption; the fixed seed keeps the data reproducible.
np.random.seed(42)
n_days = 1000
anomaly_step = 50  # one injected anomaly every 50th day
days = np.arange(n_days)
consumption = np.random.normal(loc=200, scale=50, size=n_days)  # Normal consumption (mean=200, std=50)

# Introduce anomalies (unusually high consumption) and remember where they are,
# so that the model can later be trained on the normal days only.
is_injected = np.zeros(n_days, dtype=bool)
is_injected[::anomaly_step] = True
consumption[is_injected] += np.random.normal(loc=300, scale=100, size=int(is_injected.sum()))
is_normal = ~is_injected

# Create DataFrame
df = pd.DataFrame({'Day': days, 'Consumption': consumption})

# Step 2: Preprocess the Data (Standardize)
# Fit the scaler on normal days only: fitting on the full series would let the
# injected spikes inflate the mean/std that define what "normal" looks like.
scaler = StandardScaler()
scaler.fit(df.loc[is_normal, ['Consumption']])
df_scaled = scaler.transform(df[['Consumption']])

# Step 3: Build the Autoencoder
# NOTE(review): the input has a single feature, so no hidden layer is narrower
# than the input and the network has no compressing bottleneck. Detection
# therefore relies on the model reconstructing poorly outside the value range
# it was trained on. Consider feeding sliding windows of several days if a true
# bottleneck is wanted.
input_layer = Input(shape=(1,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(1)(decoded)

autoencoder = Model(input_layer, decoded)

# Compile the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Step 4: Train the Autoencoder on normal data (no anomalies)
# Only the rows that were not injected with a spike are used as training data,
# so the model never learns to reconstruct anomalous values.
normal_scaled = df_scaled[is_normal]
autoencoder.fit(normal_scaled, normal_scaled, epochs=50, batch_size=32, shuffle=True, verbose=1)

# Step 5: Predict/reconstruct the data (all days, normal and anomalous)
reconstructed = autoencoder.predict(df_scaled)

# Step 6: Calculate the reconstruction error (mean squared error per sample)
reconstruction_error = np.mean(np.square(df_scaled - reconstructed), axis=1)

# Step 7: Identify anomalies based on reconstruction error
# The threshold is the 95th percentile of the errors on the normal (training)
# days, so it describes how well normal data is reconstructed instead of
# forcing a fixed 5% of all points to be flagged.
threshold = np.percentile(reconstruction_error[is_normal], 95)
df['Anomaly'] = np.where(reconstruction_error > threshold, 'Anomaly', 'Normal')

# Step 8: Visualize the Actual vs Reconstructed Data and Highlight Anomalies
plt.figure(figsize=(12, 6))

# Plot actual data (electricity consumption)
plt.plot(df['Day'], df['Consumption'], label='Actual Consumption', color='blue')

# Plot reconstructed data, mapped back to the original consumption units
reconstructed_consumption = scaler.inverse_transform(reconstructed).ravel()
plt.plot(df['Day'], reconstructed_consumption, label='Reconstructed Consumption', color='green', linestyle='--')

# Highlight anomalies
anomalies = df[df['Anomaly'] == 'Anomaly']
plt.scatter(anomalies['Day'], anomalies['Consumption'], color='red', label='Anomalies', zorder=5)

plt.title('Electricity Consumption with Anomalies Detected by Autoencoder')
plt.xlabel('Day')
plt.ylabel('Consumption')
plt.legend()
plt.grid(True)
plt.show()

# Step 9: Print the detected anomalies
print(f"Detected Anomalies: {anomalies.shape[0]}")


ModuleNotFoundError: No module named 'tensorflow'