In [None]:
### anomaly_detection.ipynb
```python
# Advanced Time Series Anomaly Detection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed
import warnings
warnings.filterwarnings('ignore')

# Generate synthetic time series data with anomalies:
# a noisy sine wave sampled daily, with six short windows of injected offsets.
# The seed is fixed so the NumPy draws below are repeatable; their order
# (noise first, then one sign and five magnitudes per window) determines the data.
np.random.seed(42)
dates = pd.date_range('2020-01-01', periods=1000, freq='D')
normal_data = np.sin(np.arange(1000) * 0.1) + 0.1 * np.random.randn(1000)
anomalies = np.zeros(1000)  # ground-truth labels: 1 marks an injected anomaly
anomaly_indices = [100, 200, 300, 500, 700, 800]  # start index of each 5-point window
for idx in anomaly_indices:
    # One sign (-3 or +3) is drawn per window and scaled by five uniform [0, 1)
    # draws, so all five points shift in the same direction by less than 3.
    normal_data[idx:idx+5] += np.random.choice([-3, 3]) * np.random.rand(5)
    anomalies[idx:idx+5] = 1

# Bundle timestamps, values and ground-truth labels into one frame.
df = pd.DataFrame({
    'date': dates,
    'value': normal_data,
    'is_anomaly': anomalies
})

print("Dataset shape:", df.shape)
# Six non-overlapping 5-point windows out of 1000 points, i.e. 3%.
print("Anomaly percentage:", df['is_anomaly'].mean() * 100, "%")

# Visualization: the full series with the labelled anomaly points overlaid in red.
plt.figure(figsize=(15, 6))
plt.plot(df['date'], df['value'], label='Time Series', alpha=0.7)
anomaly_points = df[df['is_anomaly'] == 1]
plt.scatter(anomaly_points['date'], anomaly_points['value'],
           color='red', label='True Anomalies', s=50)
plt.title('Time Series with Anomalies')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# 1. Isolation Forest Anomaly Detection
# The model is fit on the raw 'value' column only. contamination=0.1 asks it to
# flag about 10% of the points, which is more than the 3% injected above.
iso_forest = IsolationForest(contamination=0.1, random_state=42)
iso_predictions = iso_forest.fit_predict(df[['value']])
# scikit-learn marks outliers as -1; convert to 1 = anomaly, 0 = normal.
iso_predictions = (iso_predictions == -1).astype(int)

# 2. LSTM Autoencoder for Anomaly Detection
def create_lstm_autoencoder(timesteps, features, encoder_units=50,
                            latent_units=25, activation='relu'):
    """Build and compile an LSTM autoencoder for fixed-length sequences.

    The encoder compresses each ``(timesteps, features)`` window into a single
    ``latent_units``-wide vector; the decoder mirrors the encoder and
    reconstructs the window from that vector.

    Args:
        timesteps: Number of time steps in each input window.
        features: Number of features per time step.
        encoder_units: Width of the outer LSTM layers (first encoder layer and
            last decoder layer). Defaults to 50.
        latent_units: Width of the inner LSTM layers, i.e. the size of the
            bottleneck vector. Defaults to 25.
        activation: Activation used by every LSTM layer. Defaults to 'relu'.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss, whose
        output has the same ``(timesteps, features)`` shape as its input.
    """
    input_layer = Input(shape=(timesteps, features))

    # Encoder: the second LSTM returns only its final state, which acts as
    # the compressed representation of the whole window.
    encoder = LSTM(encoder_units, activation=activation,
                   return_sequences=True)(input_layer)
    encoder = LSTM(latent_units, activation=activation)(encoder)

    # Decoder: repeat the bottleneck vector once per time step, then unroll it
    # back into a sequence with layer widths mirroring the encoder.
    decoder = RepeatVector(timesteps)(encoder)
    decoder = LSTM(latent_units, activation=activation,
                   return_sequences=True)(decoder)
    decoder = LSTM(encoder_units, activation=activation,
                   return_sequences=True)(decoder)

    # Project every time step back to the original feature dimension.
    output = TimeDistributed(Dense(features))(decoder)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

# Prepare data for LSTM
def create_sequences(data, seq_length):
    """Slice ``data`` into every contiguous window of ``seq_length`` items.

    Windows advance one position at a time, so an input of length ``n``
    produces ``n - seq_length + 1`` windows (none when ``seq_length`` is
    larger than ``n``).

    Returns:
        The windows stacked into a single NumPy array.
    """
    window_count = len(data) - seq_length + 1
    windows = [data[start:start + seq_length] for start in range(window_count)]
    return np.array(windows)

# Standardise the 'value' column and cut it into overlapping windows of
# seq_length consecutive points for the autoencoder.
seq_length = 30
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[['value']])
sequences = create_sequences(scaled_data, seq_length)

# Train LSTM Autoencoder
# Input and target are the same windows, so the model learns to reconstruct
# them; 20% of the windows are held out for validation.
lstm_autoencoder = create_lstm_autoencoder(seq_length, 1)
history = lstm_autoencoder.fit(sequences, sequences,
                              epochs=50, batch_size=32,
                              validation_split=0.2, verbose=0)

# Predict and calculate reconstruction errors
reconstructed = lstm_autoencoder.predict(sequences)
# One mean squared error per window, averaged over the time-step and feature axes.
mse = np.mean(np.power(sequences - reconstructed, 2), axis=(1, 2))

# Set threshold for anomalies (95th percentile)
# Because the threshold is a percentile of the errors themselves, roughly the
# 5% of windows with the largest reconstruction error are flagged.
threshold = np.percentile(mse, 95)
lstm_predictions = (mse > threshold).astype(int)

# Align predictions with original dataframe
# Window i covers rows i .. i+seq_length-1 and its flag is stored at the
# window's last row; the first seq_length-1 rows have no window ending on
# them and stay 0.
lstm_anomalies = np.zeros(len(df))
lstm_anomalies[seq_length-1:seq_length-1+len(lstm_predictions)] = lstm_predictions

# 3. Statistical Anomaly Detection (Z-score)
# Mean and standard deviation are taken over the whole series, anomalies
# included; points more than z_threshold standard deviations away are flagged.
z_scores = np.abs((df['value'] - df['value'].mean()) / df['value'].std())
z_threshold = 3
z_predictions = (z_scores > z_threshold).astype(int)

# Performance Evaluation
def evaluate_model(y_true, y_pred, model_name):
    """Print a classification report and anomaly-class metrics for a detector.

    Args:
        y_true: Ground-truth binary labels (1 = anomaly, 0 = normal).
        y_pred: Predicted binary labels, aligned with ``y_true``.
        model_name: Name shown in the printed header.

    Precision, recall and F1 are computed for class 1 and reported as 0 when
    their denominator is zero.
    """
    print(f"\n{model_name} Performance:")
    print(classification_report(y_true, y_pred))
    # Pin the label set so the matrix is always 2x2. Without it, inputs that
    # contain a single class yield a 1x1 matrix and the unpacking below fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, F1-Score: {f1:.3f}")

# Score each detector against the ground-truth labels.
evaluate_model(df['is_anomaly'], iso_predictions, "Isolation Forest")
# The LSTM yields one prediction per window, so it is compared only against
# the rows on which a window ends (row seq_length-1 onward).
evaluate_model(df['is_anomaly'][seq_length-1:seq_length-1+len(lstm_predictions)],
               lstm_predictions, "LSTM Autoencoder")
evaluate_model(df['is_anomaly'], z_predictions, "Z-Score")

# Visualization of Results: one panel per detector, each showing the series
# with that detector's flagged points in red.
fig, axes = plt.subplots(3, 1, figsize=(15, 12))

# Isolation Forest
axes[0].plot(df['date'], df['value'], alpha=0.7, label='Original')
iso_anomalies = df[iso_predictions == 1]
axes[0].scatter(iso_anomalies['date'], iso_anomalies['value'],
               color='red', label='Detected Anomalies', s=30)
axes[0].set_title('Isolation Forest Anomaly Detection')
axes[0].legend()

# LSTM Autoencoder
axes[1].plot(df['date'], df['value'], alpha=0.7, label='Original')
# Take the rows that have a window prediction (same slice as in the evaluation
# above), then keep only those whose window was flagged.
lstm_anomaly_dates = df['date'][seq_length-1:seq_length-1+len(lstm_predictions)]
lstm_anomaly_values = df['value'][seq_length-1:seq_length-1+len(lstm_predictions)]
lstm_detected = pd.DataFrame({
    'date': lstm_anomaly_dates[lstm_predictions == 1],
    'value': lstm_anomaly_values[lstm_predictions == 1]
})
axes[1].scatter(lstm_detected['date'], lstm_detected['value'],
               color='red', label='Detected Anomalies', s=30)
axes[1].set_title('LSTM Autoencoder Anomaly Detection')
axes[1].legend()

# Z-Score
axes[2].plot(df['date'], df['value'], alpha=0.7, label='Original')
z_anomalies = df[z_predictions == 1]
axes[2].scatter(z_anomalies['date'], z_anomalies['value'],
               color='red', label='Detected Anomalies', s=30)
axes[2].set_title('Z-Score Anomaly Detection')
axes[2].legend()

plt.tight_layout()
plt.show()

print("Advanced Time Series Anomaly Detection Complete!")