In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Build a reproducible noisy sine wave to serve as the synthetic series.
np.random.seed(42)  # seed the global NumPy RNG so the noise is repeatable
time = np.arange(0, 100, 0.1)
sin_wave = np.sin(time)  # clean underlying signal
noise = np.random.normal(0, 0.1, time.size)  # Gaussian noise: mean 0, std 0.1
data = sin_wave + noise

# Inject two 20-sample level shifts that act as the known anomalies.
spike_region = slice(450, 470)
dip_region = slice(700, 720)
data[spike_region] = data[spike_region] + 2  # upward shift
data[dip_region] = data[dip_region] - 2  # downward shift

# Draw the series and shade the two time ranges where anomalies were injected
# (samples 450-470 and 700-720 at a 0.1 step correspond to t=45-47 and t=70-72).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time, data, label="Time Series Data")
ax.axvspan(45, 47, color="red", alpha=0.3, label="Anomalies")
ax.axvspan(70, 72, color="red", alpha=0.3)  # unlabeled so the legend shows one entry
ax.set_title("Synthetic Time Series with Anomalies")
ax.set_xlabel("Time")
ax.set_ylabel("Value")
ax.legend()
plt.show()


In [None]:
# Rescale the series with a default-configured MinMaxScaler.
# The 1-D series is reshaped into a single column before being passed in.
scaler = MinMaxScaler()
data_column = data.reshape(-1, 1)
scaler.fit(data_column)
data_normalized = scaler.transform(data_column)

# Create sequences for the Autoencoder
def create_sequences(data, seq_length):
    """Slice `data` into overlapping windows of `seq_length` consecutive items.

    Windows start at indices 0, 1, ..., len(data) - seq_length - 1, so
    consecutive windows are offset by one item. Note that the window starting
    at index len(data) - seq_length is not produced by this loop bound.

    Args:
        data: Sliceable sequence (list or NumPy array) supporting len().
        seq_length: Number of consecutive items in each window.

    Returns:
        NumPy array stacking the windows along a new leading axis. When
        len(data) <= seq_length no windows exist and an empty array is returned.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    return np.array(windows)

# Window length (in samples) fed to the autoencoder.
sequence_length = 50

# Cut the normalized series into overlapping windows of that length.
X = create_sequences(data=data_normalized, seq_length=sequence_length)

print("\nInput Shape for Autoencoder:", X.shape)


In [None]:
# Build the LSTM Autoencoder as an encoder stack followed by a decoder stack.
encoder_layers = [
    LSTM(64, activation='relu', input_shape=(sequence_length, 1), return_sequences=True),
    # return_sequences=False: only the final output is passed on, not one per timestep
    LSTM(32, activation='relu', return_sequences=False),
]
decoder_layers = [
    # Repeat the encoder output sequence_length times to feed the decoder LSTMs
    RepeatVector(sequence_length),
    LSTM(32, activation='relu', return_sequences=True),
    LSTM(64, activation='relu', return_sequences=True),
    # Apply the same single-unit Dense layer at every timestep
    TimeDistributed(Dense(1)),
]
model = Sequential(encoder_layers + decoder_layers)

# Optimize with Adam against mean squared error.
model.compile(optimizer='adam', loss='mse')
model.summary()


In [None]:
# Ordered split: the first 70% of windows are used for training, the rest are held out.
train_size = int(0.7 * len(X))
X_train = X[:train_size]
X_test = X[train_size:]

# NOTE(review): the training windows appear to overlap the injected spike at
# data[450:470] — confirm that training on anomalous samples is intended.

# Fit the autoencoder to reproduce its own input (input and target are both X_train).
fit_options = dict(
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    shuffle=True,
)
history = model.fit(X_train, X_train, **fit_options)

# Plot the per-epoch training and validation loss recorded by model.fit.
loss_history = history.history
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(loss_history['loss'], label='Training Loss')
ax.plot(loss_history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
# Reconstruct the held-out windows and score each one by its mean absolute error.
X_test_pred = model.predict(X_test)
absolute_error = np.abs(X_test_pred - X_test)
reconstruction_error = absolute_error.mean(axis=(1, 2))  # one score per window

# Use the 95th percentile of those scores as the anomaly cutoff.
# NOTE(review): the cutoff is derived from the same test errors it is applied to,
# so roughly the top 5% of windows are flagged by construction — confirm intended.
threshold = np.percentile(reconstruction_error, 95)
print("\nReconstruction Error Threshold:", threshold)

# Boolean mask over test windows whose score exceeds the cutoff.
anomalies = reconstruction_error > threshold
num_flagged = np.sum(anomalies)
print("Number of Anomalies Detected:", num_flagged)


In [None]:
# Create an anomaly plot.
# Map each flagged test window back to a position in the original series.
# Test window i is X[train_size + i], which covers
# data[train_size + i : train_size + i + sequence_length]; its last sample is
# therefore at index train_size + i + sequence_length - 1. The "- 1" keeps the
# marker on a sample that belongs to the flagged window (without it the marker
# lands one step past the window's end).
flagged_windows = np.where(anomalies)[0]
anomaly_indices = flagged_windows + train_size + sequence_length - 1

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time, data, label="Time Series Data")
ax.scatter(time[anomaly_indices], data[anomaly_indices], color="red", label="Detected Anomalies")
ax.set_title("Anomaly Detection in Time Series Data")
ax.set_xlabel("Time")
ax.set_ylabel("Value")
ax.legend()
plt.show()


In [None]:
# Show how the per-window reconstruction errors are distributed, with the
# anomaly cutoff drawn as a dashed vertical line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(reconstruction_error, bins=50, alpha=0.75, label="Reconstruction Error")
ax.axvline(threshold, color='red', linestyle='dashed', linewidth=2, label='Threshold')
ax.set_title("Reconstruction Error Distribution")
ax.set_xlabel("Reconstruction Error")
ax.set_ylabel("Frequency")
ax.legend()
plt.show()
