In [6]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

Matplotlib is building the font cache; this may take a moment.


In [None]:
class LSTMAutoencoderAnomalyDetector:
    """
    LSTM Autoencoder for Anomaly Detection in Time Series Data

    This class provides a complete workflow for training an LSTM Autoencoder
    to detect anomalies in sequential data, with built-in preprocessing,
    training, and anomaly detection capabilities.

    Workflow: call ``train`` on reference data (fits the scaler, trains the
    model, derives ``anomaly_threshold``), then ``detect_anomalies`` on new
    data. New data is scaled with the statistics learned during ``train``.
    """

    def __init__(self, n_timesteps=10, n_features=18):
        """
        Initialize the LSTM Autoencoder Anomaly Detector

        Args:
            n_timesteps (int): Number of time steps in input sequence
            n_features (int): Number of features in each time step
        """
        self.n_timesteps = n_timesteps
        self.n_features = n_features
        self.scaler = StandardScaler()
        # Both are populated by train(); detect_anomalies() refuses to run
        # while either is still None.
        self.model = None
        self.anomaly_threshold = None

    def _create_sequences(self, data, n_timesteps):
        """
        Convert flat data into overlapping sequences for LSTM

        Windows advance one row at a time, so ``len(data) - n_timesteps + 1``
        sequences are produced.

        Args:
            data (np.ndarray): Input data array
            n_timesteps (int): Number of time steps in sequence

        Returns:
            np.ndarray: Sequences of data (windows stacked along axis 0)

        Raises:
            ValueError: If data has fewer rows than n_timesteps
        """
        if len(data) < n_timesteps:
            raise ValueError(f"Not enough data ({len(data)} samples) to create sequences of length {n_timesteps}.")

        n_windows = len(data) - n_timesteps + 1
        return np.array([data[start:start + n_timesteps] for start in range(n_windows)])

    def _build_lstm_autoencoder(self):
        """
        Build LSTM Autoencoder model architecture

        Encoder: LSTM(24) -> LSTM(6) bottleneck. Decoder mirrors it
        (LSTM(6) -> LSTM(24)) after repeating the bottleneck vector once per
        time step, followed by a per-time-step linear Dense layer that maps
        back to n_features.

        Returns:
            keras.Model: Compiled LSTM Autoencoder model (Adam, MAE loss)
        """
        input_shape = (self.n_timesteps, self.n_features)

        inputs = keras.Input(shape=input_shape)

        # Encoder
        x = layers.LSTM(units=24, activation='tanh', return_sequences=True, name='LSTM_Enc_1')(inputs)
        encoded = layers.LSTM(units=6, activation='tanh', return_sequences=False, name='LSTM_Enc_2_Bottleneck')(x)

        # Decoder
        z = layers.RepeatVector(self.n_timesteps, name='RepeatVector')(encoded)
        x = layers.LSTM(units=6, activation='tanh', return_sequences=True, name='LSTM_Dec_1')(z)
        x = layers.LSTM(units=24, activation='tanh', return_sequences=True, name='LSTM_Dec_2')(x)

        # Output Layer
        decoded = layers.TimeDistributed(layers.Dense(units=self.n_features, activation='linear'),
                                         name='Output_Dense')(x)

        autoencoder = keras.Model(inputs, decoded, name="LSTM_Autoencoder")
        autoencoder.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mae')

        return autoencoder

    def preprocess_data(self, data, fit=True):
        """
        Preprocess input data by scaling and creating sequences

        Args:
            data (pd.DataFrame or np.ndarray): Input data
            fit (bool): When True (default, the historical behaviour) the
                scaler is fitted on ``data`` before transforming. Pass False
                to reuse the statistics from a previous fit, which is what
                scoring new data requires.

        Returns:
            np.ndarray: Preprocessed sequences
        """
        # Ensure data is numpy array
        if isinstance(data, pd.DataFrame):
            data = data.values
        else:
            data = np.asarray(data)

        # Scale data
        if fit:
            scaled_data = self.scaler.fit_transform(data)
        else:
            scaled_data = self.scaler.transform(data)

        # Create sequences
        return self._create_sequences(scaled_data, self.n_timesteps)

    def _reconstruction_error(self, sequences):
        """Return the per-sequence mean absolute reconstruction error."""
        reconstructed = self.model.predict(sequences)
        # Average over the time-step and feature axes -> one score per sequence.
        return np.mean(np.abs(reconstructed - sequences), axis=(1, 2))

    def _plot_history(self, history):
        """Plot training and validation loss curves from a Keras History."""
        plt.figure(figsize=(10, 5))
        plt.plot(history.history['loss'], label='Training MAE Loss')
        plt.plot(history.history['val_loss'], label='Validation MAE Loss')
        plt.title('Training and Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Mean Absolute Error (MAE)')
        plt.legend()
        plt.grid(True)
        plt.show()

    def train(self, data, epochs=50, batch_size=64, validation_split=0.2, threshold_sigma=3.0):
        """
        Train the LSTM Autoencoder

        Fits the scaler on ``data``, trains a freshly built model with early
        stopping on validation loss, then sets ``anomaly_threshold`` to
        ``mean + threshold_sigma * std`` of the validation reconstruction
        errors and plots the loss curves.

        Args:
            data (pd.DataFrame or np.ndarray): Training data
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training
            validation_split (float): Proportion of data to use for validation
            threshold_sigma (float): Number of standard deviations above the
                mean validation error at which a sequence counts as anomalous
        """
        # Preprocess data (this is the one place the scaler is fitted)
        sequences = self.preprocess_data(data, fit=True)

        # Split data; shuffle=False keeps the validation set as the trailing
        # portion of the sequence array.
        X_train, X_val = train_test_split(sequences, test_size=validation_split, shuffle=False)

        # Build model
        self.model = self._build_lstm_autoencoder()

        # Early stopping
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )

        # Train (autoencoder: the input is also the target)
        history = self.model.fit(
            X_train, X_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val),
            callbacks=[early_stopping],
            shuffle=True
        )

        # Calculate anomaly threshold
        val_mae_loss = self._reconstruction_error(X_val)
        self.anomaly_threshold = np.mean(val_mae_loss) + threshold_sigma * np.std(val_mae_loss)

        # Plot training history
        self._plot_history(history)

    def detect_anomalies(self, data):
        """
        Detect anomalies in input data

        The data is scaled with the statistics learned in ``train``; the
        scaler is NOT refitted here. Refitting would standardise the new
        data against itself and make its reconstruction error incomparable
        with ``anomaly_threshold``.

        Args:
            data (pd.DataFrame or np.ndarray): Data to check for anomalies

        Returns:
            np.ndarray: Boolean array indicating anomalies, one entry per
                sequence (``len(data) - n_timesteps + 1`` entries)

        Raises:
            ValueError: If the model has not been trained yet
        """
        if self.model is None or self.anomaly_threshold is None:
            raise ValueError("Model must be trained before detecting anomalies")

        # Preprocess data using the already-fitted scaler
        sequences = self.preprocess_data(data, fit=False)

        # Predict and calculate reconstruction error
        mae_loss = self._reconstruction_error(sequences)

        # Detect anomalies
        return mae_loss > self.anomaly_threshold

In [None]:
def load_data(csv_path, feature_columns):
    """
    Load and validate data from CSV

    Args:
        csv_path (str): Path to CSV file
        feature_columns (list): List of feature column names

    Returns:
        pd.DataFrame: Dataframe restricted to feature_columns, in that order

    Raises:
        FileNotFoundError: If csv_path does not exist
        IOError: If the file exists but cannot be read or parsed as CSV
        ValueError: If any of feature_columns is absent from the file
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    # Only the read itself is wrapped: a schema problem detected below must
    # not be relabelled as a read error.
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        raise IOError(f"Error reading CSV: {e}") from e

    # Verify columns
    missing_cols = [col for col in feature_columns if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing columns: {missing_cols}")

    return df[feature_columns]

In [None]:
# Configuration
NORMAL_DATA_PATH = '/normal_data.csv'
ANOMALY_DATA_PATH = '/anomaly_data.csv'
SEED = 42

# Seed NumPy and TensorFlow before the model is built so that weight
# initialisation and batch shuffling repeat across Restart & Run All.
# NOTE(review): some TF ops (notably on GPU) may still be non-deterministic.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Three signal groups, six indexed columns each (18 columns total), ordered
# group by group: actual_q_0..5, actual_qd_0..5, actual_current_0..5.
feature_columns = [
    f'{signal}_{idx}'
    for signal in ('actual_q', 'actual_qd', 'actual_current')
    for idx in range(6)
]

# Initialize and train anomaly detector
detector = LSTMAutoencoderAnomalyDetector(
    n_timesteps=10,
    n_features=len(feature_columns)
)

# Best-effort run: any failure is reported as a one-line message rather than
# a traceback, so the cell itself never raises.
try:
    # Load and train on normal data
    normal_data = load_data(NORMAL_DATA_PATH, feature_columns)
    detector.train(normal_data)

    # Optional: Test on anomaly data (skipped silently when the file is absent)
    if os.path.exists(ANOMALY_DATA_PATH):
        anomaly_data = load_data(ANOMALY_DATA_PATH, feature_columns)
        anomalies = detector.detect_anomalies(anomaly_data)

        print(f"Detected {anomalies.sum()} anomalies out of {len(anomalies)} sequences")

except Exception as e:
    print(f"An error occurred: {e}")