# In [None]:
# **Integrated Anomaly Detection System**
# 
# This notebook integrates all components for a complete anomaly detection system.

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import logging
import os
import pickle
import json

# Import custom modules
import sys
sys.path.append('./')
from utils.data_loader import load_data, normalize_data, create_sequences
from utils.model_utils import get_reconstruction_errors, EnsembleModel, load_trained_models
from utils.evaluation import AnomalyInterpreter

# Plot styling: matplotlib's seaborn white-grid style plus seaborn's
# "notebook" context with fonts scaled by 1.2.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("notebook", font_scale=1.2)

# Root logging configuration (INFO level, timestamped records) and the
# module-level logger used by the functions below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Seed the PyTorch and NumPy global RNGs with the same fixed value.
torch.manual_seed(42)
np.random.seed(42)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
logger.info(f"Usando dispositivo: {device}")

# Load all models
def load_all_models():
    """Load every trained component used by the integrated pipeline.

    The autoencoder models are mandatory: any exception raised by
    ``load_trained_models`` propagates to the caller.  The dynamic-threshold
    model, the prediction models and the transfer-learning artifacts are
    optional: when one of them cannot be loaded, a warning that includes the
    underlying reason is logged and the corresponding entry is ``None``.

    Returns:
        dict: A dictionary with the keys

        * ``"autoencoder"`` -- ``{"models": ..., "metadata": ...}``, the two
          values returned by ``load_trained_models``.
        * ``"dynamic_threshold"`` -- a callable ``(errors, timestamps)`` or
          ``None``.
        * ``"prediction"`` -- the value returned by
          ``load_prediction_models`` or ``None``.
        * ``"transfer"`` -- a dict with ``"metadata"``, ``"thresholds"`` and
          ``"weights"`` (parsed JSON) or ``None``.
    """
    autoencoder_models, autoencoder_metadata = load_trained_models(base_path="models/autoencoder/")

    # Dict values are evaluated top to bottom, so the optional components are
    # loaded in the order: dynamic threshold, prediction, transfer.
    return {
        "autoencoder": {
            "models": autoencoder_models,
            "metadata": autoencoder_metadata
        },
        "dynamic_threshold": _load_dynamic_threshold_fn(),
        "prediction": _load_optional_prediction_models(),
        "transfer": _load_optional_transfer_models(),
    }


def _load_dynamic_threshold_fn(path="models/dynamic_thresholds/hybrid_threshold_model.pkl"):
    """Return an ``(errors, timestamps) -> thresholds`` callable, or ``None`` on failure."""
    try:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # model files that come from a trusted source.
        with open(path, "rb") as f:
            threshold_model = pickle.load(f)

        # Read the required entries now so that a malformed file triggers the
        # static-threshold fallback here instead of failing later, in the
        # middle of an analysis run.
        generator = threshold_model['generator']
        weights = threshold_model['best_weights']
        val_errors = threshold_model['validation_errors']['ensemble']
        factor = threshold_model['factor']
    except Exception as exc:  # best-effort: optional component
        logger.warning(
            "Dynamic threshold model unavailable (%s: %s). Using static threshold.",
            type(exc).__name__, exc,
        )
        return None

    def apply_dynamic_threshold(errors, timestamps):
        return generator.hybrid_threshold(errors, timestamps, val_errors, weights, factor)

    return apply_dynamic_threshold


def _load_optional_prediction_models(base_path="models/prediction/"):
    """Return the prediction models from ``utils.model_utils``, or ``None`` on failure."""
    try:
        # Imported lazily so that a missing loader is treated like missing models.
        from utils.model_utils import load_prediction_models
        return load_prediction_models(base_path=base_path)
    except Exception as exc:  # best-effort: optional component
        logger.warning("Prediction models unavailable (%s: %s).", type(exc).__name__, exc)
        return None


def _load_optional_transfer_models(base_path="models/transfer/"):
    """Return the parsed transfer-learning JSON artifacts, or ``None`` on failure."""
    artifact_files = {
        "metadata": "transfer_metadata.json",
        "thresholds": "thresholds.json",
        "weights": "ensemble_weights.json",
    }
    try:
        loaded = {}
        for key, filename in artifact_files.items():
            with open(os.path.join(base_path, filename), "r") as f:
                loaded[key] = json.load(f)
        return loaded
    except Exception as exc:  # best-effort: optional component
        logger.warning("Transfer learning models unavailable (%s: %s).", type(exc).__name__, exc)
        return None

# Integrated analysis function
def analyze_time_series(new_data, models, scaler=None, device='cpu'):
    """Analyze a time series using all available models.

    Steps: scale the data, cut it into sequences, compute reconstruction
    errors with the LSTM / GRU / Transformer autoencoders and their ensemble,
    threshold the ensemble errors, optionally forecast with the prediction
    ensemble, and finally run ``AnomalyInterpreter`` on the result.

    Args:
        new_data: Series to analyze.  When ``scaler`` is given, ``new_data``
            must expose ``.values`` (e.g. a pandas Series or single-column
            DataFrame); otherwise it is handed to ``normalize_data``.
        models: Dictionary with the keys ``"autoencoder"`` (containing
            ``"models"`` and ``"metadata"``), ``"dynamic_threshold"`` and
            ``"prediction"``, as built by ``load_all_models``.
        scaler: Optional fitted scaler exposing ``transform`` and
            ``inverse_transform``.  When ``None``, the scaler returned by
            ``normalize_data`` is used.
        device: Device forwarded to the model helpers.

    Returns:
        dict: ``"anomalies"`` (per-model and ensemble errors, reconstructions,
        thresholds, anomaly indices and error/threshold scores),
        ``"analysis"`` (top anomalies and the interpreter instance) and, when
        prediction models are available, ``"predictions"``.
    """
    results = {}

    # Preprocess data
    if scaler is None:
        data_scaled, scaler = normalize_data(new_data)
    else:
        data_scaled = scaler.transform(new_data.values.reshape(-1, 1))

    # Sequence length the autoencoders were trained with
    sequence_length = models['autoencoder']['metadata']['sequence_length']

    X = create_sequences(data_scaled, seq_length=sequence_length, step=1)

    # Unshuffled loader so error positions follow the sequence order
    X_tensor = torch.FloatTensor(X)
    data_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_tensor),
        batch_size=64,
        shuffle=False
    )

    # 1. Detect anomalies using autoencoders
    autoencoder_models = models['autoencoder']['models']

    model_errors = {}
    reconstructions = {}
    for key in ('lstm', 'gru', 'transformer'):
        errs, originals, rebuilt = get_reconstruction_errors(
            autoencoder_models[key], data_loader, device
        )
        model_errors[key] = errs
        reconstructions[key] = (originals, rebuilt)

    ensemble = EnsembleModel(
        models=[autoencoder_models['lstm'], autoencoder_models['gru'], autoencoder_models['transformer']],
        names=["LSTM", "GRU", "Transformer"]
    )

    ensemble_errors, _ = ensemble.get_weighted_errors(
        [data_loader] * 3,
        device
    )

    # 2. Apply dynamic thresholds if available
    # NOTE: these timestamps are synthetic (hourly, starting 2022-01-01); they
    # are not taken from new_data.  A Timedelta is used for the frequency
    # because the 'H' string alias is deprecated in recent pandas releases.
    timestamps = pd.date_range(
        start='2022-01-01', periods=len(ensemble_errors), freq=pd.Timedelta(hours=1)
    )

    if models['dynamic_threshold'] is not None:
        dynamic_threshold_fn = models['dynamic_threshold']
        thresholds = dynamic_threshold_fn(ensemble_errors, timestamps)
    else:
        # Static threshold: mean + 1.5 * std of a 30%-sized sample of the errors.
        # NOTE(review): np.random.choice draws from the global NumPy RNG, so
        # this value depends on the RNG state at call time.
        val_errors = np.random.choice(ensemble_errors, size=int(len(ensemble_errors)*0.3))
        thresholds = np.mean(val_errors) + 1.5 * np.std(val_errors)

    anomalies = ensemble_errors > thresholds

    results["anomalies"] = {
        "errors": {**model_errors, "ensemble": ensemble_errors},
        "reconstructions": reconstructions,
        "thresholds": thresholds,
        "anomaly_indices": np.where(anomalies)[0],
        # Broadcasting covers both a scalar and a per-point threshold array.
        "anomaly_scores": ensemble_errors / thresholds
    }

    # 3. Make predictions if models available
    if models['prediction'] is not None:
        prediction_models = models['prediction']

        # Use the most recent window as predictor input
        if len(data_scaled) >= sequence_length:
            recent_data = data_scaled[-sequence_length:].reshape(1, sequence_length, 1)
        else:
            # Not enough data: left-pad with zeros up to sequence_length
            pad_size = sequence_length - len(data_scaled)
            padding = np.zeros((pad_size, 1))
            recent_data = np.concatenate([padding, data_scaled]).reshape(1, sequence_length, 1)

        ensemble_predictor = prediction_models['ensemble']
        predictions = ensemble_predictor.predict(recent_data, device)

        if scaler is not None:
            predictions = _inverse_scale_predictions(predictions, scaler)

        results["predictions"] = {
            "values": predictions,
            "horizon": prediction_models['config']['prediction_horizon']
        }

    # 4. Analyze anomalies (LSTM reconstructions are used for the comparison plots)
    lstm_orig, lstm_recon = reconstructions['lstm']
    interpreter = AnomalyInterpreter(
        model=None,
        test_data=lstm_orig,
        reconstructed_data=lstm_recon,
        errors=ensemble_errors,
        thresholds=thresholds,
        timestamps=timestamps
    )

    top_anomalies = interpreter.analyze_top_anomalies(top_k=5)
    results["analysis"] = {
        "top_anomalies": top_anomalies,
        "interpreter": interpreter
    }

    return results


def _inverse_scale_predictions(predictions, scaler):
    """Undo the scaling of ``predictions`` and return them as an ``(n, 1)`` array."""
    if isinstance(predictions, torch.Tensor):
        predictions = predictions.detach().cpu().numpy()
    # The scaler was applied to a single column, so inverse_transform must be
    # given a 2-D single-column array as well (not the squeezed 1-D vector).
    column = np.asarray(predictions).reshape(-1, 1)
    return np.asarray(scaler.inverse_transform(column)).reshape(-1, 1)

# Function to visualize integrated results
def visualize_integrated_results(new_data, results):
    """Plot and print the results produced by ``analyze_time_series``.

    Shows and saves (as PNG files in the working directory):

    * ``anomaly_detection_results.png`` -- the series with anomaly markers
      and the ensemble reconstruction error with its threshold;
    * ``anomaly_<k>_reconstruction.png`` / ``anomaly_<k>_heatmap.png`` -- one
      pair per entry of ``results["analysis"]["top_anomalies"]``;
    * ``predictions.png`` -- only when ``results`` contains ``"predictions"``.

    Args:
        new_data: The analyzed series (pandas Series / DataFrame or any
            array-like).  For 2-D input, anomaly markers use the first column.
        results: Dictionary returned by ``analyze_time_series``.
    """
    errors = results["anomalies"]["errors"]["ensemble"]
    thresholds = results["anomalies"]["thresholds"]
    anomaly_indices = results["anomalies"]["anomaly_indices"]

    # Plot against positional indices.  ``anomaly_indices`` are positions, while
    # the pandas index of ``new_data`` may be timestamps or start at an offset
    # (e.g. after ``iloc[-1000:]``); mixing the two misplaces the markers.
    values = np.asarray(new_data)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    positions = np.arange(len(values))

    # 1. Anomaly detection visualization
    plt.figure(figsize=(15, 8))

    plt.subplot(2, 1, 1)
    plt.plot(positions, values, label='Original Data')
    if len(anomaly_indices) > 0:
        plt.scatter(anomaly_indices, values[anomaly_indices, 0], color='red', label='Anomalies')
    plt.title('Time Series with Anomalies')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True)

    plt.subplot(2, 1, 2)
    plt.plot(errors, label='Reconstruction Error')
    if isinstance(thresholds, np.ndarray):
        # Per-point (dynamic) threshold curve
        plt.plot(thresholds, 'r--', label='Dynamic Threshold')
    else:
        # Single static threshold value
        plt.axhline(y=thresholds, color='r', linestyle='--', label=f'Threshold ({thresholds:.6f})')

    if len(anomaly_indices) > 0:
        plt.scatter(anomaly_indices, errors[anomaly_indices], color='red')

    plt.title('Reconstruction Error with Threshold')
    plt.xlabel('Time')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig('anomaly_detection_results.png', dpi=300)
    plt.show()

    # 2. Show top anomalies
    top_anomalies = results["analysis"]["top_anomalies"]
    interpreter = results["analysis"]["interpreter"]

    # Report the actual count: fewer than five anomalies may be returned.
    print(f"\n=== Top {len(top_anomalies)} Detected Anomalies ===")
    for rank, anomaly in enumerate(top_anomalies, start=1):
        print(f"Anomaly #{rank}:")
        print(f"  Index: {anomaly['index']}")
        if 'timestamp' in anomaly:
            print(f"  Timestamp: {anomaly['timestamp']}")
        print(f"  Error: {anomaly['error']:.6f}")
        print(f"  Threshold: {anomaly['threshold']:.6f}")
        print(f"  Error/Threshold Ratio: {anomaly['error_ratio']:.4f}")

        # Visualize the anomaly
        interpreter.plot_reconstruction_comparison(anomaly['index'])
        plt.savefig(f'anomaly_{rank}_reconstruction.png', dpi=300)
        plt.show()

        interpreter.visualize_contribution_heatmap(anomaly['index'])
        plt.savefig(f'anomaly_{rank}_heatmap.png', dpi=300)
        plt.show()

    # 3. Future predictions visualization if available
    if "predictions" in results:
        predictions = results["predictions"]["values"]

        plt.figure(figsize=(12, 6))

        # Historical data on positions 0..n-1, predictions appended after it
        plt.plot(positions, values, label='Historical Data')

        pred_indices = range(len(values), len(values) + len(predictions))
        plt.plot(pred_indices, predictions, 'g--', label='Predictions')

        plt.title('Time Series Predictions')
        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.legend()
        plt.grid(True)

        plt.savefig('predictions.png', dpi=300)
        plt.show()

# Main function to demonstrate the integrated system
def main(filepath='machine_temperature_system_failure.csv', n_points=1000):
    """Run the integrated anomaly detection demo end to end.

    Loads all models, reads the dataset, analyzes the last ``n_points`` rows
    of its ``'value'`` column and visualizes the results.

    Args:
        filepath: Path of the CSV file passed to ``load_data``.
        n_points: Number of trailing rows to analyze; expected to be a
            positive integer.
    """
    # Load all models
    models = load_all_models()

    # Load a test dataset
    df = load_data(filepath)

    # Take a portion of the data for demonstration
    test_data = df[['value']].iloc[-n_points:]

    # Analyze the time series on the module-level device
    results = analyze_time_series(test_data, models, device=device)

    # Visualize the results
    visualize_integrated_results(test_data, results)

    print("\nIntegrated analysis completed successfully!")


if __name__ == "__main__":
    main()