<a href="https://colab.research.google.com/github/lesteraiof/TESISMDW/blob/main/MDW_LOGS_REALTIME.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
"""
Sistema de Detección de Fallas en Tiempo Real para WebLogic MDW
Versión corregida - Sin errores de modificación durante iteración
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import warnings
warnings.filterwarnings('ignore')

# Librerías para ML
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import RobustScaler
import pickle

# Para simulación
import threading
import queue
import random
import copy  # Importar copy para evitar modificar diccionarios en iteraci√≥n

# ============================================================================
# 1. SISTEMA DE INGESTA DE LOGS (VERSIÓN CORREGIDA)
# ============================================================================

class LogStreamSimulatorFixed:
    """Simulated source of WebLogic MDW metric logs.

    Entries can be produced on demand with ``_generate_log_entry`` or
    continuously by a daemon thread that pushes them onto ``log_queue``.

    Attributes:
        logs_per_second: Target emission rate of the background stream.
        log_queue: ``queue.Queue`` that receives the generated entries.
        running: True while the background stream is meant to be active.
    """

    # For each simulated state: the metrics in emission order together with
    # the sampler and the two bounds passed to it.
    _STATE_PROFILES = {
        'normal': (
            ('jvm_heap_used_percent', random.uniform, 60, 75),
            ('active_threads', random.randint, 20, 40),
            ('stuck_threads', random.randint, 0, 2),
            ('response_time_avg_ms', random.uniform, 100, 300),
            ('cpu_usage_percent', random.uniform, 30, 60),
            ('error_rate_percent', random.uniform, 0.1, 1.0),
            ('db_active_connections', random.randint, 10, 20),
            ('throughput_requests_sec', random.uniform, 100, 200),
            ('session_count', random.randint, 500, 800),
        ),
        'warning': (
            ('jvm_heap_used_percent', random.uniform, 75, 85),
            ('active_threads', random.randint, 40, 60),
            ('stuck_threads', random.randint, 2, 5),
            ('response_time_avg_ms', random.uniform, 300, 800),
            ('cpu_usage_percent', random.uniform, 60, 80),
            ('error_rate_percent', random.uniform, 1.0, 5.0),
            ('db_active_connections', random.randint, 20, 30),
            ('throughput_requests_sec', random.uniform, 50, 100),
            ('session_count', random.randint, 800, 1000),
        ),
        'critical': (
            ('jvm_heap_used_percent', random.uniform, 85, 99),
            ('active_threads', random.randint, 60, 100),
            ('stuck_threads', random.randint, 5, 20),
            ('response_time_avg_ms', random.uniform, 800, 2000),
            ('cpu_usage_percent', random.uniform, 80, 99),
            ('error_rate_percent', random.uniform, 5.0, 30.0),
            ('db_active_connections', random.randint, 30, 50),
            ('throughput_requests_sec', random.uniform, 10, 50),
            ('session_count', random.randint, 1000, 1500),
        ),
    }

    def __init__(self, logs_per_second=1):
        self.logs_per_second = logs_per_second
        self.log_queue = queue.Queue()
        self.running = False
        # Bookkeeping for the single background producer.
        self._thread = None
        self._stop_event = None

    def _generate_log_entry(self):
        """Return one simulated log entry.

        The entry is a dict with ``timestamp`` (``datetime.now()``),
        ``system_state`` (``'NORMAL'``, ``'WARNING'`` or ``'CRITICAL'``,
        picked uniformly at random) and nine metrics sampled from the ranges
        configured for that state.
        """
        timestamp = datetime.now()
        state = random.choice(['normal', 'warning', 'critical'])

        log_entry = {
            'timestamp': timestamp,
            'system_state': state.upper(),
        }
        for metric, sampler, low, high in self._STATE_PROFILES[state]:
            log_entry[metric] = sampler(low, high)

        return log_entry

    def start_streaming(self):
        """Start the background producer and return ``log_queue``.

        Calling this while a producer is already active is a no-op that
        returns the same queue, so repeated calls never stack up threads.

        Raises:
            ValueError: If ``logs_per_second`` is not positive.
        """
        if self.logs_per_second <= 0:
            raise ValueError("logs_per_second must be positive")

        if self.running and self._thread is not None and self._thread.is_alive():
            return self.log_queue

        # Retire a previous producer that may still be sleeping; without this
        # a quick stop/start would leave two threads feeding the queue.
        if self._stop_event is not None:
            self._stop_event.set()

        stop_event = threading.Event()

        def _stream():
            # ``self.running`` is still honoured so external code that flips
            # the flag directly keeps working.
            while self.running and not stop_event.is_set():
                self.log_queue.put(self._generate_log_entry())
                # Event.wait doubles as an interruptible sleep.
                stop_event.wait(1 / self.logs_per_second)

        self._stop_event = stop_event
        self.running = True
        self._thread = threading.Thread(target=_stream, daemon=True)
        self._thread.start()

        return self.log_queue

    def stop_streaming(self):
        """Signal the background producer to stop."""
        self.running = False
        if self._stop_event is not None:
            self._stop_event.set()

# ============================================================================
# 2. PROCESADOR DE LOGS (VERSIÓN CORREGIDA)
# ============================================================================

class RealTimeLogProcessorFixed:
    """Turn individual log entries into per-log and sliding-window features.

    Attributes:
        window_size: Maximum number of recent entries kept in ``log_buffer``.
        log_buffer: List of processed entries, oldest first.
    """

    # Metrics summarised (mean / std / max / trend) over the sliding window.
    _WINDOW_METRICS = (
        'jvm_heap_used_percent',
        'response_time_avg_ms',
        'error_rate_percent',
        'cpu_usage_percent',
    )

    # Fewest buffered entries required before window statistics are produced.
    _MIN_WINDOW_ENTRIES = 5

    def __init__(self, window_size=30):
        self.window_size = window_size
        self.log_buffer = []

    def process_log(self, log_entry):
        """Process one log entry.

        Args:
            log_entry: Dict with at least a ``timestamp`` key holding an
                object that exposes ``.hour``. It is deep-copied, so the
                caller's dict is never modified.

        Returns:
            Tuple ``(features, window_features)``. ``features`` is the copied
            entry plus ``hour_of_day`` and ``is_business_hours``;
            ``window_features`` is the window summary, or ``None`` while the
            buffer holds fewer than five entries.
        """
        features = copy.deepcopy(log_entry)

        hour = features['timestamp'].hour
        features['hour_of_day'] = hour
        features['is_business_hours'] = 1 if 8 <= hour <= 18 else 0

        self.log_buffer.append(features)

        # Trim to the window; also recovers if window_size was lowered after
        # the buffer had already filled.
        overflow = len(self.log_buffer) - self.window_size
        if overflow > 0:
            del self.log_buffer[:overflow]

        window_features = self._extract_window_features_safe()

        return features, window_features

    def _extract_window_features_safe(self):
        """Summarise the current buffer into a fresh dict.

        Returns:
            ``None`` when fewer than five entries are buffered; otherwise a
            dict with ``timestamp`` (time of computation), ``window_size``,
            ``<metric>_mean/_std/_max/_trend`` for each tracked metric that is
            present, plus ``resource_pressure`` and ``thread_pressure`` when
            their source columns exist.
        """
        if len(self.log_buffer) < self._MIN_WINDOW_ENTRIES:
            return None

        df_window = pd.DataFrame(self.log_buffer)

        window_features = {
            'timestamp': datetime.now(),
            'window_size': len(self.log_buffer),
        }

        for metric in self._WINDOW_METRICS:
            if metric not in df_window.columns:
                continue
            values = df_window[metric].dropna()
            if len(values) == 0:
                continue
            window_features[f'{metric}_mean'] = float(values.mean())
            window_features[f'{metric}_std'] = float(values.std())
            window_features[f'{metric}_max'] = float(values.max())
            window_features[f'{metric}_trend'] = float(self._calculate_trend_safe(values))

        # Derived indicators combining two metrics.
        if ('jvm_heap_used_percent' in df_window.columns and
                'cpu_usage_percent' in df_window.columns):
            heap_mean = df_window['jvm_heap_used_percent'].mean()
            cpu_mean = df_window['cpu_usage_percent'].mean()
            window_features['resource_pressure'] = float((heap_mean * cpu_mean) / 100)

        if ('stuck_threads' in df_window.columns and
                'active_threads' in df_window.columns):
            stuck_mean = df_window['stuck_threads'].mean()
            active_mean = df_window['active_threads'].mean()
            # +1 keeps the ratio finite when no threads are active.
            window_features['thread_pressure'] = float(stuck_mean / (active_mean + 1))

        return window_features

    def _calculate_trend_safe(self, values):
        """Return the slope of a degree-1 least-squares fit over ``values``.

        Args:
            values: Numeric sequence (pandas Series, list, array, ...), taken
                as equally spaced samples at x = 0, 1, 2, ...

        Returns:
            The fitted slope as ``float``; ``0.0`` when there are fewer than
            two values or the fit cannot be computed.
        """
        if len(values) < 2:
            return 0.0

        try:
            y = np.asarray(values, dtype=float)
            slope, _ = np.polyfit(np.arange(len(y)), y, 1)
        except (TypeError, ValueError, np.linalg.LinAlgError):
            # Only numeric / fitting failures are absorbed; KeyboardInterrupt
            # and other unrelated errors propagate.
            return 0.0
        return float(slope)

# ============================================================================
# 3. MODELO DE DETECCIÓN (VERSIÓN SIMPLIFICADA)
# ============================================================================

class SimpleAnomalyDetector:
    """Isolation-Forest anomaly detector over four core metrics.

    Training and prediction share one scaler and one ordered feature list so
    the model always scores vectors in the same space it was fitted on.

    Attributes:
        model: Fitted ``IsolationForest`` or ``None`` before training.
        scaler: ``RobustScaler`` fitted together with the model.
        is_trained: True once ``train_model`` has succeeded.
        anomaly_threshold: A sample is flagged when its decision-function
            score is below this value.
    """

    # Ordered feature columns; the order defines the model's input layout.
    FEATURE_KEYS = (
        'jvm_heap_used_percent',
        'response_time_avg_ms',
        'error_rate_percent',
        'cpu_usage_percent',
    )

    # Minimum number of usable rows required to fit the model.
    MIN_TRAINING_SAMPLES = 10

    def __init__(self, anomaly_threshold=-0.2):
        self.model = None
        self.scaler = RobustScaler()
        self.is_trained = False
        self.anomaly_threshold = anomaly_threshold

    def train_model(self, training_data):
        """Fit the scaler and the Isolation Forest.

        Args:
            training_data: Iterable of dicts; entries that are not dicts or
                lack any of ``FEATURE_KEYS`` are ignored.

        Returns:
            True when the model was trained, False when fewer than ten usable
            rows were available.
        """
        print("ü§ñ Entrenando modelo...")

        X = self._prepare_training_data(training_data)

        if len(X) < self.MIN_TRAINING_SAMPLES:
            print("‚ö†Ô∏è  Datos insuficientes para entrenar")
            return False

        # Fit the scaler first and train on the scaled matrix: predict()
        # scales its input, so the forest must learn in that same space.
        X_scaled = self.scaler.fit_transform(X)

        self.model = IsolationForest(
            n_estimators=50,
            contamination=0.1,
            random_state=42,
            verbose=0
        )
        self.model.fit(X_scaled)
        self.is_trained = True

        print(f"‚úÖ Modelo entrenado con {len(X)} muestras")
        return True

    def _prepare_training_data(self, training_data):
        """Build the training matrix from dict entries.

        Returns:
            ``numpy`` array with one row per entry that contains every key in
            ``FEATURE_KEYS`` (columns in that order); empty when none qualify.
        """
        rows = []
        for entry in training_data:
            if not isinstance(entry, dict):
                continue
            # Incomplete entries are dropped rather than zero-filled.
            if all(key in entry for key in self.FEATURE_KEYS):
                rows.append([float(entry[key]) for key in self.FEATURE_KEYS])

        return np.array(rows)

    def predict(self, features_dict):
        """Score one sample.

        Args:
            features_dict: Dict with the raw metric keys in ``FEATURE_KEYS``.
                When a raw key is absent, ``<key>_mean`` (as produced by
                window summaries) is used instead; if neither exists the
                feature defaults to 0.

        Returns:
            Dict with ``is_anomaly`` (bool), ``score`` (decision-function
            value) and ``confidence`` (``abs(score)`` capped at 1.0). An
            untrained detector always reports no anomaly with zero score.
        """
        if not self.is_trained:
            return {'is_anomaly': False, 'score': 0.0, 'confidence': 0.0}

        feature_values = []
        for key in self.FEATURE_KEYS:
            value = features_dict.get(key)
            if value is None:
                value = features_dict.get(f'{key}_mean', 0)
            feature_values.append(float(value))

        features_scaled = self.scaler.transform([feature_values])

        score = self.model.decision_function(features_scaled)[0]
        is_anomaly = score < self.anomaly_threshold

        return {
            'is_anomaly': bool(is_anomaly),
            'score': float(score),
            'confidence': float(min(1.0, abs(score)))
        }

    def save_model(self, filename='model.pkl'):
        """Pickle the model and scaler to ``filename``; no-op when untrained."""
        if self.is_trained:
            with open(filename, 'wb') as f:
                pickle.dump({
                    'model': self.model,
                    'scaler': self.scaler,
                    'is_trained': True
                }, f)
            print(f"üíæ Modelo guardado en {filename}")

# ============================================================================
# 4. SISTEMA DE ALERTAS SIMPLE
# ============================================================================

class SimpleAlertSystem:
    """Threshold- and anomaly-based alerting with per-alert cooldown.

    Attributes:
        alerts: Most recent alerts, oldest first, capped at ``max_history``.
        alert_cooldown: Maps ``"<severity>_<metric>"`` to the time that alert
            was last emitted.
        cooldown_seconds: Minimum seconds between two identical metric alerts.
        max_history: Maximum number of alerts retained in ``alerts``.
    """

    # (metric, warning threshold, warning label, critical threshold, critical label)
    _METRIC_CHECKS = (
        ('jvm_heap_used_percent', 85, 'WARNING', 90, 'CRITICAL'),
        ('response_time_avg_ms', 500, 'WARNING', 1000, 'CRITICAL'),
        ('error_rate_percent', 5, 'WARNING', 10, 'CRITICAL'),
        ('cpu_usage_percent', 80, 'WARNING', 90, 'CRITICAL'),
    )

    def __init__(self, cooldown_seconds=300, max_history=50):
        self.alerts = []
        self.alert_cooldown = {}
        self.cooldown_seconds = cooldown_seconds
        self.max_history = max_history

    def check_alerts(self, log_entry, anomaly_result):
        """Evaluate one log entry and return the alerts it triggers.

        Args:
            log_entry: Dict of metric values; metrics that are missing or
                ``None`` are skipped.
            anomaly_result: Dict with ``is_anomaly`` and ``score`` (as
                returned by the detector), or ``None`` to skip the anomaly
                alert.

        Returns:
            List of alert dicts created by this call (possibly empty). The
            same alerts are appended to ``self.alerts``.
        """
        alerts = []
        timestamp = datetime.now()

        for metric, warn_thresh, warn_sev, crit_thresh, crit_sev in self._METRIC_CHECKS:
            value = log_entry.get(metric)
            if value is None:
                continue

            if value >= crit_thresh:
                severity, threshold = crit_sev, crit_thresh
            elif value >= warn_thresh:
                severity, threshold = warn_sev, warn_thresh
            else:
                continue

            alert_key = f"{severity}_{metric}"
            last_sent = self.alert_cooldown.get(alert_key)
            # total_seconds() is required here: timedelta.seconds wraps every
            # 24 h and would keep suppressing alerts a day or more later.
            if (last_sent is not None and
                    (timestamp - last_sent).total_seconds() < self.cooldown_seconds):
                continue

            alerts.append({
                'timestamp': timestamp,
                'severity': severity,
                'metric': metric,
                'value': value,
                'threshold': threshold,
                'message': f"{metric} = {value:.1f} (umbral: {threshold})"
            })
            self.alert_cooldown[alert_key] = timestamp

        # Model-driven alert; not subject to the cooldown.
        if anomaly_result and anomaly_result['is_anomaly']:
            alerts.append({
                'timestamp': timestamp,
                'severity': 'CRITICAL',
                'type': 'ANOMALY',
                'score': anomaly_result['score'],
                'message': f"Anomal√≠a detectada (score: {anomaly_result['score']:.3f})"
            })

        self.alerts.extend(alerts)

        # Keep only the newest max_history alerts.
        surplus = len(self.alerts) - self.max_history
        if surplus > 0:
            self.alerts = self.alerts[surplus:]

        return alerts

# ============================================================================
# 5. SISTEMA COMPLETO SIMPLIFICADO
# ============================================================================

class RealTimeSystemSimple:
    """End-to-end pipeline: simulated log stream -> features -> detection -> alerts.

    Attributes:
        log_stream: Simulated log source.
        log_processor: Per-log / window feature extractor.
        anomaly_detector: Model used to score each processed log.
        alert_system: Rule- and anomaly-based alert generator.
        running: True while ``run_detection`` is looping.
        training_data: Processed entries collected for training.
        log_queue: Queue fed by the stream; ``None`` until ``initialize`` runs.
    """

    def __init__(self):
        self.log_stream = LogStreamSimulatorFixed(logs_per_second=2)
        self.log_processor = RealTimeLogProcessorFixed(window_size=20)
        self.anomaly_detector = SimpleAnomalyDetector()
        self.alert_system = SimpleAlertSystem()

        self.running = False
        self.training_data = []
        self.log_queue = None

    def initialize(self, training_time=10):
        """Collect training data, train the detector and start the stream.

        Args:
            training_time: Seconds spent generating training logs.
        """
        print("üöÄ Inicializando sistema...")

        # 1. Generate training data
        print(f"üìä Generando {training_time} segundos de datos de entrenamiento...")
        self._collect_training_data(training_time)

        # 2. Train the model (detection falls back to rules only on failure)
        if len(self.training_data) > 0:
            success = self.anomaly_detector.train_model(self.training_data)
            if not success:
                print("‚ö†Ô∏è  No se pudo entrenar el modelo, usando sistema basado en reglas")

        # 3. Start streaming
        self.log_queue = self.log_stream.start_streaming()

        print("‚úÖ Sistema inicializado")

    def _collect_training_data(self, seconds):
        """Generate and process logs for ``seconds`` seconds, storing the features."""
        start_time = time.time()
        collected = 0

        while time.time() - start_time < seconds:
            # Entries are generated directly instead of via the queue.
            log_entry = self.log_stream._generate_log_entry()
            features, _ = self.log_processor.process_log(log_entry)

            if features:
                self.training_data.append(features)
                collected += 1

            time.sleep(0.2)  # 5 logs per second

        print(f"   üìà {collected} logs recolectados para entrenamiento")

    def run_detection(self, duration=30):
        """Consume the log queue for ``duration`` seconds, scoring and alerting.

        Always stops the stream and prints the summary on exit, including on
        ``KeyboardInterrupt``.

        Raises:
            RuntimeError: If called before ``initialize``.
        """
        if self.log_queue is None:
            raise RuntimeError("initialize() must be called before run_detection()")

        print(f"\nüéØ Ejecutando detecci√≥n por {duration} segundos...")

        self.running = True
        start_time = time.time()

        stats = {
            'logs_processed': 0,
            'anomalies_detected': 0,
            'alerts_generated': 0
        }

        try:
            while self.running and (time.time() - start_time) < duration:
                try:
                    log_entry = self.log_queue.get(timeout=0.5)
                    stats['logs_processed'] += 1

                    features, window_features = self.log_processor.process_log(log_entry)

                    # Detection starts once the window summary is available.
                    if window_features:
                        # Score the per-log features: they carry the raw
                        # metric keys the detector was trained on, whereas the
                        # window summary only has *_mean/_std/... keys.
                        anomaly_result = self.anomaly_detector.predict(features)

                        if anomaly_result['is_anomaly']:
                            stats['anomalies_detected'] += 1

                        alerts = self.alert_system.check_alerts(features, anomaly_result)
                        stats['alerts_generated'] += len(alerts)

                        # Periodic status line
                        if stats['logs_processed'] % 10 == 0:
                            self._print_status(stats, features, anomaly_result, alerts)

                    time.sleep(0.1)

                except queue.Empty:
                    continue

        except KeyboardInterrupt:
            print("\nüõë Detenido por usuario")
        finally:
            self.stop()
            self._print_summary(stats, start_time, time.time())

    def _print_status(self, stats, features, anomaly_result, alerts):
        """Print a short progress report for the current log."""
        print(f"\nüìä Log #{stats['logs_processed']} | "
              f"Anomal√≠as: {stats['anomalies_detected']} | "
              f"Alertas: {stats['alerts_generated']}")

        # Key metrics, formatted by unit
        metrics_display = []
        for metric in ['jvm_heap_used_percent', 'response_time_avg_ms', 'error_rate_percent']:
            if metric in features:
                value = features[metric]
                if 'percent' in metric:
                    metrics_display.append(f"{metric.split('_')[0].upper()}: {value:.1f}%")
                elif 'ms' in metric:
                    metrics_display.append(f"Resp: {value:.0f}ms")
                else:
                    metrics_display.append(f"{metric}: {value:.1f}")

        print(f"   üìà {' | '.join(metrics_display)}")

        if anomaly_result['is_anomaly']:
            print(f"   üö® ANOMAL√çA! Score: {anomaly_result['score']:.3f}")

        if alerts:
            for alert in alerts[:2]:  # show at most 2 alerts
                print(f"   ‚ö†Ô∏è  {alert['severity']}: {alert['message'][:60]}")

    def _print_summary(self, stats, start_time, end_time):
        """Print the final run summary and save the model if it was trained."""
        duration = end_time - start_time

        print("\n" + "="*60)
        print("üìà RESUMEN DE EJECUCI√ìN")
        print("="*60)

        print(f"\nüìä ESTAD√çSTICAS:")
        print(f"  ‚Ä¢ Duraci√≥n: {duration:.1f} segundos")
        print(f"  ‚Ä¢ Logs procesados: {stats['logs_processed']}")
        print(f"  ‚Ä¢ Tasa: {stats['logs_processed']/max(duration,1):.1f} logs/seg")
        print(f"  ‚Ä¢ Anomal√≠as detectadas: {stats['anomalies_detected']}")
        print(f"  ‚Ä¢ Alertas generadas: {stats['alerts_generated']}")

        print(f"\nü§ñ MODELO:")
        print(f"  ‚Ä¢ Entrenado: {'S√≠' if self.anomaly_detector.is_trained else 'No'}")
        print(f"  ‚Ä¢ Datos entrenamiento: {len(self.training_data)}")

        print(f"\nüö® √öLTIMAS ALERTAS:")
        recent_alerts = self.alert_system.alerts[-5:][::-1]
        for i, alert in enumerate(recent_alerts, 1):
            time_str = alert['timestamp'].strftime("%H:%M:%S")
            print(f"  {i}. [{time_str}] {alert['severity']}: {alert['message'][:40]}")

        # Persist the model when one was trained
        if self.anomaly_detector.is_trained:
            self.anomaly_detector.save_model('modelo_final.pkl')

        print(f"\n‚úÖ Sistema completado exitosamente!")

    def stop(self):
        """Stop the detection loop and the log stream."""
        self.running = False
        self.log_stream.stop_streaming()

    def run_demo(self, train_sec=10, detect_sec=30):
        """Run initialization followed by detection, reporting any error.

        Args:
            train_sec: Seconds of training-data collection.
            detect_sec: Seconds of live detection.
        """
        print("="*70)
        print("üöÄ DEMO: DETECCI√ìN DE FALLAS EN TIEMPO REAL")
        print("="*70)

        try:
            self.initialize(training_time=train_sec)
            self.run_detection(duration=detect_sec)

        except Exception as e:
            # Top-level demo boundary: report and continue to the footer.
            print(f"\n‚ùå Error: {e}")
            import traceback
            traceback.print_exc()

        finally:
            print("\n" + "="*70)
            print("üéØ DEMO COMPLETADA")
            print("="*70)

# ============================================================================
# EJECUCIÓN EN COLAB
# ============================================================================

def ejecutar_sistema_simple():
    """Create the simplified system, run its demo and hand the instance back.

    The demo uses 8 seconds of training followed by 20 seconds of detection.

    Returns:
        The ``RealTimeSystemSimple`` instance after the demo has finished.
    """
    print("üöÄ Iniciando sistema simplificado de detecci√≥n...")

    demo_system = RealTimeSystemSimple()
    demo_settings = {'train_sec': 8, 'detect_sec': 20}
    demo_system.run_demo(**demo_settings)
    return demo_system

# Script entry point: run the demo when this file is executed directly
# (e.g. as a Colab cell) and keep the resulting system in ``sistema``.
if __name__ == "__main__":
    sistema = ejecutar_sistema_simple()

üöÄ Iniciando sistema simplificado de detecci√≥n...
üöÄ DEMO: DETECCI√ìN DE FALLAS EN TIEMPO REAL
üöÄ Inicializando sistema...
üìä Generando 8 segundos de datos de entrenamiento...
   üìà 39 logs recolectados para entrenamiento
ü§ñ Entrenando modelo...
‚úÖ Modelo entrenado con 39 muestras
‚úÖ Sistema inicializado

üéØ Ejecutando detecci√≥n por 20 segundos...

üìä Log #10 | Anomal√≠as: 0 | Alertas: 4
   üìà JVM: 75.1% | Resp: 696ms | ERROR: 1.9%

üìä Log #20 | Anomal√≠as: 0 | Alertas: 7
   üìà JVM: 64.7% | Resp: 174ms | ERROR: 0.9%

üìä Log #30 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 85.2% | Resp: 1051ms | ERROR: 5.8%

üìä Log #40 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 79.5% | Resp: 498ms | ERROR: 1.3%

üìà RESUMEN DE EJECUCI√ìN

üìä ESTAD√çSTICAS:
  ‚Ä¢ Duraci√≥n: 20.1 segundos
  ‚Ä¢ Logs procesados: 41
  ‚Ä¢ Tasa: 2.0 logs/seg
  ‚Ä¢ Anomal√≠as detectadas: 0
  ‚Ä¢ Alertas generadas: 8

ü§ñ MODELO:
  ‚Ä¢ Entrenado: S√≠
  ‚Ä¢ Datos entrenamiento: 39

üö® √öLTIMAS AL

In [None]:
# Option 1: run everything at once
sistema = ejecutar_sistema_simple()

# Option 2: run step by step
# NOTE: as written, both options execute one after the other when this cell
# runs, and ``sistema`` ends up bound to the Option 2 instance.
sistema = RealTimeSystemSimple()
sistema.initialize(training_time=8)  # 8 seconds of training-data collection
sistema.run_detection(duration=20)   # 20 seconds of detection

üöÄ Iniciando sistema simplificado de detecci√≥n...
üöÄ DEMO: DETECCI√ìN DE FALLAS EN TIEMPO REAL
üöÄ Inicializando sistema...
üìä Generando 8 segundos de datos de entrenamiento...
   üìà 40 logs recolectados para entrenamiento
ü§ñ Entrenando modelo...
‚úÖ Modelo entrenado con 40 muestras
‚úÖ Sistema inicializado

üéØ Ejecutando detecci√≥n por 20 segundos...

üìä Log #10 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 62.1% | Resp: 140ms | ERROR: 0.2%

üìä Log #20 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 74.6% | Resp: 281ms | ERROR: 0.8%

üìä Log #30 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 76.9% | Resp: 492ms | ERROR: 4.7%

üìä Log #40 | Anomal√≠as: 0 | Alertas: 8
   üìà JVM: 73.1% | Resp: 291ms | ERROR: 0.1%

üìà RESUMEN DE EJECUCI√ìN

üìä ESTAD√çSTICAS:
  ‚Ä¢ Duraci√≥n: 20.1 segundos
  ‚Ä¢ Logs procesados: 41
  ‚Ä¢ Tasa: 2.0 logs/seg
  ‚Ä¢ Anomal√≠as detectadas: 0
  ‚Ä¢ Alertas generadas: 8

ü§ñ MODELO:
  ‚Ä¢ Entrenado: S√≠
  ‚Ä¢ Datos entrenamiento: 40

üö® √öLTIMAS ALE