In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, IsolationForest
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
import warnings
warnings.filterwarnings('ignore')

In [None]:
def load_data(self):
    """
    Load the raw recording at ``self.filepath`` into ``self.df``.

    The file is parsed as whitespace-separated text into 19 columns:
    ``Time_s``, ``CO_or_CH4_ppm``, ``Ethylene_ppm`` and ``Sensor_1`` to
    ``Sensor_16``. When the first line does not begin with a numeric token
    it is treated as a header line and skipped, so files with and without
    a header both end up with numeric columns.

    Returns
    -------
    bool
        True if the file was read and summarised; False if any exception
        occurred (the error is printed rather than re-raised).
    """
    print(f"\n[1] CARGANDO DATOS: {self.mixture_name}")
    print("-" * 80)

    # Column names assigned to the 19 whitespace-separated fields
    columns = ['Time_s', 'CO_or_CH4_ppm', 'Ethylene_ppm'] + \
              [f'Sensor_{i}' for i in range(1, 17)]

    try:
        # Peek at the first line: a textual header would otherwise be read
        # as data and turn every column into strings.
        with open(self.filepath, 'r', encoding='utf-8', errors='replace') as handle:
            first_tokens = handle.readline().split()
        try:
            float(first_tokens[0])
            header_rows = 0
        except (IndexError, ValueError):
            header_rows = 1

        # r'\s+' is handled by the default C parser, so the much slower
        # python engine is not requested.
        self.df = pd.read_csv(self.filepath, sep=r'\s+', header=None,
                              names=columns, skiprows=header_rows)

        duration_s = self.df['Time_s'].max()
        memory_mb = self.df.memory_usage(deep=True).sum() / 1024**2

        print(f"✓ Datos cargados exitosamente")
        print(f"  - Registros: {len(self.df):,}")
        print(f"  - Columnas: {len(self.df.columns)}")
        print(f"  - Duración: {duration_s:.0f} segundos ({duration_s/3600:.2f} horas)")
        print(f"  - Memoria: {memory_mb:.2f} MB")

        # Total count of missing cells across the whole frame
        missing = self.df.isnull().sum().sum()
        print(f"  - Valores faltantes: {missing}")

        return True

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # decide what to do with the False result.
        print(f"✗ Error al cargar datos: {e}")
        return False

In [None]:
def exploratory_analysis(self):
    """
    Print an exploratory summary of ``self.df``.

    Reports descriptive statistics of the two concentration columns, the
    number of value changes per gas, per-sensor statistics together with
    the share of consecutive samples that did not change, and the share of
    rows in each operating regime (single gas, mixture, no gas).

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[2] ANÁLISIS EXPLORATORIO - {self.mixture_name}")
    print("-" * 80)

    n_rows = len(self.df)
    # Number of consecutive-sample comparisons; clamped so an empty or
    # single-row frame does not divide by zero.
    n_steps = max(n_rows - 1, 1)

    def _count_changes(series):
        # diff() is NaN for the first sample and NaN != 0 evaluates to
        # True, so the first entry is dropped to avoid a phantom change.
        return int(series.diff().iloc[1:].ne(0).sum())

    # Basic statistics of the concentration columns
    print("\n2.1 Estadísticas de Concentraciones:")
    print(self.df[['CO_or_CH4_ppm', 'Ethylene_ppm']].describe())

    # Number of times each concentration value changes
    transitions_gas1 = _count_changes(self.df['CO_or_CH4_ppm'])
    transitions_gas2 = _count_changes(self.df['Ethylene_ppm'])
    print(f"\n2.2 Transiciones detectadas:")
    print(f"  - Gas 1 (CO/CH4): {transitions_gas1}")
    print(f"  - Gas 2 (Etileno): {transitions_gas2}")

    # Per-sensor statistics
    sensor_cols = [col for col in self.df.columns if col.startswith('Sensor_')]
    print(f"\n2.3 Análisis de sensores:")

    for i, col in enumerate(sensor_cols, 1):
        signal = self.df[col]
        mean_val = signal.mean()
        std_val = signal.std()
        min_val = signal.min()
        max_val = signal.max()

        # Share of consecutive steps where the reading stayed identical
        changes = _count_changes(signal)
        flatline_pct = (1 - changes / n_steps) * 100

        print(f"  Sensor {i:2d}: μ={mean_val:7.2f}, σ={std_val:7.2f}, "
              f"rango=[{min_val:7.2f}, {max_val:7.2f}], flatline={flatline_pct:.2f}%")

    # Operating regimes: exactly one gas present, both present, or none.
    gas1_off = self.df['CO_or_CH4_ppm'] == 0
    gas2_off = self.df['Ethylene_ppm'] == 0
    denominator = max(n_rows, 1)
    gas_pure_pct = ((gas1_off ^ gas2_off).sum() / denominator) * 100
    mixture_pct = ((~gas1_off & ~gas2_off).sum() / denominator) * 100
    no_gas_pct = ((gas1_off & gas2_off).sum() / denominator) * 100
    print(f"\n2.4 Regímenes de operación:")
    print(f"  - Gas puro: {gas_pure_pct:.2f}%")
    print(f"  - Mezcla: {mixture_pct:.2f}%")
    print(f"  - Sin gas (aire): {no_gas_pct:.2f}%")

    return True

In [None]:
def preprocess_data(self, downsample_factor=100, window_size=5, original_hz=100):
    """
    Build ``self.df_processed`` from ``self.df``.

    Steps: keep every ``downsample_factor``-th row, replace each
    ``Sensor_*`` reading ``s`` by ``40000 / s``, and add one
    ``<sensor>_smooth`` column per sensor holding a centred moving average.
    ``self.df`` itself is not modified.

    Parameters
    ----------
    downsample_factor : int
        Keep one row out of this many (e.g. 100 to go from 100 Hz to 1 Hz).
    window_size : int
        Window length, in samples, of the centred moving average.
    original_hz : float
        Sampling frequency of ``self.df``; only used in the printed report.

    Returns
    -------
    bool
        Always True.

    Raises
    ------
    ValueError
        If ``downsample_factor`` or ``window_size`` is smaller than 1.
    """
    if downsample_factor < 1 or window_size < 1:
        raise ValueError("downsample_factor y window_size deben ser >= 1")

    print(f"\n[3] PREPROCESAMIENTO - {self.mixture_name}")
    print("-" * 80)

    print(f"  - Frecuencia original: {original_hz} Hz")
    print(f"  - Factor de downsampling: {downsample_factor}")
    print(f"  - Frecuencia resultante: {original_hz/downsample_factor} Hz")

    # Downsampling by plain decimation (no anti-alias filtering is applied)
    processed = self.df.iloc[::downsample_factor].copy().reset_index(drop=True)
    print(f"  - Registros después de downsampling: {len(processed):,}")

    sensor_cols = [col for col in processed.columns if col.startswith('Sensor_')]

    if sensor_cols:
        # A reading of exactly 0 would turn into +/-inf, which dropna()
        # does not remove; mark it as missing instead.
        readings = processed[sensor_cols]
        n_zero = int((readings == 0).sum().sum())
        processed[sensor_cols] = 40000 / readings.replace(0, np.nan)
        if n_zero:
            print(f"  - Lecturas nulas reemplazadas por NaN: {n_zero}")

    print(f"  - Sensores convertidos a kΩ")

    if sensor_cols:
        # Centred moving average; min_periods=1 keeps the edges defined.
        smoothed = (processed[sensor_cols]
                    .rolling(window=window_size, center=True, min_periods=1)
                    .mean()
                    .add_suffix('_smooth'))
        # One concat instead of per-column inserts keeps the frame unfragmented.
        processed = pd.concat([processed, smoothed], axis=1)

    print(f"  - Aplicado suavizado con ventana de {window_size} muestras")

    self.df_processed = processed
    return True

In [None]:
def feature_engineering(self, lag_steps=[1, 2, 5], window_sizes=[5, 10]):
    """
    Add temporal features to ``self.df_processed`` and drop incomplete rows.

    For every base sensor column (named exactly ``Sensor_<digits>``) the
    following columns are appended: ``<sensor>_lag<k>`` for each lag,
    ``<sensor>_mean<w>``, ``_std<w>``, ``_max<w>``, ``_min<w>`` for each
    trailing window, and ``<sensor>_diff``. Rows containing any NaN are
    then removed.

    Only base sensor columns are used as inputs, so running this function
    again recomputes the same feature set instead of deriving features
    from previously derived columns.

    Parameters
    ----------
    lag_steps : list
        Lag offsets, in rows, used for the lag features. Not mutated.
    window_sizes : list
        Trailing window lengths, in rows, for the aggregates. Not mutated.

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[4] INGENIERÍA DE CARACTERÍSTICAS - {self.mixture_name}")
    print("-" * 80)

    frame = self.df_processed
    prefix = 'Sensor_'
    sensor_cols = [col for col in frame.columns
                   if col.startswith(prefix) and col[len(prefix):].isdigit()]

    # Collected first and attached in a single concat: inserting columns
    # one at a time fragments the DataFrame.
    derived = {}

    # Temporal features: lags
    print(f"  - Creando lags: {lag_steps}")
    for col in sensor_cols:
        for lag in lag_steps:
            derived[f'{col}_lag{lag}'] = frame[col].shift(lag)

    # Temporal features: trailing-window aggregates
    print(f"  - Creando agregados en ventanas: {window_sizes}")
    for col in sensor_cols:
        for win in window_sizes:
            roller = frame[col].rolling(window=win, min_periods=1)
            derived[f'{col}_mean{win}'] = roller.mean()
            # std of a single sample is NaN, so the first row is dropped below
            derived[f'{col}_std{win}'] = roller.std()
            derived[f'{col}_max{win}'] = roller.max()
            derived[f'{col}_min{win}'] = roller.min()

    # First differences
    print(f"  - Creando derivadas (cambios)")
    for col in sensor_cols:
        derived[f'{col}_diff'] = frame[col].diff()

    feature_count = len(derived)

    # Replace any same-named columns left over from a previous run.
    stale = [name for name in derived if name in frame.columns]
    frame = pd.concat(
        [frame.drop(columns=stale), pd.DataFrame(derived, index=frame.index)],
        axis=1)

    # Drop rows made incomplete by lags, windows and differences
    initial_rows = len(frame)
    self.df_processed = frame.dropna()
    dropped_rows = initial_rows - len(self.df_processed)

    print(f"  - Total de features creadas: {feature_count}")
    print(f"  - Filas eliminadas por NaN: {dropped_rows}")
    print(f"  - Registros finales: {len(self.df_processed):,}")

    return True

In [None]:
def prepare_train_test(self, test_size=0.2, val_size=0.1):
    """
    Split ``self.df_processed`` chronologically and scale the features.

    All ``Sensor_*`` columns are used as features; ``CO_or_CH4_ppm`` and
    ``Ethylene_ppm`` are the targets. Rows are split in order (no
    shuffling): the last ``test_size`` fraction is the test set, and the
    last ``val_size`` fraction of what remains is the validation set. The
    scaler is fitted on the training rows only. Everything is stored in
    ``self.data``.

    Parameters
    ----------
    test_size : float
        Fraction of all rows used for the test set, in [0, 1).
    val_size : float
        Fraction of the non-test rows used for validation, in [0, 1).

    Returns
    -------
    bool
        Always True.

    Raises
    ------
    ValueError
        If a fraction is outside [0, 1) or no training rows remain.
    """
    if not (0 <= test_size < 1 and 0 <= val_size < 1):
        raise ValueError("test_size y val_size deben estar en [0, 1)")

    print(f"\n[5] PREPARACIÓN TRAIN/VAL/TEST - {self.mixture_name}")
    print("-" * 80)

    # Features (X) and targets (y)
    feature_cols = [col for col in self.df_processed.columns
                    if col.startswith('Sensor_')]
    target_cols = ['CO_or_CH4_ppm', 'Ethylene_ppm']

    X = self.df_processed[feature_cols].values
    y = self.df_processed[target_cols].values

    n_samples = len(X)
    n_test = int(n_samples * test_size)
    n_val = int((n_samples - n_test) * val_size)

    # Explicit boundaries: negative-index slicing (X[:-n], X[-n:]) inverts
    # when n == 0, returning an empty head and the whole array as the tail.
    test_start = n_samples - n_test
    val_start = test_start - n_val
    if val_start <= 0:
        raise ValueError("No quedan muestras para el conjunto de entrenamiento")

    X_train, X_val, X_test = X[:val_start], X[val_start:test_start], X[test_start:]
    y_train, y_val, y_test = y[:val_start], y[val_start:test_start], y[test_start:]

    print(f"  - Total muestras: {n_samples:,}")
    print(f"  - Train: {len(X_train):,} ({len(X_train)/n_samples*100:.1f}%)")
    print(f"  - Validación: {len(X_val):,} ({len(X_val)/n_samples*100:.1f}%)")
    print(f"  - Test: {len(X_test):,} ({len(X_test)/n_samples*100:.1f}%)")
    print(f"  - Features: {X.shape[1]}")
    print(f"  - Targets: {y.shape[1]}")

    # Scaling: statistics come from the training rows only
    print(f"\n  - Aplicando StandardScaler...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    def _scale(block):
        # An empty split is passed through untouched rather than handed
        # to the scaler.
        return scaler.transform(block) if len(block) else block.astype(float)

    self.data = {
        'X_train': X_train_scaled,
        'X_val': _scale(X_val),
        'X_test': _scale(X_test),
        'y_train': y_train,
        'y_val': y_val,
        'y_test': y_test,
        'scaler': scaler,
        'feature_cols': feature_cols,
        'target_cols': target_cols
    }

    return True

In [None]:
def train_models(self):
    """
    Fit the candidate regressors and score each one on the validation split.

    Reads ``X_train``/``y_train``/``X_val``/``y_val`` from ``self.data``.
    For every candidate the fitted estimator, its validation predictions
    and its MAE / RMSE / R² (overall and per target column) are stored in
    ``self.models[<name>]``.

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[6] ENTRENAMIENTO DE MODELOS - {self.mixture_name}")
    print("-" * 80)

    splits = self.data
    X_train, y_train = splits['X_train'], splits['y_train']
    X_val, y_val = splits['X_val'], splits['y_val']

    # Candidate estimators, keyed by display name
    candidates = {
        'Linear Regression': LinearRegression(),
        'Random Forest': RandomForestRegressor(n_estimators=50, max_depth=15,
                                               random_state=42, n_jobs=-1),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=50, max_depth=5,
                                                       random_state=42)
    }

    for name, estimator in candidates.items():
        print(f"\n  Entrenando: {name}")
        print(f"  {'-'*60}")

        # Linear regression is fitted on both targets directly; every
        # other estimator is wrapped so one model is fitted per target.
        if name == 'Linear Regression':
            fitted = estimator
        else:
            fitted = MultiOutputRegressor(estimator)
        fitted.fit(X_train, y_train)

        # Validation predictions
        y_pred = fitted.predict(X_val)

        # Overall metrics
        mae = mean_absolute_error(y_val, y_pred)
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        r2 = r2_score(y_val, y_pred)

        # Metrics per target column
        mae_per_target, rmse_per_target, r2_per_target = [], [], []
        for target_idx in range(y_val.shape[1]):
            truth = y_val[:, target_idx]
            guess = y_pred[:, target_idx]
            mae_per_target.append(mean_absolute_error(truth, guess))
            rmse_per_target.append(np.sqrt(mean_squared_error(truth, guess)))
            r2_per_target.append(r2_score(truth, guess))

        print(f"    MAE global: {mae:.4f} ppm")
        print(f"    RMSE global: {rmse:.4f} ppm")
        print(f"    R² global: {r2:.4f}")
        print(f"    MAE por gas: {[f'{m:.4f}' for m in mae_per_target]}")
        print(f"    RMSE por gas: {[f'{r:.4f}' for r in rmse_per_target]}")
        print(f"    R² por gas: {[f'{r:.4f}' for r in r2_per_target]}")

        # Keep the fitted estimator together with its validation results
        metrics = {
            'mae': mae,
            'rmse': rmse,
            'r2': r2,
            'mae_per_target': mae_per_target,
            'rmse_per_target': rmse_per_target,
            'r2_per_target': r2_per_target
        }
        self.models[name] = {
            'model': fitted,
            'metrics': metrics,
            'predictions_val': y_pred
        }

    return True

In [None]:
def evaluate_best_model(self):
    """
    Score the model with the lowest validation MAE on the test split.

    The winner is picked from ``self.models`` by its stored ``'mae'``
    metric, used to predict ``self.data['X_test']``, and the resulting
    MAE / RMSE / R² (overall and per target) plus the MAE expressed as a
    percentage of each target's test-set range are printed and stored in
    ``self.results['test']``.

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[7] EVALUACIÓN EN TEST - {self.mixture_name}")
    print("-" * 80)

    # Pick the entry with the smallest validation MAE
    def _validation_mae(model_name):
        return self.models[model_name]['metrics']['mae']

    best_model_name = min(self.models, key=_validation_mae)
    winner = self.models[best_model_name]

    print(f"\n  Mejor modelo: {best_model_name}")
    print(f"  {'-'*60}")

    # Predict the held-out test split
    X_test = self.data['X_test']
    y_test = self.data['y_test']
    y_pred_test = winner['model'].predict(X_test)

    # Overall test metrics
    mae_test = mean_absolute_error(y_test, y_pred_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    r2_test = r2_score(y_test, y_pred_test)

    # Test metrics per target column
    n_targets = y_test.shape[1]
    mae_per_target, rmse_per_target, r2_per_target = [], [], []
    for col in range(n_targets):
        truth, guess = y_test[:, col], y_pred_test[:, col]
        mae_per_target.append(mean_absolute_error(truth, guess))
        rmse_per_target.append(np.sqrt(mean_squared_error(truth, guess)))
        r2_per_target.append(r2_score(truth, guess))

    print(f"    MAE: {mae_test:.4f} ppm")
    print(f"    RMSE: {rmse_test:.4f} ppm")
    print(f"    R²: {r2_test:.4f}")
    print(f"\n  Métricas por gas:")
    for idx, target in enumerate(self.data['target_cols']):
        print(f"    {target}:")
        print(f"      MAE:  {mae_per_target[idx]:.4f} ppm")
        print(f"      RMSE: {rmse_per_target[idx]:.4f} ppm")
        print(f"      R²:   {r2_per_target[idx]:.4f}")

    # MAE as a percentage of the observed test range; a target whose test
    # values are all equal (zero range) is reported as 0.
    ranges = []
    for col in range(n_targets):
        column = y_test[:, col]
        ranges.append(column.max() - column.min())

    mae_norm = []
    for col, span in enumerate(ranges):
        if span > 0:
            mae_norm.append((mae_per_target[col] / span) * 100)
        else:
            mae_norm.append(0)

    print(f"\n  MAE normalizado (% del rango):")
    for idx, target in enumerate(self.data['target_cols']):
        print(f"    {target}: {mae_norm[idx]:.2f}%")

    # Persist everything later stages read back
    self.results['test'] = {
        'best_model': best_model_name,
        'y_pred': y_pred_test,
        'y_true': y_test,
        'mae': mae_test,
        'rmse': rmse_test,
        'r2': r2_test,
        'mae_per_target': mae_per_target,
        'rmse_per_target': rmse_per_target,
        'r2_per_target': r2_per_target,
        'mae_normalized': mae_norm
    }

    return True

In [None]:
def detect_anomalies(self, contamination=0.05):
    """
    Flag anomalies on the test split using signal rules and residuals.

    Three reports are produced and stored in ``self.results['anomalies']``:
    the number of base sensors whose readings barely change (flatline),
    the number whose readings sit close to their own maximum for a sizeable
    share of the rows (possible saturation), and the rows of the residual
    matrix (``y_true - y_pred``) that an Isolation Forest labels as
    outliers, together with residual summary statistics.

    The signal rules are evaluated on the last ``len(y_true)`` rows of
    ``self.df_processed``.

    Parameters
    ----------
    contamination : float
        Expected proportion of anomalies, passed to the Isolation Forest.

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[8] DETECCIÓN DE ANOMALÍAS - {self.mixture_name}")
    print("-" * 80)

    # Residuals on the test split
    test_results = self.results['test']
    y_true = test_results['y_true']
    y_pred = test_results['y_pred']
    residuals = y_true - y_pred

    print(f"  8.1 Anomalías basadas en reglas de señal:")
    print(f"  {'-'*60}")

    # Rows of the processed frame that line up with the test split
    total_rows = len(self.df_processed)
    test_indices = range(total_rows - len(y_true), total_rows)
    df_test = self.df_processed.iloc[test_indices].reset_index(drop=True)
    n_test_rows = len(df_test)

    # Base sensor columns only: anything carrying a derived-feature tag is skipped
    derived_tags = ['smooth', 'lag', 'mean', 'std', 'max', 'min', 'diff']
    sensor_cols = []
    for col in df_test.columns:
        if col.startswith('Sensor_') and all(tag not in col for tag in derived_tags):
            sensor_cols.append(col)

    # Flatline: more than 95% of the steps move by less than 0.01
    flatlines = 0
    for col in sensor_cols:
        still_steps = (df_test[col].diff().abs() < 0.01).sum()
        if still_steps / n_test_rows > 0.95:
            flatlines += 1

    print(f"    - Sensores con flatline detectados: {flatlines}/{len(sensor_cols)}")

    # Saturation: more than 10% of the rows above 98% of the column maximum
    saturations = 0
    for col in sensor_cols:
        signal = df_test[col]
        ceiling = signal.max() * 0.98
        near_ceiling = (signal > ceiling).sum()
        if near_ceiling / n_test_rows > 0.1:
            saturations += 1

    print(f"    - Sensores con posible saturación: {saturations}/{len(sensor_cols)}")

    print(f"\n  8.2 Anomalías basadas en residuales:")
    print(f"  {'-'*60}")

    # Isolation Forest over the residual vectors; -1 marks an outlier
    iso_forest = IsolationForest(contamination=contamination, random_state=42)
    anomaly_labels = iso_forest.fit_predict(residuals)

    n_anomalies = (anomaly_labels == -1).sum()
    anomaly_pct = (n_anomalies / len(anomaly_labels)) * 100

    print(f"    - Anomalías detectadas: {n_anomalies} ({anomaly_pct:.2f}%)")
    print(f"    - Umbral de contaminación: {contamination*100:.2f}%")

    # Per-target residual summary
    residual_stats = {
        'mean': residuals.mean(axis=0),
        'std': residuals.std(axis=0),
        'max_abs': np.abs(residuals).max(axis=0)
    }

    print(f"\n  8.3 Estadísticas de residuales por gas:")
    for idx, target in enumerate(self.data['target_cols']):
        print(f"    {target}:")
        print(f"      Media: {residual_stats['mean'][idx]:.4f} ppm")
        print(f"      Std: {residual_stats['std'][idx]:.4f} ppm")
        print(f"      Max abs: {residual_stats['max_abs'][idx]:.4f} ppm")

    # Persist the anomaly results
    self.results['anomalies'] = {
        'flatlines': flatlines,
        'saturations': saturations,
        'isolation_forest_labels': anomaly_labels,
        'n_anomalies': n_anomalies,
        'residuals': residuals,
        'residual_stats': residual_stats
    }

    return True

In [None]:
def test_robustness(self, failure_rates=[0.1, 0.2, 0.3], n_trials=5):
        """
        Probar robustez ante fallos de sensores
        
        Parameters:
        -----------
        failure_rates : list
            Tasas de fallo a probar (proporción de sensores deshabilitados)
        n_trials : int
            Número de pruebas por tasa de fallo
        """
        print(f"\n[9] PRUEBAS DE ROBUSTEZ - {self.mixture_name}")
        print("-" * 80)
        
        X_test = self.data['X_test'].copy()
        y_test = self.data['y_test']
        best_model = self.models[self.results['test']['best_model']]['model']
        
        baseline_mae = self.results['test']['mae']
        
        print(f"  MAE baseline (sin fallos): {baseline_mae:.4f} ppm")
        print(f"\n  Simulando fallos de sensores:")
        print(f"  {'-'*60}")
        
        robustness_results = []
        
        for failure_rate in failure_rates:
            maes = []
            
            for trial in range(n_trials):
                # Copiar datos de test
                X_test_failed = X_test.copy()
                
                # Deshabilitar aleatoriamente sensores (poner a 0)
                n_features = X_test.shape[1]
                n_failures = int(n_features * failure_rate)
                failed_indices = np.random.choice(n_features, n_failures, replace=False)
                X_test_failed[:, failed_indices] = 0
                
                # Predecir con sensores fallados
                y_pred_failed = best_model.predict(X_test_failed)
                
                # Calcular MAE
                mae_failed = mean_absolute_error(y_test, y_pred_failed)
                maes.append(mae_failed)
            
            # Estadísticas
            mean_mae = np.mean(maes)
            std_mae = np.std(maes)
            degradation = ((mean_mae - baseline_mae) / baseline_mae) * 100
            
            print(f"    Tasa de fallo {failure_rate*100:.0f}%:")
            print(f"      MAE medio: {mean_mae:.4f} ± {std_mae:.4f} ppm")
            print(f"      Degradación: {degradation:.2f}%")
            
            robustness_results.append({
                'failure_rate': failure_rate,
                'mean_mae': mean_mae,
                'std_mae': std_mae,
                'degradation_pct': degradation
            })
        
        self.results['robustness'] = robustness_results
        
        return True

In [None]:
def generate_visualizations(self, output_dir='/home/claude/visualizations'):
    """
    Render the analysis figures and save them as PNG files in ``output_dir``.

    Five figures are written, each suffixed with ``self.mixture_name``:
    concentration time series (first 10,000 rows of ``self.df``),
    validation MAE/RMSE per model, predicted vs. true test values,
    residual histograms, and MAE versus sensor failure rate. The directory
    is created if it does not exist.

    Reads ``self.df``, ``self.models``, ``self.data['target_cols']`` and the
    ``'test'``, ``'anomalies'`` and ``'robustness'`` entries of
    ``self.results``.

    Returns
    -------
    bool
        Always True.
    """
    print(f"\n[10] GENERANDO VISUALIZACIONES - {self.mixture_name}")
    print("-" * 80)

    import os
    os.makedirs(output_dir, exist_ok=True)

    def _save_and_close(stem):
        # Acts on the current figure, i.e. the one created most recently.
        plt.tight_layout()
        plt.savefig(f'{output_dir}/{stem}_{self.mixture_name}.png',
                    dpi=300, bbox_inches='tight')
        plt.close()

    # 1. Concentration time series
    print(f"  - Generando gráfica de evolución temporal...")
    fig, axes = plt.subplots(2, 1, figsize=(14, 8))
    upper, lower = axes[0], axes[1]

    first_rows = slice(None, 10000)  # first 10k points only
    time_subset = self.df['Time_s'][first_rows]

    upper.plot(time_subset, self.df['CO_or_CH4_ppm'][first_rows],
               label='CO/CH₄', linewidth=0.8)
    upper.set_title(f'Evolución temporal de concentraciones - {self.mixture_name}',
                    fontsize=13, fontweight='bold')

    lower.plot(time_subset, self.df['Ethylene_ppm'][first_rows],
               label='Etileno', color='orange', linewidth=0.8)
    lower.set_xlabel('Tiempo (s)', fontsize=11)

    for panel in (upper, lower):
        panel.set_ylabel('Concentración (ppm)', fontsize=11)
        panel.legend()
        panel.grid(True, alpha=0.3)

    _save_and_close('01_evolucion_temporal')

    # 2. Model comparison on validation metrics
    print(f"  - Generando gráfica de comparación de modelos...")
    fig, ax = plt.subplots(figsize=(10, 6))

    model_names = list(self.models.keys())
    mae_values = [self.models[name]['metrics']['mae'] for name in model_names]
    rmse_values = [self.models[name]['metrics']['rmse'] for name in model_names]

    x = np.arange(len(model_names))
    width = 0.35

    # Two bars per model, side by side around each tick
    ax.bar(x - width/2, mae_values, width, label='MAE', alpha=0.8)
    ax.bar(x + width/2, rmse_values, width, label='RMSE', alpha=0.8)

    ax.set_title(f'Comparación de modelos - {self.mixture_name}',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Modelo', fontsize=11)
    ax.set_ylabel('Error (ppm)', fontsize=11)
    ax.set_xticks(x)
    ax.set_xticklabels(model_names, rotation=15, ha='right')
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')

    _save_and_close('02_comparacion_modelos')

    # 3. Predicted vs. true values on the test split
    print(f"  - Generando gráfica de predicciones vs real...")
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    y_true = self.results['test']['y_true']
    y_pred = self.results['test']['y_pred']

    for i, target in enumerate(self.data['target_cols']):
        panel = axes[i]
        truth = y_true[:, i]
        panel.scatter(truth, y_pred[:, i], alpha=0.5, s=10)
        # Identity line spanning the observed range of true values
        panel.plot([truth.min(), truth.max()],
                   [truth.min(), truth.max()],
                   'r--', linewidth=2, label='Perfecto')
        panel.set_xlabel('Real (ppm)', fontsize=11)
        panel.set_ylabel('Predicho (ppm)', fontsize=11)
        panel.set_title(f'{target}\nR²={self.results["test"]["r2_per_target"][i]:.4f}',
                        fontsize=12)
        panel.legend()
        panel.grid(True, alpha=0.3)

    plt.suptitle(f'Predicciones vs Real - {self.mixture_name}',
                 fontsize=14, fontweight='bold')
    _save_and_close('03_predicciones_vs_real')

    # 4. Residual distributions
    print(f"  - Generando gráfica de distribución de residuales...")
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    residuals = self.results['anomalies']['residuals']

    for i, target in enumerate(self.data['target_cols']):
        panel = axes[i]
        errors = residuals[:, i]
        panel.hist(errors, bins=50, alpha=0.7, edgecolor='black')
        panel.axvline(0, color='red', linestyle='--', linewidth=2, label='Cero')
        panel.set_xlabel('Residual (ppm)', fontsize=11)
        panel.set_ylabel('Frecuencia', fontsize=11)
        panel.set_title(f'{target}\nμ={errors.mean():.4f}, σ={errors.std():.4f}',
                        fontsize=12)
        panel.legend()
        panel.grid(True, alpha=0.3, axis='y')

    plt.suptitle(f'Distribución de residuales - {self.mixture_name}',
                 fontsize=14, fontweight='bold')
    _save_and_close('04_distribucion_residuales')

    # 5. Robustness against sensor failures
    print(f"  - Generando gráfica de robustez...")
    fig, ax = plt.subplots(figsize=(10, 6))

    runs = self.results['robustness']
    failure_rates = [run['failure_rate']*100 for run in runs]
    mean_maes = [run['mean_mae'] for run in runs]
    std_maes = [run['std_mae'] for run in runs]

    ax.errorbar(failure_rates, mean_maes, yerr=std_maes,
                marker='o', markersize=8, capsize=5, linewidth=2)
    ax.axhline(self.results['test']['mae'], color='red', linestyle='--',
               linewidth=2, label='MAE sin fallos')
    ax.set_title(f'Robustez ante fallos de sensores - {self.mixture_name}',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Tasa de fallo de sensores (%)', fontsize=11)
    ax.set_ylabel('MAE (ppm)', fontsize=11)
    ax.legend()
    ax.grid(True, alpha=0.3)

    _save_and_close('05_robustez_fallos')

    print(f"\n  ✓ Visualizaciones guardadas en: {output_dir}")

    return True

In [None]:
import os

# Input recordings to process: one entry per gas mixture.
datasets = [
    {
        'filepath': 'datasets/ethylene_CO.txt',
        'mixture_name': 'Etileno_CO'
    },
    {
        'filepath': 'datasets/ethylene_methane.txt',
        'mixture_name': 'Etileno_Metano'
    }
]

# Created empty here; nothing in this cell fills it.
all_results = {}

for dataset_info in datasets:
    filepath = dataset_info['filepath']
    mixture_name = dataset_info['mixture_name']

    print(f"\n\n{'='*80}")
    print(f"PROCESANDO: {mixture_name}")
    print(f"{'='*80}\n")

    # Skip this dataset when its file is missing, telling the user the
    # directory the file is actually looked up in.
    if not os.path.isfile(filepath):
        expected_dir = os.path.dirname(filepath) or '.'
        print(f"⚠ ADVERTENCIA: Archivo no encontrado: {filepath}")
        print(f"  Por favor, descarga el dataset desde:")
        print(f"  https://archive.ics.uci.edu/dataset/322")
        print(f"  Y coloca los archivos .txt en {expected_dir}/\n")
        continue
    
  