In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, backend as K
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import PowerTransformer, MaxAbsScaler
from sklearn.metrics import precision_recall_curve, auc, roc_curve, roc_auc_score
from scipy import stats
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
import joblib
import time
import logging
import warnings
import json

# Setup logging and suppress warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('Layer1_VAE')

# Create directories
os.makedirs('plots', exist_ok=True)
os.makedirs('models', exist_ok=True)
os.makedirs('reports', exist_ok=True)

class Layer1AutoencoderVAE:
    def __init__(self, input_dim, latent_dim=6, learning_rate=1e-4, layer_sizes=None):
        """Initialize the VAE model with configurable architecture"""
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate
        self.layer_sizes = layer_sizes or [64, 32]  # Default larger network
        self.encoder = None
        self.decoder = None
        self.vae = None
        self.kde = None
        self.threshold = None
        self.fallback_threshold = None  # Added for robustness
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.build_model()
        
    def sampling(self, args):
        """Reparameterization trick for VAE"""
        z_mean, z_log_var = args
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
        
    def build_model(self):
        """Build the VAE with customizable architecture"""
        # Encoder
        encoder_inputs = layers.Input(shape=(self.input_dim,))
        x = encoder_inputs
        
        for size in self.layer_sizes:
            x = layers.Dense(size, activation="relu", 
                             kernel_regularizer=regularizers.l2(1e-4))(x)
            x = layers.BatchNormalization()(x)
            x = layers.Dropout(0.2)(x)
        
        # VAE latent space
        z_mean = layers.Dense(self.latent_dim, name="z_mean")(x)
        z_log_var = layers.Dense(self.latent_dim, name="z_log_var")(x)
        z = layers.Lambda(self.sampling, output_shape=(self.latent_dim,), name="z")([z_mean, z_log_var])
        
        # Instantiate encoder
        self.encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
        
        # Decoder
        latent_inputs = layers.Input(shape=(self.latent_dim,))
        x = latent_inputs
        
        for size in reversed(self.layer_sizes):
            x = layers.Dense(size, activation="relu", 
                             kernel_regularizer=regularizers.l2(1e-4))(x)
            x = layers.BatchNormalization()(x)
            x = layers.Dropout(0.2)(x)
        
        decoder_outputs = layers.Dense(self.input_dim, activation="sigmoid")(x)
        
        # Instantiate decoder
        self.decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
        
        # Instantiate VAE model
        outputs = self.decoder(self.encoder(encoder_inputs)[2])
        self.vae = keras.Model(encoder_inputs, outputs, name="vae")
        
        # Define VAE loss with beta parameter for KL term weighting
        beta = 1.0  # Can be adjusted to control KL weight
        reconstruction_loss = keras.losses.MeanSquaredError()(encoder_inputs, outputs)
        reconstruction_loss *= self.input_dim
        kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        vae_loss = K.mean(reconstruction_loss + beta * kl_loss)
        
        self.vae.add_loss(vae_loss)
        self.vae.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate))
        
        # Custom metrics
        self.vae.metrics_names.append("reconstruction_loss")
        self.vae.metrics_names.append("kl_loss")
        self.vae.metrics.append(self.reconstruction_loss_tracker)
        self.vae.metrics.append(self.kl_loss_tracker)
        
    def train(self, X_train, X_val, epochs=100, batch_size=32):
        """Train the VAE model with early stopping and LR reduction"""
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=15, restore_best_weights=True
        )
        
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7
        )
        
        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir=f'./logs/vae_{time.strftime("%Y%m%d-%H%M%S")}',
            histogram_freq=1
        )
        
        class VAECallback(keras.callbacks.Callback):
            def __init__(self, parent):
                super(VAECallback, self).__init__()
                self.parent = parent
                
            def on_epoch_end(self, epoch, logs=None):
                # Track separate loss components
                x_val_reconstructed = self.model.predict(X_val)
                reconstruction_loss = np.mean(np.square(X_val - x_val_reconstructed))
                z_mean, z_log_var, _ = self.parent.encoder.predict(X_val)
                kl_loss = -0.5 * np.mean(np.sum(1 + z_log_var - np.square(z_mean) - np.exp(z_log_var), axis=1))
                
                # Update metrics
                self.parent.reconstruction_loss_tracker.update_state(reconstruction_loss)
                self.parent.kl_loss_tracker.update_state(kl_loss)
                
                logs['reconstruction_loss'] = reconstruction_loss
                logs['kl_loss'] = kl_loss
        
        vae_callback = VAECallback(self)
        
        logger.info("Starting VAE training...")
        history = self.vae.fit(
            X_train, X_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val),
            callbacks=[early_stopping, reduce_lr, tensorboard_callback, vae_callback],
            verbose=1
        )
        
        logger.info("VAE training completed.")
        self._set_dynamic_threshold(X_train)
        return history
    
    def _set_dynamic_threshold(self, X_data):
        """Set robust thresholds using multiple methods"""
        # Compute reconstruction errors
        _, _, z = self.encoder.predict(X_data)
        reconstructed = self.decoder.predict(z)
        mse = np.mean(np.square(X_data - reconstructed), axis=1)
        
        # Set percentile-based fallback threshold (90th-99th percentile)
        self.fallback_threshold = np.percentile(mse, 97.5)
        
        # Optimize KDE bandwidth using grid search
        param_grid = {'bandwidth': np.logspace(-2, 1, 10)}
        grid_search = GridSearchCV(KernelDensity(kernel='gaussian'), param_grid, cv=5)
        grid_search.fit(mse.reshape(-1, 1))
        
        # Use the optimized bandwidth
        best_bandwidth = grid_search.best_params_['bandwidth']
        self.kde = KernelDensity(kernel='gaussian', bandwidth=best_bandwidth).fit(mse.reshape(-1, 1))
        
        log_dens = self.kde.score_samples(mse.reshape(-1, 1))
        scores = -log_dens
        
        # Robust elbow finding
        try:
            sorted_scores = np.sort(scores)
            n_samples = len(sorted_scores)
            
            if n_samples < 10:  # Not enough samples for reliable elbow detection
                self.threshold = self.fallback_threshold
            else:
                indices = np.arange(n_samples)
                
                # Use window averaging for more stable angle calculation
                window_size = max(3, int(n_samples * 0.02))
                angles = []
                
                for i in range(window_size, n_samples - window_size):
                    # Use windowed points for more stability
                    p1 = np.array([indices[i-window_size]/n_samples, sorted_scores[i-window_size]])
                    p2 = np.array([indices[i]/n_samples, sorted_scores[i]])
                    p3 = np.array([indices[i+window_size]/n_samples, sorted_scores[i+window_size]])
                    
                    # Compute vectors
                    v1 = p2 - p1
                    v2 = p3 - p2
                    
                    # Normalize vectors
                    v1_norm = np.linalg.norm(v1)
                    v2_norm = np.linalg.norm(v2)
                    
                    if v1_norm > 0 and v2_norm > 0:
                        v1 = v1 / v1_norm
                        v2 = v2 / v2_norm
                        
                        # Compute angle using dot product
                        dot_product = np.dot(v1, v2)
                        angle = np.arccos(np.clip(dot_product, -1.0, 1.0))
                        angles.append(angle)
                    else:
                        angles.append(0)
                
                if len(angles) > 0 and max(angles) > 0.1:  # Check if we have meaningful angles
                    elbow_idx = np.argmax(angles) + window_size
                    adaptive_threshold = sorted_scores[elbow_idx]
                    
                    # Blend with percentile-based threshold for robustness
                    self.threshold = 0.7 * adaptive_threshold + 0.3 * self.fallback_threshold
                else:
                    self.threshold = self.fallback_threshold
        except Exception as e:
            logger.warning(f"Error in threshold calculation: {e}. Using fallback threshold.")
            self.threshold = self.fallback_threshold
        
        logger.info(f"Dynamic threshold: {self.threshold:.6f} (fallback: {self.fallback_threshold:.6f})")
        
        # Visualize the threshold
        plt.figure(figsize=(10, 6))
        plt.hist(scores, bins=50, alpha=0.6, color='blue')
        plt.axvline(x=self.threshold, color='red', linestyle='--', label=f'Threshold: {self.threshold:.6f}')
        plt.axvline(x=self.fallback_threshold, color='green', linestyle=':', label=f'Fallback: {self.fallback_threshold:.6f}')
        plt.title('Anomaly Score Distribution and Thresholds')
        plt.xlabel('Anomaly Score (-log density)')
        plt.ylabel('Frequency')
        plt.legend()
        plt.savefig('plots/anomaly_threshold.png')
        plt.close()
    
    def detect_anomalies(self, X_data):
        """Detect anomalies in the data"""
        if self.kde is None or self.threshold is None:
            raise ValueError("Model hasn't been trained yet. Call train() first.")
        
        # Get latent representations and reconstructions
        _, _, z = self.encoder.predict(X_data)
        reconstructed = self.decoder.predict(z)
        
        # Compute reconstruction error (MSE)
        mse = np.mean(np.square(X_data - reconstructed), axis=1)
        
        # Compute log density and anomaly scores
        log_dens = self.kde.score_samples(mse.reshape(-1, 1))
        anomaly_scores = -log_dens
        
        # Identify anomalies
        anomaly_indices = np.where(anomaly_scores > self.threshold)[0]
        anomalies = X_data[anomaly_indices]
        
        # Compute confidence
        max_score = np.max(anomaly_scores)
        min_score = np.min(anomaly_scores)
        confidence = (anomaly_scores - min_score) / (max_score - min_score) if max_score > min_score else np.zeros_like(anomaly_scores)
        
        return anomalies, anomaly_indices, anomaly_scores, confidence
    
    def get_encoded_features(self, X_data):
        """Extract features from the encoder's latent space"""
        _, _, z = self.encoder.predict(X_data)
        return z
    
    def save_model(self, base_path='models'):
        """Save the model and artifacts"""
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        
        # Save full VAE model
        self.vae.save(f'{base_path}/layer1_model_{timestamp}.h5')
        self.encoder.save(f'{base_path}/layer1_encoder_{timestamp}.h5')
        self.decoder.save(f'{base_path}/layer1_decoder_{timestamp}.h5')
        
        # Create symlinks to latest models
        for model_type in ['model', 'encoder', 'decoder']:
            latest_link = f'{base_path}/layer1_{model_type}.h5'
            if os.path.exists(latest_link):
                os.remove(latest_link)
            os.symlink(f'layer1_{model_type}_{timestamp}.h5', latest_link)
        
        # Save threshold and metadata
        model_config = {
            'input_dim': self.input_dim,
            'latent_dim': self.latent_dim,
            'layer_sizes': self.layer_sizes,
            'threshold': float(self.threshold),
            'fallback_threshold': float(self.fallback_threshold),
            'timestamp': timestamp
        }
        
        with open(f'{base_path}/layer1_config_{timestamp}.json', 'w') as f:
            json.dump(model_config, f, indent=4)
        
        # Save the KDE model
        joblib.dump(self.kde, f'{base_path}/layer1_kde_{timestamp}.pkl')
        joblib.dump(self.kde, f'{base_path}/layer1_kde.pkl')
        
        return timestamp

def analyze_features(data, save_dir='plots'):
    """Analyze feature distributions and create visualizations"""
    # Create feature distribution plots
    plt.figure(figsize=(15, 10))
    
    features = data.columns
    num_features = len(features)
    rows = int(np.ceil(num_features / 3))
    
    for i, feature in enumerate(features):
        plt.subplot(rows, 3, i+1)
        sns.histplot(data[feature], kde=True)
        plt.title(f'{feature} Distribution')
        plt.tight_layout()
    
    plt.savefig(f'{save_dir}/feature_distributions.png')
    plt.close()
    
    # Feature correlation heatmap
    plt.figure(figsize=(12, 10))
    corr_matrix = data.corr()
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Feature Correlation Heatmap')
    plt.tight_layout()
    plt.savefig(f'{save_dir}/feature_correlations.png')
    plt.close()
    
    # Calculate feature variances
    variances = data.var().sort_values(ascending=False)
    
    plt.figure(figsize=(12, 6))
    sns.barplot(x=variances.index, y=variances.values)
    plt.title('Feature Variance')
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.savefig(f'{save_dir}/feature_variances.png')
    plt.close()
    
    # Identify low variance features
    low_var_threshold = 0.01
    low_var_features = variances[variances < low_var_threshold].index.tolist()
    
    return {
        'variances': variances,
        'low_var_features': low_var_features,
        'correlation_matrix': corr_matrix
    }

def find_optimal_latent_dim(X_train, X_val, input_dim, min_dim=3, max_dim=15):
    """Use k-fold cross-validation to find optimal latent dimension"""
    logger.info("Finding optimal latent dimension...")
    
    # Define candidate dimensions to test
    if input_dim <= 10:
        candidate_dims = list(range(min_dim, min(max_dim, input_dim) + 1))
    else:
        # Test a range with more focus on smaller dimensions
        candidate_dims = list(range(min_dim, min(8, input_dim // 2) + 1))
        candidate_dims += [min(d, input_dim // 2) for d in [10, 12, 15]]
    
    # Remove duplicates and sort
    candidate_dims = sorted(list(set(candidate_dims)))
    
    # Combine train and validation for k-fold
    X_combined = np.vstack([X_train, X_val])
    
    results = []
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    
    for latent_dim in candidate_dims:
        fold_losses = []
        
        for train_idx, val_idx in kf.split(X_combined):
            X_fold_train, X_fold_val = X_combined[train_idx], X_combined[val_idx]
            
            # Train a smaller model for quick evaluation
            model = Layer1AutoencoderVAE(
                input_dim=input_dim, 
                latent_dim=latent_dim,
                layer_sizes=[32, 16],  # Smaller network for quick evaluation
                learning_rate=1e-3
            )
            
            # Train with fewer epochs for efficiency
            history = model.train(
                X_fold_train, X_fold_val, 
                epochs=30, 
                batch_size=64
            )
            
            # Get the best validation loss
            best_val_loss = min(history.history['val_loss'])
            fold_losses.append(best_val_loss)
        
        # Average loss across folds
        avg_loss = np.mean(fold_losses)
        logger.info(f"Latent dim {latent_dim}: avg validation loss = {avg_loss:.6f}")
        results.append((latent_dim, avg_loss))
    
    # Find dimension with lowest loss
    results.sort(key=lambda x: x[1])
    best_dim = results[0][0]
    
    # Visualize dimension search
    dims, losses = zip(*results)
    plt.figure(figsize=(10, 6))
    plt.plot(dims, losses, 'o-')
    plt.axvline(x=best_dim, color='red', linestyle='--')
    plt.title(f'Latent Dimension Optimization (Best: {best_dim})')
    plt.xlabel('Latent Dimension')
    plt.ylabel('Validation Loss')
    plt.grid(True)
    plt.savefig('plots/latent_dim_optimization.png')
    plt.close()
    
    logger.info(f"Optimal latent dimension: {best_dim}")
    return best_dim

def generate_evaluation_plots(model, X_normal, X_test=None, y_test=None, save_dir='plots'):
    """Generate evaluation plots for the model"""
    # Reconstruction error distribution for normal data
    _, _, z_normal = model.encoder.predict(X_normal)
    X_normal_reconstructed = model.decoder.predict(z_normal)
    normal_mse = np.mean(np.square(X_normal - X_normal_reconstructed), axis=1)
    
    plt.figure(figsize=(10, 6))
    sns.histplot(normal_mse, kde=True, color='blue', label='Normal')
    
    # If test data with labels is available
    if X_test is not None and y_test is not None:
        _, _, z_test = model.encoder.predict(X_test)
        X_test_reconstructed = model.decoder.predict(z_test)
        test_mse = np.mean(np.square(X_test - X_test_reconstructed), axis=1)
        
        # Separate normal and anomaly in test set
        if np.sum(y_test) > 0:  # If we have anomalies
            anomaly_mse = test_mse[y_test == 1]
            sns.histplot(anomaly_mse, kde=True, color='red', alpha=0.6, label='Anomaly')
    
    plt.axvline(x=model.threshold, color='green', linestyle='--', 
                label=f'Threshold: {model.threshold:.6f}')
    plt.title('Reconstruction Error Distribution')
    plt.xlabel('Mean Squared Error')
    plt.ylabel('Frequency')
    plt.legend()
    plt.savefig(f'{save_dir}/reconstruction_error_dist.png')
    plt.close()
    
    # If test data with labels is available, generate ROC and PR curves
    if X_test is not None and y_test is not None and np.sum(y_test) > 0:
        # Get anomaly scores for test data
        _, _, anomaly_scores, _ = model.detect_anomalies(X_test)
        
        # ROC curve
        fpr, tpr, _ = roc_curve(y_test, anomaly_scores)
        roc_auc = auc(fpr, tpr)
        
        plt.figure(figsize=(10, 6))
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")
        plt.savefig(f'{save_dir}/roc_curve.png')
        plt.close()
        
        # Precision-Recall curve
        precision, recall, _ = precision_recall_curve(y_test, anomaly_scores)
        pr_auc = auc(recall, precision)
        
        plt.figure(figsize=(10, 6))
        plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (area = {pr_auc:.2f})')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.legend(loc="lower left")
        plt.savefig(f'{save_dir}/pr_curve.png')
        plt.close()
        
        # Latent space visualization (2D projection if latent_dim > 2)
        _, _, z = model.encoder.predict(X_test)
        
        if model.latent_dim >= 2:
            plt.figure(figsize=(10, 8))
            if np.sum(y_test == 0) > 0:
                plt.scatter(z[y_test == 0, 0], z[y_test == 0, 1], c='blue', alpha=0.5, label='Normal')
            if np.sum(y_test == 1) > 0:
                plt.scatter(z[y_test == 1, 0], z[y_test == 1, 1], c='red', alpha=0.5, label='Anomaly')
            plt.title('Latent Space Visualization (First 2 Dimensions)')
            plt.xlabel('Latent Dim 1')
            plt.ylabel('Latent Dim 2')
            plt.legend()
            plt.savefig(f'{save_dir}/latent_space_2d.png')
            plt.close()

def generate_report(model, history, feature_analysis, data_info, timestamp, save_dir='reports'):
    """Generate a summary report of the model training and evaluation"""
    report = []
    report.append("=" * 80)
    report.append("LAYER 1 AUTOENCODER-VAE MODEL SUMMARY REPORT")
    report.append("=" * 80)
    report.append(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    report.append(f"Model timestamp: {timestamp}")
    report.append("\n")
    
    # Data information
    report.append("DATA INFORMATION")
    report.append("-" * 80)
    report.append(f"Total samples: {data_info['total_samples']}")
    report.append(f"Training samples: {data_info['train_samples']}")
    report.append(f"Validation samples: {data_info['val_samples']}")
    report.append(f"Input features: {data_info['n_features']}")
    report.append(f"Feature names: {', '.join(data_info['feature_names'])}")
    report.append("\n")
    
    # Feature analysis
    report.append("FEATURE ANALYSIS")
    report.append("-" * 80)
    report.append("Top 5 features by variance:")
    top_var_features = feature_analysis['variances'].head(5)
    for feature, var in top_var_features.items():
        report.append(f"  - {feature}: {var:.6f}")
    
    report.append("\nLow variance features:")
    for feature in feature_analysis['low_var_features']:
        report.append(f"  - {feature}: {feature_analysis['variances'][feature]:.6f}")
    report.append("\n")
    
    # Model architecture
    report.append("MODEL ARCHITECTURE")
    report.append("-" * 80)
    report.append(f"Input dimension: {model.input_dim}")
    report.append(f"Latent dimension: {model.latent_dim}")
    report.append(f"Hidden layer sizes: {model.layer_sizes}")
    report.append(f"Learning rate: {model.learning_rate}")
    report.append("\n")
    
    # Training performance
    report.append("TRAINING PERFORMANCE")
    report.append("-" * 80)
    report.append(f"Final training loss: {history.history['loss'][-1]:.6f}")
    report.append(f"Final validation loss: {history.history['val_loss'][-1]:.6f}")
    report.append(f"Final reconstruction loss: {history.history['reconstruction_loss'][-1]:.6f}")
    report.append(f"Final KL loss: {history.history['kl_loss'][-1]:.6f}")
    report.append(f"Training epochs: {len(history.history['loss'])}")
    report.append("\n")
    
    # Anomaly detection
    report.append("ANOMALY DETECTION")
    report.append("-" * 80)
    report.append(f"Dynamic threshold: {model.threshold:.6f}")
    report.append(f"Fallback threshold: {model.fallback_threshold:.6f}")
    report.append(f"Threshold method: Kernel Density Estimation with robust elbow finding")
    report.append("\n")
    
    # Saved artifacts
    report.append("SAVED ARTIFACTS")
    report.append("-" * 80)
    report.append(f"Full model: models/layer1_model_{timestamp}.h5")
    report.append(f"Encoder model: models/layer1_encoder_{timestamp}.h5")
    report.append(f"Decoder model: models/layer1_decoder_{timestamp}.h5")
    report.append(f"KDE model: models/layer1_kde_{timestamp}.pkl")
    report.append(f"Configuration: models/layer1_config_{timestamp}.json")
    report.append("\n")
    
    # Write report to file
    with open(f"{save_dir}/layer1_report_{timestamp}.txt", "w") as f:
        f.write("\n".join(report))
    
    # Create symlink to latest report
    latest_report = f"{save_dir}/layer1_report_latest.txt"
    if os.path.exists(latest_report):
        os.remove(latest_report)
    os.symlink(f"layer1_report_{timestamp}.txt", latest_report)
    
    return report

def main():
    start_time = time.time()
    np.random.seed(42)
    tf.random.set_seed(42)
    
    logger.info("Loading dataset...")
    dataset_path = "layer1_training_data.csv"
    
    try:
        df = pd.read_csv(dataset_path)
        logger.info(f"Loaded dataset with {len(df)} samples and {len(df.columns)} features")
    except Exception as e:
        logger.error(f"Error loading dataset: {e}")
        logger.info("Using sample data instead...")
        sample_data = """tcp.dstport_category,mbtcp.trans_id,tcp.ack,mqtt.ver,tcp.connection.synack,mbtcp.len,mqtt.conflags,mqtt.conack.flags,tcp.connection.rst,http.tls_port,tcp.srcport,tcp.connection.fin,mqtt.hdrflags
0.5,0.0,1.2316824563829088e-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7660298471022675,0.0,0.8
0.5,0.0,1.4780189476594907e-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9434339426862391,0.0,0.0
1.0,0.0,3.1824211308021596e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7660298471022675,0.0,0.8
0.5,0.0,1.2316824563829088e-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7660298471022675,0.0,0.8"""
        df = pd.read_csv(pd.StringIO(sample_data))
        
    # Preprocess data
    logger.info("Preprocessing data...")
    
    # Drop columns with zero variance
    var = df.var()
    zero_var_cols = var[var == 0].index.tolist()
    if zero_var_cols:
        logger.info(f"Dropping {len(zero_var_cols)} zero-variance columns: {zero_var_cols}")
        df = df.drop(columns=zero_var_cols)
    
    # Handle NaN values
    if df.isna().any().any():
        logger.info("Filling NaN values with 0")
        df = df.fillna(0)
    
    # Scale features to [0, 1] range for VAE
    scaler = MaxAbsScaler()
    X_scaled = scaler.fit_transform(df)
    
    # Save scaler for future use
    joblib.dump(scaler, 'models/layer1_scaler.pkl')
    
    # Split data for training
    X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
    
    # Analyze features
    logger.info("Analyzing features...")
    feature_analysis = analyze_features(df)
    
    # Find optimal latent dimension
    input_dim = X_train.shape[1]
    best_latent_dim = find_optimal_latent_dim(X_train, X_val, input_dim)
    
    # Define model architecture based on input size
    if input_dim <= 10:
        layer_sizes = [32, 16]
    elif input_dim <= 20:
        layer_sizes = [64, 32, 16]
    else:
        layer_sizes = [128, 64, 32]
    
    # Create and train the model
    logger.info(f"Building VAE model with latent dim {best_latent_dim}...")
    model = Layer1AutoencoderVAE(
        input_dim=input_dim,
        latent_dim=best_latent_dim,
        layer_sizes=layer_sizes
    )
    
    logger.info("Training VAE model...")
    history = model.train(X_train, X_val, epochs=200, batch_size=64)
    
    # Generate evaluation plots
    logger.info("Generating evaluation plots...")
    generate_evaluation_plots(model, X_train)
    
    # Save the model
    logger.info("Saving model...")
    timestamp = model.save_model()
    
    # Prepare data info for report
    data_info = {
        'total_samples': len(df),
        'train_samples': len(X_train),
        'val_samples': len(X_val),
        'n_features': input_dim,
        'feature_names': df.columns.tolist()
    }
    
    # Generate and save report
    logger.info("Generating report...")
    report = generate_report(model, history, feature_analysis, data_info, timestamp)
    
    # Extract encoded features for Layer 2
    logger.info("Extracting encoded features for Layer 2...")
    encoded_features = model.get_encoded_features(X_scaled)
    
    # Save encoded features for Layer 2
    encoded_df = pd.DataFrame(
        encoded_features,
        columns=[f'vae_feature_{i}' for i in range(best_latent_dim)]
    )
    encoded_df.to_csv('layer1_encoded_features.csv', index=False)
    
    # Perform anomaly detection on training data (to demonstrate)
    logger.info("Running anomaly detection on training data...")
    anomalies, anomaly_indices, anomaly_scores, confidence = model.detect_anomalies(X_scaled)
    
    # Save anomaly detection results
    anomaly_results = pd.DataFrame({
        'anomaly_score': anomaly_scores,
        'confidence': confidence,
        'is_anomaly': anomaly_scores > model.threshold
    })
    anomaly_results.to_csv('layer1_anomaly_results.csv', index=False)
    
    # Print summary of anomalies found
    anomaly_count = len(anomaly_indices)
    logger.info(f"Found {anomaly_count} potential anomalies ({(anomaly_count/len(X_scaled))*100:.2f}%)")
    
    # Print execution time
    execution_time = time.time() - start_time
    logger.info(f"Total execution time: {execution_time:.2f} seconds")
    
    logger.info("Layer 1 VAE processing completed successfully!")

if __name__ == "__main__":
    main()

2025-03-22 12:10:24,689 - INFO - Loading dataset...
2025-03-22 12:10:24,793 - INFO - Loaded dataset with 150000 samples and 13 features
2025-03-22 12:10:24,795 - INFO - Preprocessing data...
2025-03-22 12:10:24,849 - INFO - Analyzing features...


: 

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, backend as K
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import precision_recall_curve, auc, roc_curve, roc_auc_score
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
import joblib
import time
import logging
import warnings
import json

# Setup logging and suppress warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('Layer1_VAE')

# Create directories
os.makedirs('plots', exist_ok=True)
os.makedirs('models', exist_ok=True)
os.makedirs('reports', exist_ok=True)

class Layer1AutoencoderVAE:
    def __init__(self, input_dim, latent_dim=6, learning_rate=1e-4, layer_sizes=None, beta=0.8):
        """Initialize the hybrid Autoencoder-VAE model with configurable architecture"""
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate
        self.layer_sizes = layer_sizes or [64, 32]
        self.beta = beta  # KL weight factor
        self.encoder = None
        self.decoder = None
        self.ae_encoder = None  # Pure autoencoder encoder
        self.ae_decoder = None  # Pure autoencoder decoder
        self.vae = None
        self.kde = None
        self.threshold = None
        self.fallback_threshold = None
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.build_model()
        
    def sampling(self, args):
        """Reparameterization trick for VAE"""
        z_mean, z_log_var = args
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
        
    def build_model(self):
        """Build the hybrid Autoencoder-VAE with dual reconstruction paths"""
        # Input layer
        encoder_inputs = keras.Input(shape=(self.input_dim,))
        
        # === PURE AUTOENCODER PATH ===
        ae_x = encoder_inputs
        
        # Encoder layers for pure autoencoder
        for i, size in enumerate(self.layer_sizes):
            ae_x = layers.Dense(
                size, 
                activation="relu",
                kernel_regularizer=regularizers.l2(1e-5),
                name=f"ae_encoder_dense_{i}"
            )(ae_x)
            ae_x = layers.BatchNormalization(name=f"ae_encoder_bn_{i}")(ae_x)
            
        # Pure autoencoder bottleneck (non-variational)
        ae_bottleneck = layers.Dense(
            self.latent_dim,
            activation="relu",
            name="ae_bottleneck"
        )(ae_x)
        
        # Decoder layers for pure autoencoder
        ae_decoded = ae_bottleneck
        for i, size in enumerate(reversed(self.layer_sizes)):
            ae_decoded = layers.Dense(
                size,
                activation="relu",
                kernel_regularizer=regularizers.l2(1e-5),
                name=f"ae_decoder_dense_{i}"
            )(ae_decoded)
            ae_decoded = layers.BatchNormalization(name=f"ae_decoder_bn_{i}")(ae_decoded)
            
        # Output layer for pure autoencoder
        ae_outputs = layers.Dense(
            self.input_dim, 
            activation="sigmoid",
            name="ae_output"
        )(ae_decoded)
        
        # Create pure autoencoder model
        self.ae_encoder = keras.Model(encoder_inputs, ae_bottleneck, name="ae_encoder")
        self.ae_decoder = keras.Model(ae_bottleneck, ae_outputs, name="ae_decoder")
        
        # === VAE PATH ===
        vae_x = encoder_inputs
        
        # Encoder layers for VAE
        for i, size in enumerate(self.layer_sizes):
            vae_x = layers.Dense(
                size, 
                activation="relu", 
                kernel_regularizer=regularizers.l2(1e-5),
                name=f"vae_encoder_dense_{i}"
            )(vae_x)
            vae_x = layers.BatchNormalization(name=f"vae_encoder_bn_{i}")(vae_x)
            vae_x = layers.Dropout(0.2, name=f"vae_encoder_dropout_{i}")(vae_x)
        
        # VAE latent space parameters
        z_mean = layers.Dense(self.latent_dim, name="z_mean")(vae_x)
        z_log_var = layers.Dense(self.latent_dim, name="z_log_var")(vae_x)
        
        # Sampling layer
        z = layers.Lambda(self.sampling, output_shape=(self.latent_dim,), name="z")([z_mean, z_log_var])
        
        # Instantiate encoder
        self.encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
        
        # Decoder for VAE
        latent_inputs = keras.Input(shape=(self.latent_dim,), name="decoder_input")
        vae_decoded = latent_inputs
        
        for i, size in enumerate(reversed(self.layer_sizes)):
            vae_decoded = layers.Dense(
                size, 
                activation="relu", 
                kernel_regularizer=regularizers.l2(1e-5),
                name=f"vae_decoder_dense_{i}"
            )(vae_decoded)
            vae_decoded = layers.BatchNormalization(name=f"vae_decoder_bn_{i}")(vae_decoded)
            vae_decoded = layers.Dropout(0.2, name=f"vae_decoder_dropout_{i}")(vae_decoded)
        
        # VAE output layer
        vae_outputs = layers.Dense(self.input_dim, activation="sigmoid", name="vae_output")(vae_decoded)
        
        # Instantiate decoder
        self.decoder = keras.Model(latent_inputs, vae_outputs, name="decoder")
        
        # Define full VAE model with custom loss
        class VAEModel(keras.Model):
            def __init__(self, encoder, decoder, ae_encoder, ae_decoder, beta=0.8, **kwargs):
                super(VAEModel, self).__init__(**kwargs)
                self.encoder = encoder
                self.decoder = decoder
                self.ae_encoder = ae_encoder
                self.ae_decoder = ae_decoder
                self.beta = beta
                self.total_loss_tracker = keras.metrics.Mean(name="loss")
                self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
                self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
                self.ae_loss_tracker = keras.metrics.Mean(name="ae_loss")
                
            def call(self, inputs):
                # Get VAE outputs
                z_mean, z_log_var, z = self.encoder(inputs)
                vae_reconstructed = self.decoder(z)
                
                # Get pure AE outputs
                ae_bottleneck = self.ae_encoder(inputs)
                ae_reconstructed = self.ae_decoder(ae_bottleneck)
                
                # Combine both reconstructions
                return ae_reconstructed, vae_reconstructed
                
            def train_step(self, data):
                inputs = data
                
                with tf.GradientTape() as tape:
                    # Encode and decode using both paths
                    z_mean, z_log_var, z = self.encoder(inputs)
                    vae_reconstructed = self.decoder(z)
                    
                    ae_bottleneck = self.ae_encoder(inputs)
                    ae_reconstructed = self.ae_decoder(ae_bottleneck)
                    
                    # VAE reconstruction loss - fixed MSE calculation
                    vae_reconstruction_loss = tf.reduce_mean(
                        tf.reduce_sum(
                            tf.square(inputs - vae_reconstructed),
                            axis=1
                        )
                    )
                    
                    # Pure autoencoder loss - fixed MSE calculation
                    ae_loss = tf.reduce_mean(
                        tf.reduce_sum(
                            tf.square(inputs - ae_reconstructed),
                            axis=1
                        )
                    )
                    
                    # KL divergence loss
                    kl_loss = -0.5 * tf.reduce_mean(
                        tf.reduce_sum(
                            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), 
                            axis=1
                        )
                    )
                    
                    # Hybrid loss: AE loss + VAE loss (reconstruction + weighted KL)
                    total_loss = ae_loss + vae_reconstruction_loss + self.beta * kl_loss
                    
                # Compute gradients
                grads = tape.gradient(total_loss, self.trainable_weights)
                self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
                
                # Update metrics
                self.total_loss_tracker.update_state(total_loss)
                self.reconstruction_loss_tracker.update_state(vae_reconstruction_loss)
                self.kl_loss_tracker.update_state(kl_loss)
                self.ae_loss_tracker.update_state(ae_loss)
                
                return {
                    "loss": self.total_loss_tracker.result(),
                    "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                    "kl_loss": self.kl_loss_tracker.result(),
                    "ae_loss": self.ae_loss_tracker.result()
                }
                
            def test_step(self, data):
                inputs = data
                
                # Encode and decode
                z_mean, z_log_var, z = self.encoder(inputs)
                vae_reconstructed = self.decoder(z)
                
                ae_bottleneck = self.ae_encoder(inputs)
                ae_reconstructed = self.ae_decoder(ae_bottleneck)
                
                # VAE reconstruction loss - fixed MSE calculation
                vae_reconstruction_loss = tf.reduce_mean(
                    tf.reduce_sum(
                        tf.square(inputs - vae_reconstructed),
                        axis=1
                    )
                )
                
                # Pure autoencoder loss - fixed MSE calculation
                ae_loss = tf.reduce_mean(
                    tf.reduce_sum(
                        tf.square(inputs - ae_reconstructed),
                        axis=1
                    )
                )
                
                # KL divergence loss
                kl_loss = -0.5 * tf.reduce_mean(
                    tf.reduce_sum(
                        1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), 
                        axis=1
                    )
                )
                
                # Total loss
                total_loss = ae_loss + vae_reconstruction_loss + self.beta * kl_loss
                
                # Update metrics
                self.total_loss_tracker.update_state(total_loss)
                self.reconstruction_loss_tracker.update_state(vae_reconstruction_loss)
                self.kl_loss_tracker.update_state(kl_loss)
                self.ae_loss_tracker.update_state(ae_loss)
                
                return {
                    "loss": self.total_loss_tracker.result(),
                    "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                    "kl_loss": self.kl_loss_tracker.result(),
                    "ae_loss": self.ae_loss_tracker.result()
                }
        
        # Instantiate the VAE model with custom loss
        self.vae = VAEModel(
            self.encoder, 
            self.decoder, 
            self.ae_encoder, 
            self.ae_decoder, 
            beta=self.beta,
            name="hybrid_vae"
        )
        
        # Compile the model
        self.vae.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate))
    
    def train(self, X_train, X_val, epochs=100, batch_size=32):
        """Train the VAE model with early stopping and LR reduction"""
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=15, restore_best_weights=True, verbose=1
        )
        
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7, verbose=1
        )
        
        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir=f'./logs/vae_{time.strftime("%Y%m%d-%H%M%S")}',
            histogram_freq=1
        )
        
        logger.info("Starting hybrid Autoencoder-VAE training...")
        history = self.vae.fit(
            X_train, 
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, None),
            callbacks=[early_stopping, reduce_lr, tensorboard_callback],
            verbose=1
        )
        
        logger.info("Hybrid Autoencoder-VAE training completed.")
        self._set_dynamic_threshold(X_train)
        return history
    
    def _set_dynamic_threshold(self, X_data):
        """Set robust thresholds using KDE with percentile-based fallback"""
        # Get latent representations and reconstructions
        ae_bottleneck = self.ae_encoder.predict(X_data)
        ae_reconstructed = self.ae_decoder.predict(ae_bottleneck)
        
        _, _, z = self.encoder.predict(X_data)
        vae_reconstructed = self.decoder.predict(z)
        
        # Compute reconstruction error (weighted combination of AE and VAE)
        ae_mse = np.mean(np.square(X_data - ae_reconstructed), axis=1)
        vae_mse = np.mean(np.square(X_data - vae_reconstructed), axis=1)
        combined_mse = 0.4 * ae_mse + 0.6 * vae_mse  # Weight VAE slightly higher
        
        # Set multiple percentile-based fallback thresholds
        self.fallback_threshold = np.percentile(combined_mse, 99)
        conservative_threshold = np.percentile(combined_mse, 97.5)
        
        try:
            # Optimize KDE bandwidth using grid search
            param_grid = {'bandwidth': np.logspace(-3, 0, 10)}
            grid_search = GridSearchCV(KernelDensity(kernel='gaussian'), param_grid, cv=3)
            grid_search.fit(combined_mse.reshape(-1, 1))
            
            # Use the optimized bandwidth
            best_bandwidth = grid_search.best_params_['bandwidth']
            self.kde = KernelDensity(kernel='gaussian', bandwidth=best_bandwidth).fit(combined_mse.reshape(-1, 1))
            
            # Calculate density and anomaly scores
            log_dens = self.kde.score_samples(combined_mse.reshape(-1, 1))
            scores = -log_dens
            
            # Find threshold using robust elbow method
            sorted_scores = np.sort(scores)
            n_samples = len(sorted_scores)
            
            # Use smoother gradient calculation for more robust elbow detection
            if n_samples > 50:
                # Calculate gradient
                gradient = np.gradient(sorted_scores)
                # Calculate second derivative (change in gradient)
                grad2 = np.gradient(gradient)
                
                # Find where second derivative is maximized (the "elbow")
                elbow_idx = np.argmax(grad2)
                kde_threshold = sorted_scores[elbow_idx]
                
                # Blend KDE threshold with percentile-based threshold for robustness
                self.threshold = 0.7 * kde_threshold + 0.3 * self.fallback_threshold
                
                # Safety check - ensure threshold isn't too aggressive
                if self.threshold < conservative_threshold:
                    self.threshold = conservative_threshold
            else:
                # Not enough samples for reliable KDE - use fallback
                self.threshold = self.fallback_threshold
        except Exception as e:
            logger.warning(f"Error in KDE threshold calculation: {str(e)}. Using fallback threshold.")
            self.threshold = self.fallback_threshold
        
        logger.info(f"Dynamic threshold: {self.threshold:.6f} (fallback: {self.fallback_threshold:.6f})")
        
        # Visualize the threshold
        plt.figure(figsize=(10, 6))
        plt.hist(combined_mse, bins=50, alpha=0.6, color='blue', density=True)
        plt.axvline(x=self.threshold, color='red', linestyle='--', 
                    label=f'Threshold: {self.threshold:.6f}')
        plt.axvline(x=self.fallback_threshold, color='green', linestyle=':', 
                    label=f'Fallback: {self.fallback_threshold:.6f}')
        
        # Plot KDE curve if available
        if self.kde is not None:
            x_plot = np.linspace(0, max(combined_mse) * 1.1, 1000).reshape(-1, 1)
            log_dens = self.kde.score_samples(x_plot)
            plt.plot(x_plot, np.exp(log_dens), '-', color='purple', lw=2, 
                     label='KDE Estimate')
        
        plt.title('Anomaly Score Distribution and Thresholds')
        plt.xlabel('Reconstruction Error (MSE)')
        plt.ylabel('Density')
        plt.legend()
        plt.savefig('plots/anomaly_threshold.png')
        plt.close()
    
    def detect_anomalies(self, X_data):
        """Detect anomalies using both AE and VAE reconstruction errors"""
        if self.kde is None or self.threshold is None:
            raise ValueError("Model hasn't been trained yet. Call train() first.")
        
        # Get reconstructions from both models
        ae_bottleneck = self.ae_encoder.predict(X_data)
        ae_reconstructed = self.ae_decoder.predict(ae_bottleneck)
        
        _, _, z = self.encoder.predict(X_data)
        vae_reconstructed = self.decoder.predict(z)
        
        # Compute combined reconstruction error
        ae_mse = np.mean(np.square(X_data - ae_reconstructed), axis=1)
        vae_mse = np.mean(np.square(X_data - vae_reconstructed), axis=1)
        combined_mse = 0.4 * ae_mse + 0.6 * vae_mse
        
        # Use KDE to get anomaly scores if available
        if self.kde is not None:
            log_dens = self.kde.score_samples(combined_mse.reshape(-1, 1))
            anomaly_scores = -log_dens
        else:
            # Fallback to direct MSE if KDE failed
            anomaly_scores = combined_mse
        
        # Normalize scores to [0,1] range for easier interpretation
        min_score = np.min(anomaly_scores)
        max_score = np.max(anomaly_scores)
        normalized_scores = (anomaly_scores - min_score) / (max_score - min_score) if max_score > min_score else anomaly_scores
        
        # Identify anomalies
        is_anomaly = normalized_scores > self.threshold
        anomaly_indices = np.where(is_anomaly)[0]
        anomalies = X_data[anomaly_indices]
        
        # Calculate confidence (how far above threshold)
        confidence = np.zeros_like(normalized_scores)
        confidence[is_anomaly] = (normalized_scores[is_anomaly] - self.threshold) / (1 - self.threshold)
        confidence = np.clip(confidence, 0, 1)
        
        return anomalies, anomaly_indices, normalized_scores, confidence
    
    def get_encoded_features(self, X_data):
        """Extract features from the encoder's latent space"""
        _, _, z = self.encoder.predict(X_data)
        return z
    
    def save_model(self, base_path='models'):
        """Save the model and artifacts"""
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        
        # Save VAE components
        self.encoder.save(f'{base_path}/layer1_encoder_{timestamp}.h5')
        self.decoder.save(f'{base_path}/layer1_decoder_{timestamp}.h5')
        
        # Save AE components
        self.ae_encoder.save(f'{base_path}/layer1_ae_encoder_{timestamp}.h5')
        self.ae_decoder.save(f'{base_path}/layer1_ae_decoder_{timestamp}.h5')
        
        # Create symlinks to latest models
        for model_type in ['encoder', 'decoder', 'ae_encoder', 'ae_decoder']:
            latest_link = f'{base_path}/layer1_{model_type}.h5'
            if os.path.exists(latest_link):
                os.remove(latest_link)
            os.symlink(f'layer1_{model_type}_{timestamp}.h5', latest_link)
        
        # Save threshold and metadata
        model_config = {
            'input_dim': self.input_dim,
            'latent_dim': self.latent_dim,
            'layer_sizes': self.layer_sizes,
            'threshold': float(self.threshold),
            'fallback_threshold': float(self.fallback_threshold),
            'beta': self.beta,
            'timestamp': timestamp
        }
        
        with open(f'{base_path}/layer1_config_{timestamp}.json', 'w') as f:
            json.dump(model_config, f, indent=4)
        
        # Save the KDE model if available
        if self.kde is not None:
            joblib.dump(self.kde, f'{base_path}/layer1_kde_{timestamp}.pkl')
            joblib.dump(self.kde, f'{base_path}/layer1_kde.pkl')
        
        return timestamp

def find_optimal_latent_dim(X_train, X_val, input_dim, min_dim=3, max_dim=12):
    """Find optimal latent dimension using cross-validation"""
    logger.info("Finding optimal latent dimension...")
    
    # Define candidate dimensions to test
    if input_dim <= 10:
        candidate_dims = list(range(min_dim, min(max_dim, input_dim) + 1))
    else:
        step = 1 if max_dim - min_dim <= 10 else 2
        candidate_dims = list(range(min_dim, min(max_dim, input_dim // 2) + 1, step))
    
    # Remove duplicates and sort
    candidate_dims = sorted(list(set(candidate_dims)))
    
    results = []
    
    for latent_dim in candidate_dims:
        # Create a smaller model for quick evaluation
        model = Layer1AutoencoderVAE(
            input_dim=input_dim, 
            latent_dim=latent_dim,
            layer_sizes=[32, 16],  # Smaller network for quick evaluation
            learning_rate=1e-3
        )
        
        # Train with fewer epochs for efficiency
        history = model.train(
            X_train, X_val, 
            epochs=30, 
            batch_size=64
        )
        
        # Get the best validation loss
        best_val_loss = min(history.history['val_loss'])
        logger.info(f"Latent dim {latent_dim}: validation loss = {best_val_loss:.6f}")
        results.append((latent_dim, best_val_loss))
    
    # Find dimension with lowest loss
    results.sort(key=lambda x: x[1])
    best_dim = results[0][0]
    
    # Visualize dimension search
    dims, losses = zip(*results)
    plt.figure(figsize=(10, 6))
    plt.plot(dims, losses, 'o-')
    plt.axvline(x=best_dim, color='red', linestyle='--')
    plt.title(f'Latent Dimension Optimization (Best: {best_dim})')
    plt.xlabel('Latent Dimension')
    plt.ylabel('Validation Loss')
    plt.grid(True)
    plt.savefig('plots/latent_dim_optimization.png')
    plt.close()
    
    logger.info(f"Optimal latent dimension: {best_dim}")
    return best_dim

def generate_evaluation_plots(model, X_normal, X_test=None, y_test=None, save_dir='plots'):
    """Generate evaluation plots for the model"""
    # Get reconstructions from both AE and VAE models
    ae_bottleneck = model.ae_encoder.predict(X_normal)
    ae_reconstructed = model.ae_decoder.predict(ae_bottleneck)
    
    _, _, z_normal = model.encoder.predict(X_normal)
    vae_reconstructed = model.decoder.predict(z_normal)
    
    # Compute combined reconstruction error
    ae_mse = np.mean(np.square(X_normal - ae_reconstructed), axis=1)
    vae_mse = np.mean(np.square(X_normal - vae_reconstructed), axis=1)
    combined_mse = 0.4 * ae_mse + 0.6 * vae_mse
    
    # Plot reconstruction error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(combined_mse, kde=True, color='blue', label='Normal')
    
    # If test data with labels is available
    if X_test is not None and y_test is not None:
        ae_bottleneck_test = model.ae_encoder.predict(X_test)
        ae_reconstructed_test = model.ae_decoder.predict(ae_bottleneck_test)
        
        _, _, z_test = model.encoder.predict(X_test)
        vae_reconstructed_test = model.decoder.predict(z_test)
        
        # Compute combined reconstruction error for test set
        ae_mse_test = np.mean(np.square(X_test - ae_reconstructed_test), axis=1)
        vae_mse_test = np.mean(np.square(X_test - vae_reconstructed_test), axis=1)
        combined_mse_test = 0.4 * ae_mse_test + 0.6 * vae_mse_test
        
        # Separate normal and anomaly in test set
        if np.sum(y_test) > 0:  # If we have anomalies
            anomaly_mse = combined_mse_test[y_test == 1]
            sns.histplot(anomaly_mse, kde=True, color='red', alpha=0.6, label='Anomaly')
    
    plt.axvline(x=model.threshold, color='green', linestyle='--', 
                label=f'Threshold: {model.threshold:.6f}')
    plt.title('Reconstruction Error Distribution')
    plt.xlabel('Mean Squared Error')
    plt.ylabel('Frequency')
    plt.legend()
    plt.savefig(f'{save_dir}/reconstruction_error_dist.png')
    plt.close()
    
    # If test data with labels is available, generate ROC and PR curves
    if X_test is not None and y_test is not None and np.sum(y_test) > 0:
        # Get anomaly scores for test data
        _, _, anomaly_scores, _ = model.detect_anomalies(X_test)
        
        # ROC curve
        fpr, tpr, _ = roc_curve(y_test, anomaly_scores)
        roc_auc = auc(fpr, tpr)
        
        plt.figure(figsize=(10, 6))
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")
        plt.savefig(f'{save_dir}/roc_curve.png')
        plt.close()
        
        # Precision-Recall curve
        precision, recall, _ = precision_recall_curve(y_test, anomaly_scores)
        pr_auc = auc(recall, precision)
        
        plt.figure(figsize=(10, 6))
        plt.plot(recall, precision, color='blue', lw=2, label=f'PR curve (area = {pr_auc:.2f})')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.legend(loc="lower left")
        plt.savefig(f'{save_dir}/pr_curve.png')
        plt.close()
        
        # Latent space visualization (2D projection if latent_dim > 2)
        _, _, z = model.encoder.predict(X_test)
        
        if model.latent_dim >= 2:
            plt.figure(figsize=(10, 8))
            if np.sum(y_test == 0) > 0:
                plt.scatter(z[y_test == 0, 0], z[y_test == 0, 1], c='blue', alpha=0.5, label='Normal')
            if np.sum(y_test == 1) > 0:
                plt.scatter(z[y_test == 1, 0], z[y_test == 1, 1], c='red', alpha=0.5, label='Anomaly')
            plt.title('Latent Space Visualization (First 2 Dimensions)')
            plt.xlabel('Latent Dim 1')
            plt.ylabel('Latent Dim 2')
            plt.legend()
            plt.grid(True)
            plt.savefig(f'{save_dir}/latent_space.png')
            plt.close()

            # If latent dimension > 2, create a 3D plot as well
            if model.latent_dim >= 3:
                fig = plt.figure(figsize=(12, 10))
                ax = fig.add_subplot(111, projection='3d')
                if np.sum(y_test == 0) > 0:
                    ax.scatter(z[y_test == 0, 0], z[y_test == 0, 1], z[y_test == 0, 2], 
                            c='blue', alpha=0.5, label='Normal')
                if np.sum(y_test == 1) > 0:
                    ax.scatter(z[y_test == 1, 0], z[y_test == 1, 1], z[y_test == 1, 2], 
                            c='red', alpha=0.5, label='Anomaly')
                ax.set_title('Latent Space Visualization (First 3 Dimensions)')
                ax.set_xlabel('Latent Dim 1')
                ax.set_ylabel('Latent Dim 2')
                ax.set_zlabel('Latent Dim 3')
                ax.legend()
                plt.savefig(f'{save_dir}/latent_space_3d.png')
                plt.close()
        
                # Return metrics for reporting
                return {
                    'roc_auc': roc_auc,
                    'pr_auc': pr_auc
                }

def main():
    """Main execution function"""
    # Load and prepare data 
    logger.info("Loading and preparing data...")
    
    # TODO: Replace with your actual data loading logic
    try:
        df = pd.read_csv('your_dataset.csv')
        features = df.drop(['label', 'id'], axis=1, errors='ignore')
        
        # If labels are available
        if 'label' in df.columns:
            labels = df['label'].values
            normal_data = features[labels == 0].values
            anomaly_data = features[labels == 1].values if np.sum(labels == 1) > 0 else None
        else:
            # Assume all training data is normal for unsupervised learning
            normal_data = features.values
            anomaly_data = None
            labels = None
    except Exception as e:
        logger.error(f"Error loading data: {str(e)}")
        logger.info("Using synthetic data for demonstration...")
        
        # Generate synthetic data for demonstration
        n_features = 20
        n_normal = 1000
        n_anomalies = 50
        
        # Generate normal samples with a specific distribution
        normal_data = np.random.normal(0, 1, size=(n_normal, n_features))
        
        # Generate anomalies with a different distribution
        anomaly_data = np.random.normal(3, 2, size=(n_anomalies, n_features))
        
        # Create labels
        labels = np.zeros(n_normal + n_anomalies)
        labels[n_normal:] = 1
        
        # Combine data
        all_data = np.vstack([normal_data, anomaly_data])
    
    # Scale data
    scaler = MaxAbsScaler()
    if anomaly_data is not None and labels is not None:
        # Scale using only normal data to avoid anomaly influence
        normal_data_scaled = scaler.fit_transform(normal_data)
        all_data_scaled = scaler.transform(all_data)
    else:
        normal_data_scaled = scaler.fit_transform(normal_data)
        all_data_scaled = normal_data_scaled
        
    # Split normal data for training and validation
    X_train, X_val = train_test_split(normal_data_scaled, test_size=0.2, random_state=42)
    
    # Get dimensions
    input_dim = X_train.shape[1]
    
    # Find optimal latent dimension
    optimal_dim = find_optimal_latent_dim(X_train, X_val, input_dim)
    
    # Find optimal layer sizes based on input dimension
    if input_dim <= 10:
        layer_sizes = [32, 16]
    elif input_dim <= 50:
        layer_sizes = [64, 32, 16]
    else:
        layer_sizes = [128, 64, 32]
    
    # Create and train the model with optimal parameters
    logger.info(f"Creating model with latent_dim={optimal_dim}, layers={layer_sizes}")
    model = Layer1AutoencoderVAE(
        input_dim=input_dim,
        latent_dim=optimal_dim,
        layer_sizes=layer_sizes,
        beta=0.8  # KL weight factor
    )
    
    # Perform cross-validation to ensure robust performance
    n_splits = 5
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # Initialize metrics tracking
    val_losses = []
    train_times = []
    
    # Main training with cross-validation
    logger.info(f"Starting {n_splits}-fold cross-validation...")
    
    fold = 1
    for train_idx, val_idx in kf.split(normal_data_scaled):
        logger.info(f"Training fold {fold}/{n_splits}")
        X_train_fold, X_val_fold = normal_data_scaled[train_idx], normal_data_scaled[val_idx]
        
        # Track training time
        start_time = time.time()
        
        # Train the model
        history = model.train(
            X_train_fold, 
            X_val_fold,
            epochs=150,
            batch_size=32
        )
        
        # Record metrics
        train_time = time.time() - start_time
        best_val_loss = min(history.history['val_loss'])
        
        train_times.append(train_time)
        val_losses.append(best_val_loss)
        
        logger.info(f"Fold {fold} - Training time: {train_time:.2f}s, Best val loss: {best_val_loss:.6f}")
        fold += 1
        
        # Only do one fold for quick testing if needed
        # break  # Uncomment to only do one fold
    
    # Average metrics
    avg_val_loss = np.mean(val_losses)
    avg_train_time = np.mean(train_times)
    
    logger.info(f"Cross-validation complete - Avg val loss: {avg_val_loss:.6f}, Avg train time: {avg_train_time:.2f}s")
    
    # Final training on all normal data
    logger.info("Training final model on all normal data...")
    history = model.train(
        normal_data_scaled,
        normal_data_scaled[:100],  # Small validation set for early stopping
        epochs=200,
        batch_size=32
    )
    
    # Save the model
    timestamp = model.save_model()
    
    # Generate evaluation plots
    if anomaly_data is not None and labels is not None:
        logger.info("Generating evaluation plots with labeled test data...")
        metrics = generate_evaluation_plots(model, normal_data_scaled, all_data_scaled, labels)
        
        # Log performance metrics
        logger.info(f"ROC AUC: {metrics['roc_auc']:.4f}")
        logger.info(f"PR AUC: {metrics['pr_auc']:.4f}")
        
        # Save metrics to report
        with open(f'reports/layer1_metrics_{timestamp}.json', 'w') as f:
            json.dump({
                'roc_auc': float(metrics['roc_auc']),
                'pr_auc': float(metrics['pr_auc']),
                'avg_val_loss': float(avg_val_loss),
                'avg_train_time': float(avg_train_time),
                'input_dim': int(input_dim),
                'latent_dim': int(optimal_dim),
                'layer_sizes': layer_sizes,
                'timestamp': timestamp
            }, f, indent=4)
    else:
        logger.info("Generating basic evaluation plots...")
        generate_evaluation_plots(model, normal_data_scaled)
    
    logger.info("Layer 1 VAE model training and evaluation complete.")
    return model, timestamp

if __name__ == "__main__":
    main()

2025-03-22 12:51:00,647 - INFO - Loading and preparing data...
2025-03-22 12:51:00,649 - ERROR - Error loading data: [Errno 2] No such file or directory: 'your_dataset.csv'
2025-03-22 12:51:00,649 - INFO - Using synthetic data for demonstration...
2025-03-22 12:51:00,663 - INFO - Finding optimal latent dimension...
2025-03-22 12:51:00,783 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - ae_loss: 6.3908 - kl_loss: 0.2055 - loss: 13.5509 - reconstruction_loss: 6.9956 - val_ae_loss: 5.6228 - val_kl_loss: 0.0681 - val_loss: 12.1632 - val_reconstruction_loss: 6.4859 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 5.4064 - kl_loss: 0.0575 - loss: 11.9462 - reconstruction_loss: 6.4938 - val_ae_loss: 4.2152 - val_kl_loss: 0.0286 - val_loss: 10.2248 - val_reconstruction_loss: 5.9867 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 3.9000 - kl_loss: 0.0272 - loss: 9.9422 - reconstruction_loss: 6.0204 - val_ae_loss: 2.7146 - val_kl_loss: 0.0205 - val_loss: 8.2030 - val_reconstruction_loss: 5.4720 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 2.5532 - kl_loss: 0.0212 - loss: 7.9403

2025-03-22 12:51:09,192 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 616us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 607us/step


2025-03-22 12:51:09,825 - INFO - Dynamic threshold: 0.604404 (fallback: 0.170850)
2025-03-22 12:51:10,039 - INFO - Latent dim 3: validation loss = 3.407724
2025-03-22 12:51:10,112 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - ae_loss: 6.5959 - kl_loss: 0.1359 - loss: 13.1339 - reconstruction_loss: 6.4293 - val_ae_loss: 6.2265 - val_kl_loss: 0.0783 - val_loss: 12.1332 - val_reconstruction_loss: 5.8440 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 6.1934 - kl_loss: 0.0882 - loss: 12.0794 - reconstruction_loss: 5.8155 - val_ae_loss: 5.4515 - val_kl_loss: 0.0882 - val_loss: 10.6189 - val_reconstruction_loss: 5.0969 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 5.2763 - kl_loss: 0.1099 - loss: 10.3423 - reconstruction_loss: 4.9780 - val_ae_loss: 4.2356 - val_kl_loss: 0.1601 - val_loss: 8.5677 - val_reconstruction_loss: 4.2040 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 3.9736 - kl_loss: 0.2033 - loss: 8.036

2025-03-22 12:51:17,474 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 614us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 633us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 657us/step


2025-03-22 12:51:18,110 - INFO - Dynamic threshold: 1.692072 (fallback: 0.168434)
2025-03-22 12:51:18,249 - INFO - Latent dim 4: validation loss = 3.374154
2025-03-22 12:51:18,308 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - ae_loss: 6.6879 - kl_loss: 0.0958 - loss: 13.1605 - reconstruction_loss: 6.3959 - val_ae_loss: 6.1958 - val_kl_loss: 0.0450 - val_loss: 12.0233 - val_reconstruction_loss: 5.7916 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 6.1058 - kl_loss: 0.0491 - loss: 11.8287 - reconstruction_loss: 5.6837 - val_ae_loss: 5.3781 - val_kl_loss: 0.0471 - val_loss: 10.5784 - val_reconstruction_loss: 5.1626 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 5.1560 - kl_loss: 0.0602 - loss: 10.1362 - reconstruction_loss: 4.9321 - val_ae_loss: 4.1366 - val_kl_loss: 0.0826 - val_loss: 8.3917 - val_reconstruction_loss: 4.1890 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 3.8281 - kl_loss: 0.1078 - loss: 7.887

2025-03-22 12:51:26,049 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 678us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 679us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 634us/step


2025-03-22 12:51:26,546 - INFO - Dynamic threshold: 0.637989 (fallback: 0.170936)
2025-03-22 12:51:26,666 - INFO - Latent dim 5: validation loss = 3.418460
2025-03-22 12:51:26,712 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - ae_loss: 6.6942 - kl_loss: 0.1117 - loss: 13.7246 - reconstruction_loss: 6.9410 - val_ae_loss: 6.2935 - val_kl_loss: 0.0706 - val_loss: 12.6008 - val_reconstruction_loss: 6.2508 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 6.3048 - kl_loss: 0.0672 - loss: 12.6404 - reconstruction_loss: 6.2818 - val_ae_loss: 5.6420 - val_kl_loss: 0.0538 - val_loss: 11.3207 - val_reconstruction_loss: 5.6356 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 5.5060 - kl_loss: 0.0523 - loss: 11.1333 - reconstruction_loss: 5.5855 - val_ae_loss: 4.4203 - val_kl_loss: 0.0454 - val_loss: 9.3908 - val_reconstruction_loss: 4.9342 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 4.1994 - kl_loss: 0.0446 - loss: 9.095

2025-03-22 12:51:34,411 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 639us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 618us/step


2025-03-22 12:51:34,911 - INFO - Dynamic threshold: 0.711018 (fallback: 0.168702)
2025-03-22 12:51:35,032 - INFO - Latent dim 6: validation loss = 3.404155
2025-03-22 12:51:35,080 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - ae_loss: 6.7202 - kl_loss: 0.0489 - loss: 13.5652 - reconstruction_loss: 6.8058 - val_ae_loss: 6.3266 - val_kl_loss: 0.0277 - val_loss: 12.6093 - val_reconstruction_loss: 6.2605 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 6.3647 - kl_loss: 0.0208 - loss: 12.6922 - reconstruction_loss: 6.3109 - val_ae_loss: 5.7763 - val_kl_loss: 0.0190 - val_loss: 11.5375 - val_reconstruction_loss: 5.7460 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 5.6744 - kl_loss: 0.0168 - loss: 11.3765 - reconstruction_loss: 5.6887 - val_ae_loss: 4.7927 - val_kl_loss: 0.0238 - val_loss: 9.8019 - val_reconstruction_loss: 4.9902 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 4.5460 - kl_loss: 0.0244 - loss: 9.443

2025-03-22 12:51:42,871 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 766us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 672us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641us/step


2025-03-22 12:51:43,376 - INFO - Dynamic threshold: 0.585683 (fallback: 0.171040)
2025-03-22 12:51:43,769 - INFO - Latent dim 7: validation loss = 3.417625
2025-03-22 12:51:43,830 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - ae_loss: 6.6698 - kl_loss: 0.2162 - loss: 13.6694 - reconstruction_loss: 6.8266 - val_ae_loss: 6.3010 - val_kl_loss: 0.0712 - val_loss: 12.5160 - val_reconstruction_loss: 6.1580 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - ae_loss: 6.3045 - kl_loss: 0.0696 - loss: 12.5034 - reconstruction_loss: 6.1431 - val_ae_loss: 5.7325 - val_kl_loss: 0.0390 - val_loss: 11.2736 - val_reconstruction_loss: 5.5099 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 5.6959 - kl_loss: 0.0458 - loss: 11.1870 - reconstruction_loss: 5.4544 - val_ae_loss: 4.7304 - val_kl_loss: 0.0434 - val_loss: 9.5539 - val_reconstruction_loss: 4.7888 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 4.5410 - kl_loss: 0.0560 - loss: 9.195

2025-03-22 12:51:51,441 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 614us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 642us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step


2025-03-22 12:51:51,967 - INFO - Dynamic threshold: 0.704048 (fallback: 0.169800)
2025-03-22 12:51:52,898 - INFO - Latent dim 8: validation loss = 3.361253
2025-03-22 12:51:52,951 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - ae_loss: 6.7201 - kl_loss: 0.1123 - loss: 13.9768 - reconstruction_loss: 7.1669 - val_ae_loss: 6.3902 - val_kl_loss: 0.0729 - val_loss: 12.9524 - val_reconstruction_loss: 6.5038 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 6.4572 - kl_loss: 0.0592 - loss: 12.9775 - reconstruction_loss: 6.4728 - val_ae_loss: 5.9768 - val_kl_loss: 0.0450 - val_loss: 11.8850 - val_reconstruction_loss: 5.8721 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 5.9567 - kl_loss: 0.0382 - loss: 11.8200 - reconstruction_loss: 5.8328 - val_ae_loss: 5.2171 - val_kl_loss: 0.0289 - val_loss: 10.4678 - val_reconstruction_loss: 5.2275 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 5.0524 - kl_loss: 0.0263 - loss: 10.1

2025-03-22 12:52:00,466 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 653us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 616us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step


2025-03-22 12:52:00,952 - INFO - Dynamic threshold: 0.727145 (fallback: 0.171343)
2025-03-22 12:52:01,069 - INFO - Latent dim 9: validation loss = 3.425392
2025-03-22 12:52:01,115 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - ae_loss: 6.6303 - kl_loss: 0.2404 - loss: 13.6839 - reconstruction_loss: 6.8612 - val_ae_loss: 6.1826 - val_kl_loss: 0.1125 - val_loss: 12.4777 - val_reconstruction_loss: 6.2052 - learning_rate: 0.0010
Epoch 2/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - ae_loss: 6.1343 - kl_loss: 0.0973 - loss: 12.2909 - reconstruction_loss: 6.0788 - val_ae_loss: 5.3296 - val_kl_loss: 0.0737 - val_loss: 10.6998 - val_reconstruction_loss: 5.3113 - learning_rate: 0.0010
Epoch 3/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - ae_loss: 5.0993 - kl_loss: 0.0689 - loss: 10.3998 - reconstruction_loss: 5.2454 - val_ae_loss: 3.9037 - val_kl_loss: 0.0683 - val_loss: 8.5616 - val_reconstruction_loss: 4.6033 - learning_rate: 0.0010
Epoch 4/30
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - ae_loss: 3.6431 - kl_loss: 0.0681 - loss: 8.130

2025-03-22 12:52:08,530 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 603us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 614us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 819us/step


2025-03-22 12:52:09,049 - INFO - Dynamic threshold: 0.649912 (fallback: 0.168491)
2025-03-22 12:52:09,167 - INFO - Latent dim 10: validation loss = 3.410115
2025-03-22 12:52:09,224 - INFO - Optimal latent dimension: 8
2025-03-22 12:52:09,224 - INFO - Creating model with latent_dim=8, layers=[64, 32, 16]
2025-03-22 12:52:09,299 - INFO - Starting 5-fold cross-validation...
2025-03-22 12:52:09,302 - INFO - Training fold 1/5
2025-03-22 12:52:09,303 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - ae_loss: 6.7306 - kl_loss: 0.1156 - loss: 13.4252 - reconstruction_loss: 6.6021 - val_ae_loss: 6.6192 - val_kl_loss: 0.0739 - val_loss: 12.9999 - val_reconstruction_loss: 6.3216 - learning_rate: 1.0000e-04
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 6.6507 - kl_loss: 0.0661 - loss: 13.0768 - reconstruction_loss: 6.3733 - val_ae_loss: 6.5222 - val_kl_loss: 0.0461 - val_loss: 12.6037 - val_reconstruction_loss: 6.0446 - learning_rate: 1.0000e-04
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 6.5394 - kl_loss: 0.0468 - loss: 12.6800 - reconstruction_loss: 6.1031 - val_ae_loss: 6.3721 - val_kl_loss: 0.0320 - val_loss: 12.1992 - val_reconstruction_loss: 5.8015 - learning_rate: 1.0000e-04
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 6.3347 - kl_loss: 0.0313

2025-03-22 12:52:48,268 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 774us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 628us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 669us/step


2025-03-22 12:52:48,812 - INFO - Dynamic threshold: 1.029132 (fallback: 0.165687)
2025-03-22 12:52:48,933 - INFO - Fold 1 - Training time: 39.63s, Best val loss: 3.466235
2025-03-22 12:52:48,933 - INFO - Training fold 2/5
2025-03-22 12:52:48,934 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - ae_loss: 1.6669 - kl_loss: 1.7747e-05 - loss: 3.4312 - reconstruction_loss: 1.7643 - val_ae_loss: 1.6164 - val_kl_loss: 6.3498e-06 - val_loss: 3.3648 - val_reconstruction_loss: 1.7484 - learning_rate: 1.0000e-04
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.6242 - kl_loss: 1.1376e-05 - loss: 3.3560 - reconstruction_loss: 1.7319 - val_ae_loss: 1.6146 - val_kl_loss: 5.9724e-06 - val_loss: 3.3632 - val_reconstruction_loss: 1.7486 - learning_rate: 1.0000e-04
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.6875 - kl_loss: 9.2915e-06 - loss: 3.4888 - reconstruction_loss: 1.8013 - val_ae_loss: 1.6137 - val_kl_loss: 5.6136e-06 - val_loss: 3.3615 - val_reconstruction_loss: 1.7478 - learning_rate: 1.0000e-04
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.6645

2025-03-22 12:53:24,528 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 789us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 647us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598us/step


2025-03-22 12:53:24,918 - INFO - Dynamic threshold: 0.665232 (fallback: 0.159999)
2025-03-22 12:53:25,045 - INFO - Fold 2 - Training time: 36.11s, Best val loss: 3.255355
2025-03-22 12:53:25,045 - INFO - Training fold 3/5
2025-03-22 12:53:25,046 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - ae_loss: 1.4816 - kl_loss: 5.2205e-07 - loss: 3.2187 - reconstruction_loss: 1.7371 - val_ae_loss: 1.5703 - val_kl_loss: 1.7602e-07 - val_loss: 3.3773 - val_reconstruction_loss: 1.8071 - learning_rate: 6.2500e-06
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.5040 - kl_loss: 5.6350e-07 - loss: 3.2632 - reconstruction_loss: 1.7592 - val_ae_loss: 1.5702 - val_kl_loss: 1.7535e-07 - val_loss: 3.3767 - val_reconstruction_loss: 1.8065 - learning_rate: 6.2500e-06
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.4949 - kl_loss: 7.8014e-07 - loss: 3.2540 - reconstruction_loss: 1.7591 - val_ae_loss: 1.5702 - val_kl_loss: 1.7150e-07 - val_loss: 3.3779 - val_reconstruction_loss: 1.8077 - learning_rate: 6.2500e-06
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.5324

2025-03-22 12:53:29,110 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 697us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 644us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 645us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615us/step


2025-03-22 12:53:29,492 - INFO - Dynamic threshold: 0.143155 (fallback: 0.162367)
2025-03-22 12:53:29,604 - INFO - Fold 3 - Training time: 4.56s, Best val loss: 3.376719
2025-03-22 12:53:29,605 - INFO - Training fold 4/5
2025-03-22 12:53:29,605 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - ae_loss: 1.5155 - kl_loss: 6.2787e-07 - loss: 3.2858 - reconstruction_loss: 1.7703 - val_ae_loss: 1.5134 - val_kl_loss: 1.1575e-07 - val_loss: 3.2714 - val_reconstruction_loss: 1.7581 - learning_rate: 1.5625e-06
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - ae_loss: 1.5179 - kl_loss: 5.8494e-07 - loss: 3.2758 - reconstruction_loss: 1.7579 - val_ae_loss: 1.5134 - val_kl_loss: 1.1415e-07 - val_loss: 3.2715 - val_reconstruction_loss: 1.7581 - learning_rate: 1.5625e-06
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - ae_loss: 1.5194 - kl_loss: 1.0499e-06 - loss: 3.2824 - reconstruction_loss: 1.7631 - val_ae_loss: 1.5135 - val_kl_loss: 1.1322e-07 - val_loss: 3.2708 - val_reconstruction_loss: 1.7573 - learning_rate: 1.5625e-06
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - ae_loss: 1.5

2025-03-22 12:53:42,838 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 751us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 688us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 630us/step


2025-03-22 12:53:43,269 - INFO - Dynamic threshold: 0.144352 (fallback: 0.162052)
2025-03-22 12:53:43,397 - INFO - Fold 4 - Training time: 13.79s, Best val loss: 3.270205
2025-03-22 12:53:43,397 - INFO - Training fold 5/5
2025-03-22 12:53:43,398 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - ae_loss: 1.4951 - kl_loss: 4.3927e-07 - loss: 3.2382 - reconstruction_loss: 1.7430 - val_ae_loss: 1.5670 - val_kl_loss: 1.4422e-07 - val_loss: 3.3614 - val_reconstruction_loss: 1.7944 - learning_rate: 1.0000e-07
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.5061 - kl_loss: 3.9915e-07 - loss: 3.2688 - reconstruction_loss: 1.7627 - val_ae_loss: 1.5670 - val_kl_loss: 1.4422e-07 - val_loss: 3.3617 - val_reconstruction_loss: 1.7947 - learning_rate: 1.0000e-07
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.4858 - kl_loss: 3.6313e-07 - loss: 3.2202 - reconstruction_loss: 1.7344 - val_ae_loss: 1.5670 - val_kl_loss: 1.4422e-07 - val_loss: 3.3631 - val_reconstruction_loss: 1.7961 - learning_rate: 1.0000e-07
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - ae_loss: 1.5039

2025-03-22 12:53:50,507 - INFO - Hybrid Autoencoder-VAE training completed.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 608us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 609us/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 588us/step


2025-03-22 12:53:50,881 - INFO - Dynamic threshold: 0.709434 (fallback: 0.159233)
2025-03-22 12:53:50,994 - INFO - Fold 5 - Training time: 7.60s, Best val loss: 3.360989
2025-03-22 12:53:50,994 - INFO - Cross-validation complete - Avg val loss: 3.345901, Avg train time: 20.34s
2025-03-22 12:53:50,994 - INFO - Training final model on all normal data...
2025-03-22 12:53:50,994 - INFO - Starting hybrid Autoencoder-VAE training...


Epoch 1/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - ae_loss: 1.5441 - kl_loss: 6.1592e-07 - loss: 3.3448 - reconstruction_loss: 1.8007 - val_ae_loss: 1.5376 - val_kl_loss: 3.3062e-08 - val_loss: 3.2444 - val_reconstruction_loss: 1.7068 - learning_rate: 1.0000e-07
Epoch 2/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - ae_loss: 1.5046 - kl_loss: 5.3522e-07 - loss: 3.2527 - reconstruction_loss: 1.7482 - val_ae_loss: 1.5376 - val_kl_loss: 3.3062e-08 - val_loss: 3.2437 - val_reconstruction_loss: 1.7060 - learning_rate: 1.0000e-07
Epoch 3/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - ae_loss: 1.5118 - kl_loss: 3.6836e-07 - loss: 3.2843 - reconstruction_loss: 1.7725 - val_ae_loss: 1.5376 - val_kl_loss: 3.3062e-08 - val_loss: 3.2428 - val_reconstruction_loss: 1.7051 - learning_rate: 1.0000e-07
Epoch 4/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - ae_loss: 1.5193 

2025-03-22 12:53:59,515 - INFO - Hybrid Autoencoder-VAE training completed.


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


2025-03-22 12:54:00,265 - INFO - Dynamic threshold: 0.143639 (fallback: 0.161913)
2025-03-22 12:54:00,475 - INFO - Generating evaluation plots with labeled test data...


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 684us/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 591us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 648us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 597us/step
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step


2025-03-22 12:54:02,037 - INFO - ROC AUC: 1.0000
2025-03-22 12:54:02,038 - INFO - PR AUC: 1.0000
2025-03-22 12:54:02,039 - INFO - Layer 1 VAE model training and evaluation complete.
