<a href="https://colab.research.google.com/github/supriyag123/PHD_Pub/blob/main/MLP%20Troubleshooting%20Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Troubleshooting MLP Agent - Enhanced Diagnostics and Fixes
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import math
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.regularizers import l1_l2
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

class TroubleshootingMLPAgent:
    """
    MLP Agent with comprehensive troubleshooting and diagnostics
    """

    def __init__(self, output_dir='/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/'):
        """
        Initialise agent state and make sure the model directory exists.

        Args:
            output_dir: Directory prefix used for data files and for the
                ``MLP_Models/`` sub-directory. Paths in this class are built
                by plain string concatenation onto this prefix, so a string
                without a trailing path separator gets a ``/`` appended.
        """
        # f"{output_dir}MLP_Models/" would otherwise silently become
        # ".../some_dirMLP_Models/" when the caller omits the trailing slash.
        if (isinstance(output_dir, str) and output_dir
                and not output_dir.endswith(('/', os.sep))):
            output_dir = output_dir + '/'

        self.output_dir = output_dir
        self.model = None
        # Scaler applied to the target values
        self.transformer = StandardScaler()
        self.is_fitted = False

        # Diagnostic variables
        self.data_issues = []
        self.training_issues = []

        # Create directories
        self.mlp_dir = f"{output_dir}MLP_Models/"
        os.makedirs(self.mlp_dir, exist_ok=True)

    def comprehensive_data_diagnosis(self, x, y):
        """
        Comprehensive data quality diagnosis
        """
        print("="*80)
        print("🔍 COMPREHENSIVE DATA DIAGNOSIS")
        print("="*80)

        issues = []

        # Basic info
        print(f"📊 Basic Data Info:")
        print(f"   Features shape: {x.shape}")
        print(f"   Target shape: {y.shape}")
        print(f"   Memory usage: {(x.nbytes + y.nbytes) / 1024**2:.1f} MB")

        # 1. Check for NaN/Inf values
        print(f"\n🔍 Missing/Invalid Values:")
        x_nan = np.isnan(x).sum()
        y_nan = np.isnan(y).sum()
        x_inf = np.isinf(x).sum()
        y_inf = np.isinf(y).sum()

        print(f"   X NaN values: {x_nan}")
        print(f"   y NaN values: {y_nan}")
        print(f"   X Inf values: {x_inf}")
        print(f"   y Inf values: {y_inf}")

        if x_nan > 0 or y_nan > 0 or x_inf > 0 or y_inf > 0:
            issues.append("Missing or infinite values detected")

        # 2. Feature analysis
        print(f"\n📈 Feature Analysis:")
        feature_means = np.mean(x, axis=0)
        feature_stds = np.std(x, axis=0)
        feature_ranges = np.max(x, axis=0) - np.min(x, axis=0)

        print(f"   Feature means range: [{np.min(feature_means):.4f}, {np.max(feature_means):.4f}]")
        print(f"   Feature stds range: [{np.min(feature_stds):.4f}, {np.max(feature_stds):.4f}]")

        # Check for constant features
        constant_features = np.sum(feature_stds < 1e-8)
        print(f"   Constant features: {constant_features}/{x.shape[1]}")
        if constant_features > 0:
            issues.append(f"{constant_features} constant features found")

        # Check if features are already scaled
        feature_scale_check = np.abs(feature_means) < 0.1 and np.abs(feature_stds - 1.0) < 0.1
        print(f"   Features appear scaled: {np.all(feature_scale_check)}")

        # 3. Target analysis
        print(f"\n🎯 Target Analysis:")
        print(f"   Target mean: {np.mean(y):.4f}")
        print(f"   Target std: {np.std(y):.4f}")
        print(f"   Target range: [{np.min(y):.1f}, {np.max(y):.1f}]")
        print(f"   Unique target values: {len(np.unique(y))}")

        # Check target distribution
        skewness = stats.skew(y)
        kurtosis = stats.kurtosis(y)
        print(f"   Target skewness: {skewness:.4f}")
        print(f"   Target kurtosis: {kurtosis:.4f}")

        if abs(skewness) > 2:
            issues.append(f"Highly skewed target distribution (skew={skewness:.2f})")

        # 4. Feature-target correlations
        print(f"\n🔗 Feature-Target Relationships:")
        correlations = []
        for i in range(min(x.shape[1], 10)):  # Check first 10 features
            try:
                corr = np.corrcoef(x[:, i], y)[0, 1]
                if not np.isnan(corr):
                    correlations.append(abs(corr))
            except:
                correlations.append(0)

        if correlations:
            max_corr = max(correlations)
            mean_corr = np.mean(correlations)
            print(f"   Max |correlation|: {max_corr:.4f}")
            print(f"   Mean |correlation|: {mean_corr:.4f}")

            if max_corr < 0.05:
                issues.append("Extremely weak feature-target correlations")
            elif max_corr < 0.1:
                issues.append("Very weak feature-target correlations")

        # 5. Sample size analysis
        print(f"\n📏 Sample Size Analysis:")
        n_samples, n_features = x.shape
        samples_per_feature = n_samples / n_features
        print(f"   Samples per feature: {samples_per_feature:.1f}")

        if samples_per_feature < 10:
            issues.append("Very few samples per feature (overfitting risk)")
        elif samples_per_feature < 50:
            issues.append("Limited samples per feature")

        # 6. Data distribution visualization
        self.plot_data_distributions(x, y)

        # 7. Summary
        print(f"\n⚠️ Issues Found: {len(issues)}")
        for i, issue in enumerate(issues, 1):
            print(f"   {i}. {issue}")

        self.data_issues = issues

        # 8. Recommendations
        print(f"\n💡 Recommendations:")
        if "Missing or infinite values detected" in issues:
            print("   - Clean missing/infinite values before training")
        if any("correlation" in issue for issue in issues):
            print("   - Consider feature engineering or polynomial features")
            print("   - Try different model architectures (deeper/wider)")
            print("   - Consider ensemble methods")
        if "skewed" in str(issues):
            print("   - Consider log transformation of target")
        if "constant" in str(issues):
            print("   - Remove constant features")

        print("="*80)
        return issues

    def plot_data_distributions(self, x, y):
        """
        Draw a 3x4 grid of diagnostic plots for the features and the target.

        Panel 1 is the target histogram and panel 2 the target boxplot.
        Panels 3-8 hold histograms of up to the first six features, and
        panels 9-12 scatter up to the first four features against the target
        using at most 5000 randomly drawn rows per panel.
        """
        grid = (3, 4)
        plt.figure(figsize=(20, 12))

        # Panel 1: histogram of the target values
        plt.subplot(*grid, 1)
        plt.hist(y, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
        plt.title('Target Distribution')
        plt.xlabel('Target Value')
        plt.ylabel('Frequency')

        # Panel 2: boxplot of the target values
        plt.subplot(*grid, 2)
        plt.boxplot(y)
        plt.title('Target Boxplot')
        plt.ylabel('Target Value')

        n_columns = x.shape[1]

        # Panels 3-8: one histogram per feature
        for panel, col in enumerate(range(min(6, n_columns)), start=3):
            plt.subplot(*grid, panel)
            plt.hist(x[:, col], bins=30, alpha=0.7, color='lightcoral')
            plt.title(f'Feature {col+1}')
            plt.xlabel('Value')
            plt.ylabel('Frequency')

        # Panels 9-12: feature against target
        for panel, col in enumerate(range(min(4, n_columns)), start=9):
            plt.subplot(*grid, panel)
            # Only a random subset is drawn so large datasets stay responsive;
            # a new subset is picked for every panel.
            n_drawn = min(5000, len(x))
            picked = np.random.choice(len(x), n_drawn, replace=False)
            plt.scatter(x[picked, col], y[picked], alpha=0.5, s=1)
            # The correlation in the title is computed on all rows
            pearson_r = np.corrcoef(x[:, col], y)[0, 1]
            plt.title(f'Feature {col+1} vs Target\nr={pearson_r:.3f}')
            plt.xlabel(f'Feature {col+1}')
            plt.ylabel('Target')

        plt.tight_layout()
        plt.show()

    def intelligent_preprocessing(self, x, y):
        """
        Intelligent preprocessing based on data characteristics
        """
        print("\n🧠 INTELLIGENT PREPROCESSING")
        print("="*50)

        x_processed = x.copy()
        y_processed = y.copy()
        preprocessing_steps = []

        # 1. Handle missing values
        if np.isnan(x_processed).any() or np.isnan(y_processed).any():
            print("🔧 Handling missing values...")
            x_processed = np.nan_to_num(x_processed, nan=np.nanmean(x_processed))
            y_processed = np.nan_to_num(y_processed, nan=np.nanmean(y_processed))
            preprocessing_steps.append("Missing values filled")

        # 2. Remove constant features
        feature_stds = np.std(x_processed, axis=0)
        non_constant_mask = feature_stds > 1e-8
        if not np.all(non_constant_mask):
            print(f"🔧 Removing {np.sum(~non_constant_mask)} constant features...")
            x_processed = x_processed[:, non_constant_mask]
            preprocessing_steps.append(f"Removed {np.sum(~non_constant_mask)} constant features")

        # 3. Handle extreme outliers in target
        y_q1, y_q3 = np.percentile(y_processed, [25, 75])
        y_iqr = y_q3 - y_q1
        y_lower = y_q1 - 3 * y_iqr
        y_upper = y_q3 + 3 * y_iqr

        extreme_outliers = (y_processed < y_lower) | (y_processed > y_upper)
        if np.sum(extreme_outliers) > 0:
            print(f"🔧 Clipping {np.sum(extreme_outliers)} extreme target outliers...")
            y_processed = np.clip(y_processed, y_lower, y_upper)
            preprocessing_steps.append(f"Clipped {np.sum(extreme_outliers)} extreme outliers")

        # 4. Target transformation if highly skewed
        skewness = stats.skew(y_processed)
        if abs(skewness) > 2:
            print(f"🔧 Target is highly skewed ({skewness:.2f}), applying log transformation...")
            if np.min(y_processed) > 0:
                y_processed = np.log1p(y_processed)
                preprocessing_steps.append("Log transformation applied to target")
            else:
                # Shift to positive values first
                y_processed = y_processed - np.min(y_processed) + 1
                y_processed = np.log1p(y_processed)
                preprocessing_steps.append("Shifted and log-transformed target")

        print(f"✅ Preprocessing complete: {len(preprocessing_steps)} steps")
        for step in preprocessing_steps:
            print(f"   - {step}")

        return x_processed, y_processed, preprocessing_steps

    def build_adaptive_mlp(self, input_dim, data_characteristics):
        """
        Assemble and compile a Keras MLP whose size depends on the data.

        ``data_characteristics['max_correlation']`` (0.1 when absent) selects
        the layout: below 0.05 -> 'deep_wide', below 0.2 -> 'deep', otherwise
        'standard'. Every layout is a stack of L1/L2-regularised Dense +
        BatchNormalization + Dropout groups, followed by an unregularised
        Dense(32) with dropout and a single linear output unit. A correlation
        below 0.1 additionally switches to a 0.002 learning rate and Huber
        loss; otherwise 0.001 and MSE are used.

        Args:
            input_dim: Number of input features.
            data_characteristics: Mapping supporting ``.get``.

        Returns:
            The compiled ``Sequential`` model.
        """
        print(f"\n🏗️ Building adaptive MLP architecture...")

        max_corr = data_characteristics.get('max_correlation', 0.1)

        # (upper bound on max_corr, architecture, dropout, l1, l2); the first
        # bound that max_corr falls under wins. Weaker relationships get more
        # capacity and stronger regularisation.
        tiers = (
            (0.05, 'deep_wide', 0.3, 0.001, 0.01),
            (0.2, 'deep', 0.2, 0.0005, 0.005),
        )
        # Fallback for strong relationships
        architecture, dropout_rate, l1_reg, l2_reg = 'standard', 0.1, 0.0001, 0.001
        for bound, *settings in tiers:
            if max_corr < bound:
                architecture, dropout_rate, l1_reg, l2_reg = settings
                break

        print(f"   Selected architecture: {architecture}")
        print(f"   Dropout rate: {dropout_rate}")
        print(f"   L1/L2 regularization: {l1_reg}/{l2_reg}")

        # Per layout: (units, dropout after the group) for the regularised groups
        half_rate = dropout_rate / 2
        regularised_groups = {
            'deep_wide': ((512, dropout_rate), (256, dropout_rate),
                          (128, dropout_rate), (64, half_rate)),
            'deep': ((256, dropout_rate), (128, dropout_rate), (64, dropout_rate)),
            'standard': ((128, dropout_rate), (64, dropout_rate)),
        }[architecture]

        model = Sequential()

        for position, (units, group_dropout) in enumerate(regularised_groups):
            # Only the very first layer declares the input size
            first_layer_kwargs = {'input_dim': input_dim} if position == 0 else {}
            model.add(Dense(units, activation='relu',
                            kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
                            **first_layer_kwargs))
            model.add(BatchNormalization())
            model.add(Dropout(group_dropout))

        # Shared tail: small unregularised layer without batch normalisation
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(half_rate))

        # Output layer
        model.add(Dense(1, activation='linear'))

        # Adaptive optimizer settings
        if max_corr < 0.1:
            # Weak relationships - higher learning rate, more robust loss
            learning_rate, loss = 0.002, 'huber'
            adam_settings = dict(learning_rate=learning_rate, clipnorm=1.0,
                                 beta_1=0.9, beta_2=0.999)
        else:
            # Stronger relationships - standard settings
            learning_rate, loss = 0.001, 'mse'
            adam_settings = dict(learning_rate=learning_rate, clipnorm=1.0)
        optimizer = keras.optimizers.Adam(**adam_settings)

        model.compile(
            loss=loss,
            optimizer=optimizer,
            metrics=['mae', 'mse']
        )

        print(f"   Total parameters: {model.count_params():,}")
        print(f"   Learning rate: {learning_rate}")
        print(f"   Loss function: {loss}")

        return model

    def train_with_advanced_callbacks(self, model, x_train, y_train, x_val, y_val,
                                    epochs=1000, batch_size=32):
        """
        Fit ``model`` with checkpointing, early stopping and LR reduction.

        All three callbacks monitor ``val_loss``:
          * ``ModelCheckpoint`` writes the best weights into ``self.mlp_dir``.
          * ``EarlyStopping`` stops after 100 epochs without an improvement
            of at least 1e-4 and restores the best weights.
          * ``ReduceLROnPlateau`` halves the learning rate after 30 stagnant
            epochs, down to 1e-7.

        Args:
            model: Compiled Keras model.
            x_train, y_train: Training data passed to ``model.fit``.
            x_val, y_val: Validation data passed to ``model.fit``.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.

        Returns:
            The history object returned by ``model.fit``.
        """
        print(f"\n🚀 Training with advanced monitoring...")

        # Keras 3 raises ValueError for save_weights_only=True unless the path
        # ends in ".weights.h5"; earlier versions keep the historical name so
        # existing checkpoints stay where they were.
        try:
            keras_major = int(str(keras.__version__).split('.')[0])
        except (AttributeError, ValueError):
            keras_major = 2
        weights_name = "best_weights.weights.h5" if keras_major >= 3 else "best_weights.h5"

        # Advanced callbacks
        callbacks = [
            # Model checkpointing
            ModelCheckpoint(
                f"{self.mlp_dir}{weights_name}",
                monitor='val_loss',
                save_best_only=True,
                save_weights_only=True,
                verbose=1
            ),

            # Early stopping with patience
            EarlyStopping(
                monitor='val_loss',
                patience=100,  # More patience for difficult problems
                verbose=1,
                min_delta=0.0001,
                restore_best_weights=True
            ),

            # Learning rate reduction
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=30,
                min_lr=1e-7,
                verbose=1
            ),
        ]

        # Train model
        history = model.fit(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        return history

    def comprehensive_evaluation(self, model, x_test, y_test, title="Model Evaluation"):
        """
        Score ``model`` on one data split, print the metrics and plot them.

        Predictions and ``y_test`` are both passed through
        ``self.transformer.inverse_transform`` before any metric is computed.

        Args:
            model: Object with a Keras-style ``predict(x, verbose=0)``.
            x_test: Features of the split.
            y_test: Scaled targets of the split (NumPy array).
            title: Heading for the printed report and the plots.

        Returns:
            dict with keys ``r2``, ``mse``, ``rmse``, ``mae``, ``mape``,
            ``accuracies`` (tolerance -> percentage of samples whose absolute
            error is within it), ``y_true`` and ``y_pred``.
        """
        print(f"\n📊 {title.upper()}")
        print("="*50)

        # Bring predictions and targets back from the scaler's space
        scaled_predictions = model.predict(x_test, verbose=0)
        undo_scaling = self.transformer.inverse_transform
        y_pred = undo_scaling(scaled_predictions).flatten()
        y_true = undo_scaling(y_test.reshape(-1, 1)).flatten()

        # Core regression metrics
        r2 = r2_score(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mse)

        # Mean absolute percentage error; the small constant avoids a
        # division by an exact zero target
        relative_errors = np.abs((y_true - y_pred) / (y_true + 1e-8))
        mape = np.mean(relative_errors) * 100

        # Share of samples (in percent) within each absolute-error tolerance
        absolute_errors = np.abs(y_true - y_pred)
        accuracies = {
            tol: np.mean(absolute_errors <= tol) * 100
            for tol in [0.5, 1.0, 1.5, 2.0]
        }

        # Report
        print(f"R² Score: {r2:.6f}")
        print(f"MSE: {mse:.4f}")
        print(f"RMSE: {rmse:.4f}")
        print(f"MAE: {mae:.4f}")
        print(f"MAPE: {mape:.2f}%")
        print("Accuracy within tolerance:")
        for tol in accuracies:
            print(f"  ±{tol}: {accuracies[tol]:.1f}%")

        # Visual evaluation
        self.plot_comprehensive_results(y_true, y_pred, title)

        summary = {'r2': r2, 'mse': mse, 'rmse': rmse, 'mae': mae, 'mape': mape}
        summary['accuracies'] = accuracies
        summary['y_true'] = y_true
        summary['y_pred'] = y_pred
        return summary

    def plot_comprehensive_results(self, y_true, y_pred, title):
        """
        Draw a 3x4 grid of evaluation plots for one set of predictions.

        Panels: scatter of true vs predicted, residuals vs predictions,
        residual histogram, residual Q-Q plot, a random sample of up to 200
        points as line plots, absolute error vs prediction, percentage-error
        histogram, accuracy vs tolerance, binned true vs predicted means,
        windowed mean absolute error over sorted true values, predictions
        with error bars, and a text box with R², RMSE and MAE.

        Args:
            y_true: 1-D NumPy array of true values.
            y_pred: 1-D NumPy array of predictions, same length as ``y_true``.
            title: Prefix for the first panel's title.
        """
        plt.figure(figsize=(20, 15))

        # 1. Scatter plot
        plt.subplot(3, 4, 1)
        plt.scatter(y_true, y_pred, alpha=0.5, s=1)
        plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)
        plt.xlabel('True Values')
        plt.ylabel('Predictions')
        plt.title(f'{title} - Scatter Plot')
        plt.grid(True, alpha=0.3)

        # 2. Residuals vs Predictions
        plt.subplot(3, 4, 2)
        residuals = y_true - y_pred
        plt.scatter(y_pred, residuals, alpha=0.5, s=1)
        plt.axhline(y=0, color='r', linestyle='--')
        plt.xlabel('Predictions')
        plt.ylabel('Residuals')
        plt.title('Residuals vs Predictions')
        plt.grid(True, alpha=0.3)

        # 3. Residual distribution
        plt.subplot(3, 4, 3)
        plt.hist(residuals, bins=50, alpha=0.7, density=True)
        plt.xlabel('Residuals')
        plt.ylabel('Density')
        plt.title('Residual Distribution')
        plt.grid(True, alpha=0.3)

        # 4. Q-Q plot for residuals
        plt.subplot(3, 4, 4)
        stats.probplot(residuals, dist="norm", plot=plt)
        plt.title('Q-Q Plot (Residuals)')
        plt.grid(True, alpha=0.3)

        # 5. Time series comparison (sample)
        plt.subplot(3, 4, 5)
        sample_indices = np.random.choice(len(y_true), min(200, len(y_true)), replace=False)
        sample_indices = np.sort(sample_indices)
        plt.plot(y_true[sample_indices], 'b-', label='True', alpha=0.7)
        plt.plot(y_pred[sample_indices], 'r--', label='Predicted', alpha=0.7)
        plt.xlabel('Sample Index')
        plt.ylabel('Value')
        plt.title('Sample Predictions')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # 6. Absolute errors vs predictions
        plt.subplot(3, 4, 6)
        abs_errors = np.abs(residuals)
        plt.scatter(y_pred, abs_errors, alpha=0.5, s=1)
        plt.xlabel('Predictions')
        plt.ylabel('Absolute Error')
        plt.title('Absolute Error vs Prediction')
        plt.grid(True, alpha=0.3)

        # 7. Percentage errors
        plt.subplot(3, 4, 7)
        pct_errors = 100 * residuals / (y_true + 1e-8)
        plt.hist(pct_errors, bins=50, alpha=0.7)
        plt.xlabel('Percentage Error (%)')
        plt.ylabel('Frequency')
        plt.title('Percentage Error Distribution')
        plt.grid(True, alpha=0.3)

        # 8. Accuracy at different tolerances
        plt.subplot(3, 4, 8)
        tolerances = np.arange(0.1, 5.1, 0.1)
        accuracies = [np.mean(abs_errors <= tol) * 100 for tol in tolerances]
        plt.plot(tolerances, accuracies, 'b-', linewidth=2)
        plt.xlabel('Error Tolerance')
        plt.ylabel('Accuracy (%)')
        plt.title('Accuracy vs Tolerance')
        plt.grid(True, alpha=0.3)

        # 9. True vs Predicted (binned)
        plt.subplot(3, 4, 9)
        n_bins = 10
        true_bins = np.linspace(y_true.min(), y_true.max(), n_bins)
        bin_indices = np.digitize(y_true, true_bins)
        # Keep the true/predicted means paired per bin and skip empty bins,
        # so both curves always have the same length.
        binned_pairs = []
        for i in range(1, n_bins):
            in_bin = bin_indices == i
            if in_bin.any():
                binned_pairs.append((y_true[in_bin].mean(), y_pred[in_bin].mean()))
        if binned_pairs:
            bin_means_true, bin_means_pred = zip(*binned_pairs)
            plt.plot(bin_means_true, bin_means_pred, 'bo-')
            plt.plot([min(bin_means_true), max(bin_means_true)],
                    [min(bin_means_true), max(bin_means_true)], 'r--')
        plt.xlabel('True (Binned)')
        plt.ylabel('Predicted (Binned)')
        plt.title('Binned True vs Predicted')
        plt.grid(True, alpha=0.3)

        # 10. Error by true value range
        plt.subplot(3, 4, 10)
        sorted_indices = np.argsort(y_true)
        # At least 1: with fewer than 20 samples the integer division gives 0,
        # and range() rejects a zero step.
        window_size = max(1, len(y_true) // 20)
        windowed_errors = []
        windowed_trues = []
        for i in range(0, len(sorted_indices) - window_size, window_size):
            window_indices = sorted_indices[i:i+window_size]
            windowed_errors.append(np.mean(abs_errors[window_indices]))
            windowed_trues.append(np.mean(y_true[window_indices]))
        plt.plot(windowed_trues, windowed_errors, 'ro-')
        plt.xlabel('True Value (Windowed)')
        plt.ylabel('Mean Absolute Error')
        plt.title('Error vs True Value Range')
        plt.grid(True, alpha=0.3)

        # 11. Prediction confidence intervals
        plt.subplot(3, 4, 11)
        sorted_pred_indices = np.argsort(y_pred)
        n_points = min(100, len(y_pred))
        step = len(y_pred) // n_points
        sample_indices = sorted_pred_indices[::step]

        pred_sample = y_pred[sample_indices]
        true_sample = y_true[sample_indices]
        error_sample = abs_errors[sample_indices]

        plt.errorbar(pred_sample, true_sample, yerr=error_sample,
                    fmt='o', alpha=0.7, capsize=2)
        plt.plot([pred_sample.min(), pred_sample.max()],
                [pred_sample.min(), pred_sample.max()], 'r--')
        plt.xlabel('Predictions')
        plt.ylabel('True Values')
        plt.title('Predictions with Error Bars')
        plt.grid(True, alpha=0.3)

        # 12. Summary metrics as a text box
        plt.subplot(3, 4, 12)
        plt.text(0.5, 0.5, f'R² = {r2_score(y_true, y_pred):.4f}\n'
                           f'RMSE = {np.sqrt(mean_squared_error(y_true, y_pred)):.4f}\n'
                           f'MAE = {mean_absolute_error(y_true, y_pred):.4f}',
                ha='center', va='center', transform=plt.gca().transAxes,
                fontsize=14, bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue"))
        plt.title('Summary Metrics')
        plt.axis('off')

        plt.tight_layout()
        plt.show()

    def run_complete_troubleshooting_pipeline(self, data_file=None, windows_file=None):
        """
        Complete troubleshooting pipeline with comprehensive diagnostics
        """
        print("="*100)
        print("🔧 COMPREHENSIVE TROUBLESHOOTING PIPELINE")
        print("="*100)

        # Step 1: Load data
        print("\n1️⃣ LOADING DATA")
        try:
            if data_file is None:
                data_file = f'{self.output_dir}generated-data-OPTIMIZED.npy'
            if windows_file is None:
                windows_file = f'{self.output_dir}generated-data-true-window-OPTIMIZED.npy'

            x = np.load(data_file)
            y = np.load(windows_file)
            print(f"✅ Data loaded: X={x.shape}, y={y.shape}")
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            return None

        # Step 2: Comprehensive diagnosis
        print("\n2️⃣ COMPREHENSIVE DIAGNOSIS")
        issues = self.comprehensive_data_diagnosis(x, y)

        # Step 3: Intelligent preprocessing
        print("\n3️⃣ INTELLIGENT PREPROCESSING")
        x_processed, y_processed, preprocessing_steps = self.intelligent_preprocessing(x, y)

        # Step 4: Calculate data characteristics for adaptive architecture
        print("\n4️⃣ CALCULATING DATA CHARACTERISTICS")
        correlations = []
        for i in range(min(x_processed.shape[1], 20)):  # Check first 20 features
            try:
                corr = np.corrcoef(x_processed[:, i], y_processed)[0, 1]
                if not np.isnan(corr):
                    correlations.append(abs(corr))
            except:
                pass

        data_characteristics = {
            'n_samples': len(x_processed),
            'n_features': x_processed.shape[1],
            'max_correlation': max(correlations) if correlations else 0.001,
            'mean_correlation': np.mean(correlations) if correlations else 0.001,
            'target_std': np.std(y_processed),
            'preprocessing_steps': preprocessing_steps
        }

        print(f"   Max correlation: {data_characteristics['max_correlation']:.4f}")
        print(f"   Mean correlation: {data_characteristics['mean_correlation']:.4f}")

        # Step 5: Prepare data
        print("\n5️⃣ PREPARING DATA")
        # Split data
        x_temp, x_test, y_temp, y_test = train_test_split(
            x_processed, y_processed, test_size=0.15, random_state=42
        )
        x_train, x_val, y_train, y_val = train_test_split(
            x_temp, y_temp, test_size=0.176, random_state=42  # 0.176 * 0.85 ≈ 0.15
        )

        # Transform targets
        y_train_scaled = self.transformer.fit_transform(y_train.reshape(-1, 1)).flatten()
        y_val_scaled = self.transformer.transform(y_val.reshape(-1, 1)).flatten()
        y_test_scaled = self.transformer.transform(y_test.reshape(-1, 1)).flatten()

        print(f"   Train: {x_train.shape[0]} samples")
        print(f"   Validation: {x_val.shape[0]} samples")
        print(f"   Test: {x_test.shape[0]} samples")

        # Step 6: Build adaptive model
        print("\n6️⃣ BUILDING ADAPTIVE MODEL")
        self.model = self.build_adaptive_mlp(x_train.shape[1], data_characteristics)

        # Step 7: Train with advanced monitoring
        print("\n7️⃣ TRAINING WITH ADVANCED MONITORING")
        history = self.train_with_advanced_callbacks(
            self.model, x_train, y_train_scaled, x_val, y_val_scaled,
            epochs=1000, batch_size=32
        )

        self.is_fitted = True

        # Step 8: Plot training history
        print("\n8️⃣ TRAINING HISTORY")
        self.plot_detailed_training_history(history)

        # Step 9: Comprehensive evaluation
        print("\n9️⃣ COMPREHENSIVE EVALUATION")
        train_results = self.comprehensive_evaluation(
            self.model, x_train, y_train_scaled, "Training Set"
        )
        val_results = self.comprehensive_evaluation(
            self.model, x_val, y_val_scaled, "Validation Set"
        )
        test_results = self.comprehensive_evaluation(
            self.model, x_test, y_test_scaled, "Test Set"
        )

        # Step 10: Final diagnosis and recommendations
        print("\n🔟 FINAL DIAGNOSIS AND RECOMMENDATIONS")
        self.final_diagnosis_and_recommendations(
            test_results, data_characteristics, issues, preprocessing_steps
        )

        # Step 11: Save results
        results = {
            'model': self.model,
            'data_characteristics': data_characteristics,
            'preprocessing_steps': preprocessing_steps,
            'train_results': train_results,
            'val_results': val_results,
            'test_results': test_results,
            'issues_found': issues,
            'history': history
        }

        return results

    def plot_detailed_training_history(self, history):
        """Plot detailed training history"""
        plt.figure(figsize=(20, 10))

        # Loss
        plt.subplot(2, 4, 1)
        plt.plot(history.history['loss'], label='Training Loss', linewidth=2)
        plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.yscale('log')

        # MAE
        plt.subplot(2, 4, 2)
        plt.plot(history.history['mae'], label='Training MAE', linewidth=2)
        plt.plot(history.history['val_mae'], label='Validation MAE', linewidth=2)
        plt.title('Mean Absolute Error')
        plt.ylabel('MAE')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # MSE
        plt.subplot(2, 4, 3)
        plt.plot(history.history['mse'], label='Training MSE', linewidth=2)
        plt.plot(history.history['val_mse'], label='Validation MSE', linewidth=2)
        plt.title('Mean Squared Error')
        plt.ylabel('MSE')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.yscale('log')

        # Learning Rate (if available)
        plt.subplot(2, 4, 4)
        if 'lr' in history.history:
            plt.plot(history.history['lr'], label='Learning Rate', linewidth=2)
            plt.ylabel('Learning Rate')
            plt.yscale('log')
        else:
            # Show epoch vs best val_loss to see convergence
            val_loss = history.history['val_loss']
            best_val_loss = np.minimum.accumulate(val_loss)
            plt.plot(best_val_loss, label='Best Val Loss', linewidth=2)
            plt.ylabel('Best Validation Loss')
            plt.yscale('log')
        plt.title('Learning Rate / Convergence')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Training vs Validation Gap
        plt.subplot(2, 4, 5)
        train_loss = np.array(history.history['loss'])
        val_loss = np.array(history.history['val_loss'])
        gap = val_loss - train_loss
        plt.plot(gap, label='Val - Train Loss', linewidth=2, color='red')
        plt.axhline(y=0, color='black', linestyle='--', alpha=0.7)
        plt.title('Overfitting Monitor')
        plt.ylabel('Validation - Training Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Loss smoothed (moving average)
        plt.subplot(2, 4, 6)
        window = min(10, len(history.history['loss']) // 10)
        if window > 1:
            train_smooth = np.convolve(history.history['loss'], np.ones(window)/window, mode='valid')
            val_smooth = np.convolve(history.history['val_loss'], np.ones(window)/window, mode='valid')
            plt.plot(train_smooth, label='Training (Smoothed)', linewidth=2)
            plt.plot(val_smooth, label='Validation (Smoothed)', linewidth=2)
        else:
            plt.plot(history.history['loss'], label='Training Loss', linewidth=2)
            plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
        plt.title('Smoothed Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.yscale('log')

        # Improvement rate
        plt.subplot(2, 4, 7)
        val_loss = np.array(history.history['val_loss'])
        if len(val_loss) > 10:
            improvement = val_loss[:-10] - val_loss[10:]  # Improvement over 10 epochs
            plt.plot(improvement, label='10-Epoch Improvement', linewidth=2)
            plt.axhline(y=0, color='red', linestyle='--', alpha=0.7)
        plt.title('Learning Progress')
        plt.ylabel('Loss Improvement')
        plt.xlabel('Epoch')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Final metrics summary
        plt.subplot(2, 4, 8)
        final_train_loss = history.history['loss'][-1]
        final_val_loss = history.history['val_loss'][-1]
        final_train_mae = history.history['mae'][-1]
        final_val_mae = history.history['val_mae'][-1]

        metrics_text = f"""Final Training Metrics:

Loss: {final_train_loss:.6f}
MAE: {final_train_mae:.6f}

Final Validation Metrics:

Loss: {final_val_loss:.6f}
MAE: {final_val_mae:.6f}

Overfitting Gap:
{final_val_loss - final_train_loss:.6f}

Total Epochs: {len(history.history['loss'])}"""

        plt.text(0.05, 0.95, metrics_text, transform=plt.gca().transAxes,
                fontsize=10, verticalalignment='top',
                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue"))
        plt.title('Final Metrics Summary')
        plt.axis('off')

        plt.tight_layout()
        plt.show()

    def final_diagnosis_and_recommendations(self, test_results, data_characteristics,
                                          initial_issues, preprocessing_steps):
        """
        Final diagnosis and actionable recommendations
        """
        print("="*80)
        print("🏥 FINAL DIAGNOSIS AND RECOMMENDATIONS")
        print("="*80)

        r2 = test_results['r2']
        mae = test_results['mae']

        # Performance classification
        if r2 >= 0.8:
            performance = "EXCELLENT"
            color = "🟢"
        elif r2 >= 0.6:
            performance = "GOOD"
            color = "🟡"
        elif r2 >= 0.3:
            performance = "MODERATE"
            color = "🟠"
        else:
            performance = "POOR"
            color = "🔴"

        print(f"{color} PERFORMANCE ASSESSMENT: {performance}")
        print(f"   Test R²: {r2:.6f}")
        print(f"   Test MAE: {mae:.4f}")
        print(f"   Accuracy ±1: {test_results['accuracies'][1.0]:.1f}%")
        print(f"   Accuracy ±2: {test_results['accuracies'][2.0]:.1f}%")

        # Detailed diagnosis
        print(f"\n🔍 DETAILED DIAGNOSIS:")

        # Data quality issues
        if initial_issues:
            print(f"   📊 Data Quality Issues Found: {len(initial_issues)}")
            for issue in initial_issues:
                print(f"      - {issue}")
        else:
            print(f"   📊 Data Quality: Good")

        # Model performance analysis
        max_corr = data_characteristics['max_correlation']
        mean_corr = data_characteristics['mean_correlation']

        print(f"   🔗 Feature-Target Relationships:")
        print(f"      - Max correlation: {max_corr:.4f}")
        print(f"      - Mean correlation: {mean_corr:.4f}")

        if max_corr < 0.05:
            print(f"      ⚠️ Extremely weak relationships detected")
        elif max_corr < 0.1:
            print(f"      ⚠️ Very weak relationships detected")
        elif max_corr < 0.2:
            print(f"      ℹ️ Weak but learnable relationships")
        else:
            print(f"      ✅ Strong relationships present")

        # Recommendations
        print(f"\n💡 SPECIFIC RECOMMENDATIONS:")

        if r2 < 0.3:
            print(f"   🚨 URGENT IMPROVEMENTS NEEDED:")
            if max_corr < 0.05:
                print(f"      1. Feature Engineering Priority:")
                print(f"         - Create polynomial features")
                print(f"         - Try interaction terms")
                print(f"         - Consider domain-specific transformations")
                print(f"         - Investigate time-based features")

            print(f"      2. Model Architecture:")
            print(f"         - Try ensemble methods (Random Forest, XGBoost)")
            print(f"         - Consider sequence models (LSTM, GRU)")
            print(f"         - Experiment with deeper architectures")

            print(f"      3. Data Investigation:")
            print(f"         - Check if VAE generated meaningful synthetic data")
            print(f"         - Verify target variable calculation")
            print(f"         - Investigate data leakage or preprocessing errors")

        elif r2 < 0.6:
            print(f"   🔧 IMPROVEMENT OPPORTUNITIES:")
            print(f"      1. Hyperparameter Tuning:")
            print(f"         - Grid search for learning rate and batch size")
            print(f"         - Experiment with different optimizers")
            print(f"         - Try different regularization strengths")

            print(f"      2. Advanced Techniques:")
            print(f"         - Cross-validation for better estimates")
            print(f"         - Ensemble multiple models")
            print(f"         - Feature selection methods")

        else:
            print(f"   ✅ MODEL PERFORMING WELL:")
            print(f"      1. Fine-tuning options:")
            print(f"         - Slight hyperparameter adjustments")
            print(f"         - Ensemble for marginal improvements")
            print(f"         - Production deployment considerations")

        # Code recommendations
        print(f"\n💻 IMMEDIATE CODE CHANGES TO TRY:")

        if max_corr < 0.1:
            print(f"""
# Try this enhanced architecture for weak relationships:
model = Sequential([
    Dense(1024, activation='relu', kernel_regularizer=l1_l2(0.001, 0.01)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(512, activation='relu', kernel_regularizer=l1_l2(0.001, 0.01)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.1),
    Dense(1)
])

# Use higher learning rate and robust loss
optimizer = Adam(learning_rate=0.003, clipnorm=1.0)
model.compile(loss='huber', optimizer=optimizer)
""")

        if "skewed" in str(initial_issues):
            print(f"""
# Try log transformation of target:
y_transformed = np.log1p(y - np.min(y) + 1)
""")

        print(f"""
# Alternative models to try:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

# Random Forest (often works well with weak relationships)
rf = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
rf.fit(x_train, y_train)

# XGBoost (excellent for tabular data)
xgb = XGBRegressor(n_estimators=200, max_depth=6, learning_rate=0.1, random_state=42)
xgb.fit(x_train, y_train)
""")

        # Preprocessing recommendations
        if preprocessing_steps:
            print(f"\n🔄 PREPROCESSING APPLIED:")
            for step in preprocessing_steps:
                print(f"      ✅ {step}")

        # Final action items
        print(f"\n📋 ACTION ITEMS (Priority Order):")
        if r2 < 0.1:
            print(f"      1. 🚨 CRITICAL: Verify data quality and target calculation")
            print(f"      2. 🚨 CRITICAL: Check VAE synthetic data quality")
            print(f"      3. Try Random Forest as baseline comparison")
            print(f"      4. Create polynomial/interaction features")
        elif r2 < 0.3:
            print(f"      1. Try Random Forest/XGBoost for comparison")
            print(f"      2. Feature engineering (polynomial, interactions)")
            print(f"      3. Deeper neural network architecture")
        elif r2 < 0.6:
            print(f"      1. Hyperparameter tuning")
            print(f"      2. Ensemble methods")
            print(f"      3. Cross-validation analysis")
        else:
            print(f"      1. Model is ready for production")
            print(f"      2. Consider incremental learning setup")

        print("="*80)

# Quick usage functions
def quick_diagnosis(data_file=None, windows_file=None):
    """Run the complete troubleshooting pipeline on a freshly created agent.

    A ``TroubleshootingMLPAgent`` is built with its default output directory
    and both arguments are forwarded positionally, unchanged, to
    ``run_complete_troubleshooting_pipeline``.

    Returns:
        Whatever ``run_complete_troubleshooting_pipeline`` returns.
    """
    return TroubleshootingMLPAgent().run_complete_troubleshooting_pipeline(
        data_file, windows_file
    )

def compare_models(x_train, y_train, x_test, y_test):
    """Fit two tree-ensemble baselines and report their test R² scores.

    A Random Forest and a Gradient Boosting regressor (100 estimators each,
    ``random_state=42``) are trained on the training split and scored on the
    test split. No MLP is trained here; the scores are baselines to set
    against an MLP result obtained separately.

    Returns:
        dict mapping the model name to its R² on ``(x_test, y_test)``, in
        training order.
    """
    from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
    from sklearn.metrics import r2_score

    # (display name, estimator class) in the order they are trained
    baselines = (
        ('Random Forest', RandomForestRegressor),
        ('Gradient Boosting', GradientBoostingRegressor),
    )

    scores = {}
    for label, estimator_cls in baselines:
        print(f"Training {label}...")
        estimator = estimator_cls(n_estimators=100, random_state=42)
        estimator.fit(x_train, y_train)
        scores[label] = r2_score(y_test, estimator.predict(x_test))

    print("\nModel Comparison (R² scores):")
    for label, score in scores.items():
        print(f"  {label}: {score:.4f}")

    return scores

# Main execution
if __name__ == "__main__":
    print("🔧 Running Comprehensive MLP Troubleshooting...")

    # Run complete troubleshooting pipeline. A missing input file is routed
    # to the failure branch below (with the offending path printed) instead
    # of ending the script with an unhandled traceback.
    try:
        results = quick_diagnosis()
    except FileNotFoundError as exc:
        print(f"❌ File not found: {exc}")
        results = None

    if results:
        test_r2 = results['test_results']['r2']

        print("\n🎯 SUMMARY:")
        print(f"   Test R²: {test_r2:.6f}")
        print(f"   Issues found: {len(results['issues_found'])}")
        print(f"   Preprocessing steps: {len(results['preprocessing_steps'])}")

        # If performance is still poor, suggest alternative approaches
        if test_r2 < 0.3:
            print("\n⚠️ Poor performance detected. Consider:")
            print("   1. Check if VAE synthetic data is meaningful")
            print("   2. Verify target variable (VAR window) calculation")
            print("   3. Try completely different approach (e.g., time series methods)")
    else:
        print("❌ Pipeline failed. Check data file paths and availability.")

FileNotFoundError: ignored