In [1]:
"""
Binary Synthetic Data Generator with Hybrid Rule + ML Architecture.

This module generates and optimizes synthetic binary datasets to match target
probability distributions defined in a YAML configuration file.

Architecture Overview (Rec #1 - Hybrid Design):
================================================
The system uses a two-phase hybrid approach:

1. StructuredAdjuster (Rules)
   - Handles hard constraints
   - Rules encode truth
   - Gradient-based probability correction
   - Row-level flipping preserves correlations

2. AttentionProposalModel (ML)
   - Handles soft, learned corrections
   - ML encodes interaction uncertainty
   - NOT a predictor - a learned proposal generator for optimization
   - Used within hill-climbing optimization loop

This separation prevents the model from "learning the wrong thing".

Key Features:
=============
- Structured Adjustment: Gradient-based probability correction with
  per-column learning rates (Rec #9) for rare variable stability

- Attention-Based Proposals: Self-attention learns column interactions
  and proposes probability corrections. Limited capacity (Rec #6) and
  automatic freezing (Rec #7) prevent overfitting.

- Better Metrics: KL divergence and total variation distance, plus
  conditional metrics per dependency group (Rec #4)

- Named Optimization Objective: Explicit weighted composite score (Rec #5)
  prevents silent metric drift

- Centralized RNG: Single RNGManager (Rec #12) for reproducibility

- Invariant Checks: Fail-fast validation after each iteration (Rec #13)

Main Entry Points:
==================
- ImprovedBinaryGenerator.generate(n_rows) - Full generation pipeline
- optimize_dataset(df, config) - Repair/optimize existing datasets (Rec #15)

Example Usage:
==============
    >>> config = load_config(CONFIG_YAML)
    >>> generator = ImprovedBinaryGenerator(config, seed=42)
    >>> df = generator.generate(n_rows=1000)

    # Or optimize an existing dataset:
    >>> df_fixed, metrics = optimize_dataset(existing_df, config)
"""

import yaml
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from copy import deepcopy


# ============================================================================
# Random Number Generator Manager (Rec #12)
# ============================================================================


class RNGManager:
    """
    Single owner of the random streams used by the generator (Rec #12).

    One seed drives both a private NumPy ``Generator`` and PyTorch's global
    seed (``torch.manual_seed``, plus every CUDA device when CUDA is
    available), so components that share one manager replay the same random
    sequence for the same seed.

    Note that seeding PyTorch is a process-wide side effect: constructing or
    resetting a manager re-seeds torch for everything in the process.

    Usage:
        rng_manager = RNGManager(seed=42)
        adjuster = StructuredAdjuster(config, rng_manager=rng_manager)
        generator = ImprovedBinaryGenerator(config, rng_manager=rng_manager)
    """

    def __init__(self, seed: int = 42):
        """Build the NumPy generator and seed PyTorch from ``seed``."""
        self.seed = seed
        self._torch_seed = seed
        self._numpy_rng = np.random.default_rng(seed)
        self._reset_torch()

    def _reset_torch(self) -> None:
        """Re-seed PyTorch (CPU, and all CUDA devices when present)."""
        torch_seed = self._torch_seed
        torch.manual_seed(torch_seed)
        if not torch.cuda.is_available():
            return
        torch.cuda.manual_seed_all(torch_seed)

    @property
    def numpy(self) -> np.random.Generator:
        """The NumPy ``Generator`` owned by this manager."""
        return self._numpy_rng

    def reset(self, seed: Optional[int] = None) -> None:
        """
        Restart every random stream.

        Args:
            seed: New seed to adopt. When omitted, the streams are rewound
                to the seed the manager currently holds.
        """
        if seed is not None:
            self.seed = self._torch_seed = seed
        self._numpy_rng = np.random.default_rng(self.seed)
        self._reset_torch()

    def choice(
        self,
        a: np.ndarray,
        size: Optional[int] = None,
        replace: bool = True,
        p: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """Forward to ``Generator.choice`` on the managed NumPy generator."""
        return self.numpy.choice(a, size=size, replace=replace, p=p)

    def shuffle(self, x: list) -> None:
        """Shuffle ``x`` in place with the managed NumPy generator."""
        self.numpy.shuffle(x)

    def random(self, size: Optional[int] = None) -> np.ndarray:
        """Draw uniform floats in [0, 1) (a single float when ``size`` is None)."""
        return self.numpy.random(size)

    def __repr__(self) -> str:
        return f"RNGManager(seed={self.seed})"


# ============================================================================
# Configuration and Data Classes
# ============================================================================


@dataclass
class ColumnConfig:
    """
    Parsed configuration for a single binary column.

    Instances are built by StructuredAdjuster._parse_columns, one per entry
    of the config's "columns" list.
    """

    # Column identifier from the config; also used as the DataFrame column name.
    column_id: str
    # Cell value counted as "true" when observed probabilities are computed.
    true_value: int
    # Cell value that represents "false"; cells are flipped between the two.
    false_value: int
    # Target probability of true_value (config key "true_prob").
    target_prob: float
    # Distribution type string from the config; the value "conditional"
    # activates the two dependency fields below.
    distribution_type: str
    # The config's "depend_on" entry; only populated for conditional columns.
    dependencies: Optional[Dict] = None
    # Maps condition keys such as "is_online_order=1, has_loyalty_card=0" to a
    # dict holding "true_prob"; only populated for conditional columns.
    conditional_probs: Optional[Dict] = None
    # Rec #9: Per-column learning rate
    learning_rate: Optional[float] = None


@dataclass
class GeneratorMetrics:
    """
    Stores evaluation metrics for the generator.

    Includes both marginal metrics (overall distribution match) and
    conditional metrics (per-dependency-group distribution match).

    OptimizationObjective.compute combines kl_divergence, total_variation,
    max_deviation and conditional_tv into a single score where lower is
    better.
    """

    # Marginal metrics over all columns.
    # NOTE(review): presumably produced by the DistributionMetrics methods of
    # the same names; the code that fills them is not in this section.
    mse: float
    kl_divergence: float
    total_variation: float
    max_deviation: float
    # Per-column error keyed by column id.
    # NOTE(review): sign convention and units not visible here; confirm.
    per_column_errors: Dict[str, float]
    # Rec #4: Conditional metrics per dependency group
    # Both dicts are optional; a missing or empty conditional_tv simply adds
    # no penalty in OptimizationObjective.compute.
    conditional_kl: Optional[Dict[str, float]] = None
    conditional_tv: Optional[Dict[str, float]] = None


@dataclass
class OptimizationObjective:
    """
    Explicit, named scoring rule used to accept or reject candidates (Rec #5).

    Spelling the rule out in one place keeps the acceptance criterion from
    drifting silently when the system is modified. The score is:

        objective = (weight_kl * kl) + (weight_tv * tv) + (weight_max_dev * max_dev)
                    + conditional_penalty

    where ``conditional_penalty`` is ``conditional_penalty_weight`` times the
    mean of the conditional TV values that exceed ``conditional_tolerance``
    (zero when none do).

    Attributes:
        weight_kl: Weight of the KL divergence term
        weight_tv: Weight of the total variation term
        weight_max_dev: Weight of the maximum deviation term
        conditional_tolerance: Conditional TV above this value is a violation
        conditional_penalty_weight: Multiplier applied to the mean violation
    """

    weight_kl: float = 0.4
    weight_tv: float = 0.4
    weight_max_dev: float = 0.2
    conditional_tolerance: float = 0.05
    conditional_penalty_weight: float = 0.5

    def compute(self, metrics: GeneratorMetrics) -> float:
        """
        Score a set of metrics; smaller scores are better.

        Args:
            metrics: Metrics whose kl_divergence, total_variation,
                max_deviation and (optional) conditional_tv are combined.

        Returns:
            Weighted marginal score plus the conditional-drift penalty.
        """
        kl_term = self.weight_kl * metrics.kl_divergence
        tv_term = self.weight_tv * metrics.total_variation
        dev_term = self.weight_max_dev * metrics.max_deviation
        marginal_score = kl_term + tv_term + dev_term

        # Only conditional groups that drift past the tolerance are penalised.
        tolerance = self.conditional_tolerance
        exceeded = [
            group_tv
            for group_tv in (metrics.conditional_tv or {}).values()
            if group_tv > tolerance
        ]

        penalty = 0.0
        if exceeded:
            penalty = self.conditional_penalty_weight * sum(exceeded) / len(exceeded)

        return marginal_score + penalty

    def __str__(self) -> str:
        return (
            f"OptimizationObjective(kl={self.weight_kl}, tv={self.weight_tv}, "
            f"max_dev={self.weight_max_dev}, cond_tol={self.conditional_tolerance})"
        )


# ============================================================================
# Metrics Module - Better alternatives to MSE
# ============================================================================


class DistributionMetrics:
    """
    Collection of static scoring functions that compare observed per-column
    probabilities with their targets. Complements plain MSE with
    information-theoretic and worst-case measures.
    """

    @staticmethod
    def mse(observed: np.ndarray, target: np.ndarray) -> float:
        """Mean of the squared differences between observed and target."""
        residual = observed - target
        return float(np.mean(residual ** 2))

    @staticmethod
    def kl_divergence(
        observed: np.ndarray, target: np.ndarray, epsilon: float = 1e-10
    ) -> float:
        """
        Mean Bernoulli KL divergence KL(target || observed).

        For each entry with target p and observed q this evaluates
        p*log(p/q) + (1-p)*log((1-p)/(1-q)) and the entries are averaged.
        The measure is asymmetric and reacts strongly when a low-probability
        target is misrepresented.

        Args:
            observed: Observed probabilities.
            target: Target probabilities.
            epsilon: Both inputs are clipped to [epsilon, 1 - epsilon] so
                that no logarithm or division hits zero.
        """
        upper = 1 - epsilon
        q = np.clip(observed, epsilon, upper)
        p = np.clip(target, epsilon, upper)

        true_term = p * np.log(p / q)
        false_term = (1 - p) * np.log((1 - p) / (1 - q))
        return float(np.mean(true_term + false_term))

    @staticmethod
    def total_variation(observed: np.ndarray, target: np.ndarray) -> float:
        """
        Mean absolute difference between observed and target.

        For a single binary variable the total variation distance is
        |p_obs - p_target|; this returns the average of that quantity over
        all entries, so the result stays within [0, 1] for probabilities.
        """
        gap = np.abs(observed - target)
        return float(np.mean(gap))

    @staticmethod
    def max_deviation(observed: np.ndarray, target: np.ndarray) -> float:
        """Largest absolute difference over all entries."""
        gap = np.abs(observed - target)
        return float(np.max(gap))

    @staticmethod
    def chi_squared(
        observed_counts: np.ndarray, expected_counts: np.ndarray, epsilon: float = 1e-10
    ) -> float:
        """
        Pearson chi-squared statistic, sum((O - E)^2 / E).

        Works on counts rather than probabilities, so it reflects sample
        size. Expected counts are floored at ``epsilon`` before use.
        """
        floored_expected = np.clip(expected_counts, epsilon, None)
        residual = observed_counts - floored_expected
        return float(np.sum(residual ** 2 / floored_expected))

    @staticmethod
    def composite_score(
        observed: np.ndarray,
        target: np.ndarray,
        weights: Optional[Dict[str, float]] = None,
    ) -> float:
        """
        Weighted sum of KL divergence, total variation and max deviation.

        Args:
            observed: Observed probabilities.
            target: Target probabilities.
            weights: Mapping with optional keys "kl", "tv" and "max_dev";
                a missing key counts as weight 0. Defaults to
                0.4 / 0.4 / 0.2.
        """
        if weights is None:
            weights = {"kl": 0.4, "tv": 0.4, "max_dev": 0.2}

        kl_term = weights.get("kl", 0) * DistributionMetrics.kl_divergence(
            observed, target
        )
        tv_term = weights.get("tv", 0) * DistributionMetrics.total_variation(
            observed, target
        )
        dev_term = weights.get("max_dev", 0) * DistributionMetrics.max_deviation(
            observed, target
        )
        return kl_term + tv_term + dev_term


# ============================================================================
# Self-Attention Module with Backpropagation
# ============================================================================


class BinaryAttentionAdjuster(nn.Module):
    """
    Attention network that turns per-column probability errors into
    per-column probability corrections.

    Each column's error (target - current) is treated as one token of a
    length-``n_columns`` sequence. Multi-head self-attention mixes the
    tokens, a small MLP maps every token to a bounded correction, and a
    learnable per-column offset is added on top. All parts are trained by
    backpropagation.
    """

    def __init__(self, n_columns: int, hidden_dim: int = 32, n_heads: int = 4):
        """
        Args:
            n_columns: Number of binary columns (sequence length).
            hidden_dim: Embedding width of the attention layer.
            n_heads: Number of attention heads; ``nn.MultiheadAttention``
                requires ``hidden_dim`` to be divisible by it.
        """
        super().__init__()
        self.n_columns = n_columns
        self.hidden_dim = hidden_dim

        # Per-column offset that is learned directly, independent of the error.
        self.prob_adjustments = nn.Parameter(torch.zeros(n_columns))

        # Scalar error -> hidden_dim embeddings for query, key and value.
        self.query = nn.Linear(1, hidden_dim)
        self.key = nn.Linear(1, hidden_dim)
        self.value = nn.Linear(1, hidden_dim)

        # Mixes information between columns.
        self.attention = nn.MultiheadAttention(
            embed_dim=hidden_dim, num_heads=n_heads, batch_first=True
        )

        # hidden_dim -> scalar correction; Tanh keeps the raw value in [-1, 1].
        projection_layers = [
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Tanh(),
        ]
        self.output_proj = nn.Sequential(*projection_layers)

        # Learnable multiplier applied to the bounded correction.
        self.adjustment_scale = nn.Parameter(torch.tensor(0.1))

    def forward(
        self, current_probs: torch.Tensor, target_probs: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Propose corrected probabilities for every column.

        Args:
            current_probs: Currently observed probabilities [n_columns]
            target_probs: Target probabilities [n_columns]

        Returns:
            Tuple of:
            - corrected probabilities clamped to [0, 1], shape [n_columns]
            - attention weights as returned by ``nn.MultiheadAttention``
        """
        # One scalar token per column: [n_columns] -> [1, n_columns, 1]
        error_tokens = torch.unsqueeze(
            torch.unsqueeze(target_probs - current_probs, 0), -1
        )

        # Self-attention over the column axis; embeddings are [1, n_columns, hidden_dim]
        attended, attention_weights = self.attention(
            self.query(error_tokens),
            self.key(error_tokens),
            self.value(error_tokens),
        )

        # Back to one bounded value per column: [n_columns]
        bounded = self.output_proj(attended).squeeze(-1).squeeze(0)

        # Scaled learned correction plus the free per-column offset.
        correction = bounded * self.adjustment_scale + self.prob_adjustments

        # Keep the proposal a valid probability.
        proposal = (current_probs + correction).clamp(min=0.0, max=1.0)

        return proposal, attention_weights


class AttentionProposalModel:
    """
    Trainable generator of probability-correction proposals.

    IMPORTANT: this is not a predictor and is not meant for inference. It
    exists only to guide optimization: inside a hill-climbing loop it
    proposes corrected probabilities and is trained on how far those
    proposals are from the targets.

    It wraps a BinaryAttentionAdjuster together with its Adam optimizer,
    a loss history, and the early-stopping state that freezes the network
    once the loss stops improving (Rec #7).

    Role in the hybrid rule + ML architecture:
    - StructuredAdjuster handles hard constraints (rules encode truth)
    - AttentionProposalModel handles soft, learned corrections (ML encodes uncertainty)
    """

    def __init__(
        self,
        n_columns: int,
        learning_rate: float = 0.01,
        hidden_dim: Optional[int] = None,
        n_heads: Optional[int] = None,
    ):
        """
        Args:
            n_columns: Number of binary columns the model corrects.
            learning_rate: Adam learning rate.
            hidden_dim: Attention width; defaults to min(32, 4 * n_columns).
            n_heads: Attention heads; defaults to 2 below ten columns, else 4.
        """
        # Rec #6: keep network capacity proportional to the problem size.
        hidden_dim = min(32, 4 * n_columns) if hidden_dim is None else hidden_dim
        n_heads = (2 if n_columns < 10 else 4) if n_heads is None else n_heads

        self.model = BinaryAttentionAdjuster(
            n_columns=n_columns, hidden_dim=hidden_dim, n_heads=n_heads
        )
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        self.loss_history = []

        # Rec #7: early-stopping / freezing bookkeeping.
        self.frozen = False
        self.early_stop_patience = 10
        self.early_stop_min_delta = 1e-6
        self._no_improvement_count = 0
        self._best_loss = float("inf")

    def freeze(self) -> None:
        """
        Stop all further learning (Rec #7).

        Puts the network in eval mode and disables gradients on every
        parameter. Afterwards train_step only evaluates the network, which
        avoids chasing noise and disturbing a converged solution.
        """
        self.frozen = True
        self.model.eval()
        for weight in self.model.parameters():
            weight.requires_grad_(False)

    def is_frozen(self) -> bool:
        """Return True once freeze() has been called."""
        return self.frozen

    def _check_early_stopping(self, loss: float) -> bool:
        """
        Update the patience counter with a new loss value (Rec #7).

        A loss counts as an improvement only if it beats the best loss so
        far by more than early_stop_min_delta.

        Returns:
            True when the number of consecutive non-improving steps has
            reached early_stop_patience, i.e. the model should be frozen.
        """
        improved = loss < self._best_loss - self.early_stop_min_delta
        if improved:
            self._best_loss = loss
        self._no_improvement_count = 0 if improved else self._no_improvement_count + 1

        return self._no_improvement_count >= self.early_stop_patience

    def compute_loss(
        self, adjusted_probs: torch.Tensor, target_probs: torch.Tensor
    ) -> torch.Tensor:
        """
        Differentiable training loss between proposals and targets.

        Weighted blend of three terms:
        - 0.3 * mean squared error (smooth gradients)
        - 0.4 * symmetric Bernoulli KL (sensitive to rare-event errors)
        - 0.3 * mean absolute difference (total variation)
        """
        eps = 1e-10

        squared_error = F.mse_loss(adjusted_probs, target_probs)

        # Clamp away from 0 and 1 so every log and division is finite.
        proposal = torch.clamp(adjusted_probs, eps, 1 - eps)
        reference = torch.clamp(target_probs, eps, 1 - eps)

        kl_ref_to_prop = reference * torch.log(reference / proposal) + (
            1 - reference
        ) * torch.log((1 - reference) / (1 - proposal))
        kl_prop_to_ref = proposal * torch.log(proposal / reference) + (
            1 - proposal
        ) * torch.log((1 - proposal) / (1 - reference))
        symmetric_kl = 0.5 * (kl_ref_to_prop + kl_prop_to_ref).mean()

        abs_error = torch.abs(adjusted_probs - target_probs).mean()

        return 0.3 * squared_error + 0.4 * symmetric_kl + 0.3 * abs_error

    def train_step(
        self, current_probs: np.ndarray, target_probs: np.ndarray
    ) -> Tuple[np.ndarray, float]:
        """
        Run one optimisation step and return the proposal it was based on.

        Args:
            current_probs: Currently observed probabilities
            target_probs: Target probabilities from config

        Returns:
            Tuple of (adjusted_probs, loss_value). The proposal comes from
            the forward pass that preceded the weight update.

        Note: once frozen (Rec #7) no training happens; the call returns
        the network's proposal together with the best loss seen so far.
        """
        if self.frozen:
            # Evaluation only: identical to get_adjustments.
            return self.get_adjustments(current_probs, target_probs), self._best_loss

        current_t = torch.tensor(current_probs, dtype=torch.float32)
        target_t = torch.tensor(target_probs, dtype=torch.float32)

        self.model.train()
        self.optimizer.zero_grad()

        proposal, _attention = self.model(current_t, target_t)
        loss = self.compute_loss(proposal, target_t)
        loss.backward()

        # Cap the gradient norm before stepping, for stability.
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        self.optimizer.step()

        loss_value = loss.item()
        self.loss_history.append(loss_value)

        # Rec #7: freeze as soon as the loss has plateaued.
        if self._check_early_stopping(loss_value):
            self.freeze()

        return proposal.detach().numpy(), loss_value

    def get_adjustments(
        self, current_probs: np.ndarray, target_probs: np.ndarray
    ) -> np.ndarray:
        """Evaluate the network without training and return its proposal."""
        self.model.eval()
        with torch.no_grad():
            proposal, _ = self.model(
                torch.tensor(current_probs, dtype=torch.float32),
                torch.tensor(target_probs, dtype=torch.float32),
            )
        return proposal.numpy()


# ============================================================================
# Structured Adjustment Mechanism
# ============================================================================


class StructuredAdjuster:
    """
    Rule-based probability adjustment for binary columns.

    Works directly on a DataFrame of binary values:
    1. Gradient-like correction: the number of cells flipped in a column is
       proportional to the gap between its target and observed probability.
    2. Conditional enforcement: for columns with a "conditional"
       distribution, the share of true values inside every condition group
       is set to the configured conditional probability.
    3. Iterative refinement: both steps are repeated until the corrections
       become negligible.

    Part of a hybrid rule + ML architecture (Rec #1):
    - This class handles hard constraints (rules encode truth)
    - AttentionProposalModel handles soft, learned corrections

    Rows are always addressed by position, so the DataFrame index may hold
    any labels, including duplicates.
    """

    def __init__(
        self,
        config: dict,
        rng_manager: Optional["RNGManager"] = None,
        seed: int = 42,
        default_learning_rate: float = 0.5,
        column_learning_rates: Optional[Dict[str, float]] = None,
    ):
        """
        Initialize the structured adjuster.

        Args:
            config: YAML configuration dictionary
            rng_manager: Centralized RNG manager (Rec #12). If None, creates one.
            seed: Random seed (used only if rng_manager is None)
            default_learning_rate: Default learning rate for all columns
            column_learning_rates: Optional per-column learning rates (Rec #9)
                Rare variables (e.g., returns with ~8% probability) benefit from
                smaller learning rates for stability.
        """
        self.config = config
        # Rec #12: Use centralized RNG manager
        self._rng_manager = rng_manager or RNGManager(seed)
        self.default_learning_rate = default_learning_rate
        self.column_learning_rates = column_learning_rates or {}
        self.column_configs = self._parse_columns()
        # One dict of per-column adjustments is appended per
        # compute_gradient_adjustment call.
        self.adjustment_history = []

    @property
    def rng(self) -> np.random.Generator:
        """Get the numpy RNG from the manager."""
        return self._rng_manager.numpy

    def _parse_columns(self) -> Dict[str, "ColumnConfig"]:
        """
        Build a ColumnConfig for every entry of ``config["columns"]``.

        Reads ``column_id``, ``values.true_value`` / ``values.false_value``,
        ``distribution.type`` and ``distribution.probabilities.true_prob``
        from each entry, plus ``distribution.dependencies.depend_on`` and
        ``distribution.dependencies.conditional_probs`` when the type is
        "conditional".

        Returns:
            Mapping from column id to its parsed configuration.
        """
        columns = {}
        for col in self.config["columns"]:
            col_id = col["column_id"]
            dist = col["distribution"]

            deps = None
            cond_probs = None
            if dist["type"] == "conditional":
                deps = dist["dependencies"]["depend_on"]
                cond_probs = dist["dependencies"]["conditional_probs"]

            # Rec #9: Determine column-specific learning rate.
            # An explicit per-column rate wins; otherwise columns whose target
            # is very rare (< 0.1) or very common (> 0.9) get half the default.
            target_prob = dist["probabilities"]["true_prob"]
            if col_id in self.column_learning_rates:
                lr = self.column_learning_rates[col_id]
            elif target_prob < 0.1 or target_prob > 0.9:
                lr = self.default_learning_rate * 0.5
            else:
                lr = self.default_learning_rate

            columns[col_id] = ColumnConfig(
                column_id=col_id,
                true_value=col["values"]["true_value"],
                false_value=col["values"]["false_value"],
                target_prob=target_prob,
                distribution_type=dist["type"],
                dependencies=deps,
                conditional_probs=cond_probs,
                learning_rate=lr,
            )
        return columns

    def compute_error(self, df: pd.DataFrame) -> Dict[str, float]:
        """
        Compute per-column probability errors.

        Returns:
            Mapping from column id to ``target_prob - observed_prob``, where
            the observed probability is the share of rows equal to the
            column's true value. Positive means too few true values.
        """
        errors = {}
        for col_id, cfg in self.column_configs.items():
            observed = (df[col_id] == cfg.true_value).mean()
            errors[col_id] = cfg.target_prob - observed
        return errors

    def compute_gradient_adjustment(
        self, df: pd.DataFrame, learning_rate: Optional[float] = None
    ) -> Dict[str, float]:
        """
        Compute structured adjustment factors using gradient-like updates.

        For binary variables:
        - If observed_prob < target_prob: need to flip some 0s to 1s
        - If observed_prob > target_prob: need to flip some 1s to 0s

        The adjustment is ``learning_rate * error``, boosted by 1.5 when
        |error| > 0.1 and damped by 0.5 when |error| < 0.01.

        Uses per-column learning rates (Rec #9) where rare variables
        get smaller learning rates for stability.

        Side effect: the returned dict is appended to ``adjustment_history``.

        Args:
            df: DataFrame to compute adjustments for
            learning_rate: Override learning rate (uses per-column rates if None)

        Returns:
            Mapping from column id to a signed fraction of rows to flip
            (positive: towards the true value).
        """
        errors = self.compute_error(df)
        adjustments = {}

        for col_id, error in errors.items():
            cfg = self.column_configs[col_id]

            # Rec #9: Use column-specific learning rate
            if learning_rate is not None:
                col_lr = learning_rate
            elif cfg.learning_rate is not None:
                col_lr = cfg.learning_rate
            else:
                col_lr = self.default_learning_rate

            # Positive error = need more 1s, negative error = need more 0s
            adjustment = col_lr * error

            # Adaptive step: move faster on large errors, slower near the target.
            if abs(error) > 0.1:
                adjustment *= 1.5
            elif abs(error) < 0.01:
                adjustment *= 0.5

            adjustments[col_id] = adjustment

        self.adjustment_history.append(adjustments)
        return adjustments

    def apply_structured_adjustment(
        self, df: pd.DataFrame, adjustments: Dict[str, float]
    ) -> pd.DataFrame:
        """
        Apply adjustments by flipping a random subset of cells per column.

        For each column, ``int(|adjustment| * n_rows)`` cells currently
        holding the "wrong" value are chosen at random (without replacement)
        and flipped: false -> true for a positive adjustment, true -> false
        for a negative one. Existing values are otherwise left untouched, so
        rows keep their other columns. Conditional dependencies are NOT
        handled here; see ``_enforce_conditional_dependencies``.

        Args:
            df: Input frame; it is copied, never modified.
            adjustments: Signed fraction of rows to flip per column id.

        Returns:
            A new DataFrame with the flips applied.
        """
        df = df.copy()
        n_rows = len(df)

        for col_id, adjustment in adjustments.items():
            if abs(adjustment) < 1e-6:
                continue

            cfg = self.column_configs[col_id]

            # Calculate how many rows need to be flipped
            n_to_flip = int(abs(adjustment) * n_rows)
            if n_to_flip == 0:
                continue

            # The flip direction follows from the sign of the adjustment.
            if adjustment > 0:
                source_value, new_value = cfg.false_value, cfg.true_value
            else:
                source_value, new_value = cfg.true_value, cfg.false_value

            # Row POSITIONS (not index labels) of the flip candidates, so that
            # duplicate or non-integer index labels cannot redirect the write.
            candidates = np.flatnonzero((df[col_id] == source_value).to_numpy())
            if len(candidates) == 0:
                continue

            n_to_flip = min(n_to_flip, len(candidates))
            flip_positions = self.rng.choice(
                candidates, size=n_to_flip, replace=False
            )
            df.iloc[flip_positions, df.columns.get_loc(col_id)] = new_value

        return df

    def adjust_with_constraints(
        self, df: pd.DataFrame, max_iterations: int = 10
    ) -> pd.DataFrame:
        """
        Alternate marginal adjustment and conditional enforcement.

        Each iteration computes the per-column adjustments, applies them,
        then re-enforces the conditional probabilities. The loop ends early
        once the largest absolute adjustment drops below 0.001.

        Args:
            df: Frame to adjust (not modified; every step works on a copy).
            max_iterations: Upper bound on the number of iterations.

        Returns:
            The adjusted DataFrame.
        """
        for _ in range(max_iterations):
            adjustments = self.compute_gradient_adjustment(df)

            # Converged? (default covers a configuration without columns)
            max_adjustment = max(
                (abs(a) for a in adjustments.values()), default=0.0
            )
            if max_adjustment < 0.001:
                break

            df = self.apply_structured_adjustment(df, adjustments)

            # Re-apply conditional dependencies
            df = self._enforce_conditional_dependencies(df)

        return df

    def _enforce_conditional_dependencies(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Enforce conditional probability constraints.

        For every column with a "conditional" distribution and every
        condition key in its ``conditional_probs``:
        - select the rows matching the condition,
        - compute the target count ``round(n_rows_in_group * true_prob)``,
        - flip randomly chosen cells inside the group until the number of
          true values equals the target (or no candidates remain).

        The column is re-read from the frame for every group, so a group
        always sees flips made for earlier groups.

        Args:
            df: Input frame; it is copied, never modified.

        Returns:
            A new DataFrame satisfying the conditional counts.
        """
        df = df.copy()

        for col_id, cfg in self.column_configs.items():
            if cfg.distribution_type != "conditional":
                continue

            if cfg.conditional_probs is None:
                continue

            col_pos = df.columns.get_loc(col_id)

            for condition_key, probs in cfg.conditional_probs.items():
                cond = self._parse_condition_key(condition_key)

                # Rows where every "column=value" term of the condition holds.
                mask = np.ones(len(df), dtype=bool)
                for k, v in cond.items():
                    mask &= np.asarray(df[k].values) == v

                idx = np.flatnonzero(mask)
                n = len(idx)
                if n == 0:
                    continue

                target_count = int(round(n * probs["true_prob"]))

                # Fresh read: do not rely on a pre-loop snapshot staying in
                # sync with writes made through df.iloc.
                group_values = np.asarray(df[col_id].values)[idx]
                current_count = int((group_values == cfg.true_value).sum())

                diff = target_count - current_count
                if diff == 0:
                    continue

                if diff > 0:
                    # Need more 1s - flip cells holding false_value
                    source_value, new_value = cfg.false_value, cfg.true_value
                else:
                    # Need fewer 1s - flip cells holding true_value
                    source_value, new_value = cfg.true_value, cfg.false_value

                candidates = idx[np.asarray(group_values == source_value)]
                n_flip = min(abs(diff), len(candidates))
                if n_flip > 0:
                    flip_idx = self.rng.choice(
                        candidates, size=n_flip, replace=False
                    )
                    df.iloc[flip_idx, col_pos] = new_value

        return df

    @staticmethod
    def _parse_condition_key(condition_key: str) -> Dict[str, int]:
        """
        Parse a condition key like 'is_online_order=1, has_loyalty_card=0'.

        Terms are separated by commas; whitespace around column names and
        values is ignored, so 'a = 1' and 'a=1' are equivalent.

        Returns:
            Mapping from column name to the required integer value.

        Raises:
            ValueError: If a term has no '=' or its value is not an integer.
        """
        result = {}
        for part in condition_key.split(","):
            key, separator, value = part.partition("=")
            if not separator:
                raise ValueError(
                    f"Malformed condition term {part.strip()!r} in "
                    f"{condition_key!r}: expected 'column=value'"
                )
            result[key.strip()] = int(value)
        return result


# ============================================================================
# Main Generator Class with All Improvements
# ============================================================================


class ImprovedBinaryGenerator:
    """
    Main generator class combining all improvements:
    1. Structured adjustment mechanism (rules encode truth)
    2. Self-attention with backpropagation (ML encodes uncertainty)
    3. Better metrics (KL, TV, composite)

    This is a hybrid rule + ML architecture (Rec #1) that:
    - Uses StructuredAdjuster for hard constraints
    - Uses AttentionProposalModel for soft, learned corrections

    The system excels at repairing/optimizing distributions,
    not just initial sampling (see optimize_dataset for direct API).
    """

    def __init__(
        self,
        config: dict,
        seed: int = 42,
        rng_manager: Optional["RNGManager"] = None,
        use_attention: bool = True,
        learning_rate: float = 0.01,
        verbose: bool = True,
    ):
        """
        Initialize the generator.

        Args:
            config: YAML configuration dictionary
            seed: Random seed (used only if rng_manager is None)
            rng_manager: Centralized RNG manager (Rec #12). If None, creates one.
            use_attention: Whether to use attention-based proposal model
            learning_rate: Learning rate for attention model
            verbose: Whether to print progress messages
        """
        self.config = config
        self.seed = seed
        self.use_attention = use_attention
        self.verbose = verbose

        # Rec #12: Use centralized RNG manager. Test against None explicitly so
        # a caller-supplied manager is never replaced because it is falsy.
        self._rng_manager = (
            rng_manager if rng_manager is not None else RNGManager(seed)
        )

        # Initialize structured adjuster with shared RNG
        self.adjuster = StructuredAdjuster(config, rng_manager=self._rng_manager)

        # Initialize attention proposal model if enabled
        n_columns = len(config["columns"])
        self.attention_model = (
            AttentionProposalModel(n_columns=n_columns, learning_rate=learning_rate)
            if use_attention
            else None
        )

        # Metrics calculator
        self.metrics = DistributionMetrics()

        # Training history: one entry per optimize() iteration
        self.history = {"mse": [], "kl": [], "tv": [], "composite": []}

    @property
    def rng(self) -> np.random.Generator:
        """Get the numpy RNG from the manager (Rec #12)."""
        return self._rng_manager.numpy

    def log(self, message: str):
        """Print message if verbose mode is on."""
        if self.verbose:
            print(message)

    def generate_initial_dataset(self, n_rows: int) -> pd.DataFrame:
        """
        Generate initial dataset ignoring conditional logic.

        Each column gets exactly ``round(n_rows * true_prob)`` true values,
        shuffled independently of the other columns.

        Args:
            n_rows: Number of rows to generate.

        Returns:
            DataFrame with one column per configured column id.
        """
        data = {}

        for col_cfg in self.config["columns"]:
            col_id = col_cfg["column_id"]
            values = col_cfg["values"]
            probs = col_cfg["distribution"]["probabilities"]

            true_val = values["true_value"]
            false_val = values["false_value"]
            p = probs["true_prob"]

            # Generate exact counts for target probability
            n_true = int(round(n_rows * p))
            column = [true_val] * n_true + [false_val] * (n_rows - n_true)

            # In-place shuffle using the shared generator
            self.rng.shuffle(column)
            data[col_id] = column
            self.log(f"[INIT] {col_id}: target_prob={p:.3f}, n_true={n_true}")

        return pd.DataFrame(data)

    def evaluate(self, df: pd.DataFrame) -> "GeneratorMetrics":
        """
        Evaluate dataset against target distributions using multiple metrics.

        Includes both marginal metrics and conditional metrics per dependency group.
        Conditional metrics track distribution match within each conditional slice,
        e.g., P(is_return | online=1, loyalty=1).

        Args:
            df: Dataset containing every configured column.

        Returns:
            GeneratorMetrics with marginal MSE/KL/TV/max deviation, per-column
            absolute errors, and per-slice conditional KL/TV.
        """
        observed = []
        target = []
        per_column = {}

        for col_cfg in self.config["columns"]:
            col_id = col_cfg["column_id"]
            true_val = col_cfg["values"]["true_value"]
            p_target = col_cfg["distribution"]["probabilities"]["true_prob"]

            p_observed = (df[col_id] == true_val).mean()
            observed.append(p_observed)
            target.append(p_target)
            per_column[col_id] = abs(p_observed - p_target)

        observed = np.array(observed)
        target = np.array(target)

        # Rec #4: Compute conditional metrics per dependency group
        conditional_kl, conditional_tv = self._compute_conditional_metrics(df)

        return GeneratorMetrics(
            mse=self.metrics.mse(observed, target),
            kl_divergence=self.metrics.kl_divergence(observed, target),
            total_variation=self.metrics.total_variation(observed, target),
            max_deviation=self.metrics.max_deviation(observed, target),
            per_column_errors=per_column,
            conditional_kl=conditional_kl,
            conditional_tv=conditional_tv,
        )

    def _compute_conditional_metrics(
        self, df: pd.DataFrame
    ) -> Tuple[Dict[str, float], Dict[str, float]]:
        """
        Compute KL and TV metrics for each conditional slice.

        Tracks metrics per conditional group, e.g.:
        - "is_return|is_online_order=1,has_loyalty_card=1"

        This catches distribution errors that hide inside conditionals.
        Slices with no matching rows are omitted from both dictionaries.

        Returns:
            Tuple ``(conditional_kl, conditional_tv)``, each keyed by
            ``"<column_id>|<condition_key>"``.
        """
        conditional_kl = {}
        conditional_tv = {}
        epsilon = 1e-10

        for col_cfg in self.config["columns"]:
            dist = col_cfg["distribution"]
            if dist["type"] != "conditional":
                continue

            col_id = col_cfg["column_id"]
            true_val = col_cfg["values"]["true_value"]
            cond_probs = dist["dependencies"].get("conditional_probs", {})

            for condition_key, probs in cond_probs.items():
                # Parse condition
                cond = StructuredAdjuster._parse_condition_key(condition_key)

                # Build mask for this condition
                mask = pd.Series(True, index=df.index)
                for k, v in cond.items():
                    mask &= df[k] == v

                subset = df[mask]
                n = len(subset)
                if n == 0:
                    continue

                p_target = probs["true_prob"]
                p_observed = (subset[col_id] == true_val).mean()

                # Clip for numerical stability (avoids log(0) and division by zero)
                p_obs_clipped = np.clip(p_observed, epsilon, 1 - epsilon)
                p_tgt_clipped = np.clip(p_target, epsilon, 1 - epsilon)

                # KL divergence for binary: KL(target || observed)
                kl = p_tgt_clipped * np.log(p_tgt_clipped / p_obs_clipped) + (
                    1 - p_tgt_clipped
                ) * np.log((1 - p_tgt_clipped) / (1 - p_obs_clipped))
                tv = abs(p_observed - p_target)

                metric_key = f"{col_id}|{condition_key}"
                conditional_kl[metric_key] = float(kl)
                conditional_tv[metric_key] = float(tv)

        return conditional_kl, conditional_tv

    def get_current_probs(self, df: pd.DataFrame) -> np.ndarray:
        """Get current observed probabilities for all columns, in config order."""
        probs = []
        for col_cfg in self.config["columns"]:
            col_id = col_cfg["column_id"]
            true_val = col_cfg["values"]["true_value"]
            probs.append((df[col_id] == true_val).mean())
        return np.array(probs)

    def get_target_probs(self) -> np.ndarray:
        """Get target probabilities from config, in config order."""
        return np.array(
            [
                col["distribution"]["probabilities"]["true_prob"]
                for col in self.config["columns"]
            ]
        )

    def optimize_with_attention(
        self, df: pd.DataFrame, n_epochs: int = 100
    ) -> pd.DataFrame:
        """
        Optimize dataset using attention-based proposal model with backpropagation.

        Note: The attention model is a learned proposal generator, not a predictor.
        It learns how to propose probability corrections within the optimization loop.

        Args:
            df: Dataset to refine (not modified; a copy is worked on).
            n_epochs: Maximum number of proposal/adjustment rounds.

        Returns:
            Refined DataFrame, or ``df`` unchanged when attention is disabled.
        """
        if self.attention_model is None:
            return df

        df = df.copy()
        target_probs = self.get_target_probs()
        # Column order is fixed by the config, so resolve it once up front.
        col_ids = [col["column_id"] for col in self.config["columns"]]

        self.log("\n[ATTENTION PROPOSAL OPTIMIZATION]")

        for epoch in range(n_epochs):
            current_probs = self.get_current_probs(df)

            # Train attention proposal model
            adjusted_probs, loss = self.attention_model.train_step(
                current_probs, target_probs
            )

            # Compute adjustment factors (proposed minus current probability)
            adjustments = {
                col_id: adjusted_probs[i] - current_probs[i]
                for i, col_id in enumerate(col_ids)
            }

            # Apply adjustments
            df = self.adjuster.apply_structured_adjustment(df, adjustments)

            # Enforce conditional dependencies
            df = self.adjuster._enforce_conditional_dependencies(df)

            if epoch % 20 == 0:
                metrics = self.evaluate(df)
                self.log(
                    f"  Epoch {epoch}: loss={loss:.6f}, KL={metrics.kl_divergence:.6f}, TV={metrics.total_variation:.6f}"
                )

            # Early stopping
            if loss < 1e-6:
                self.log(f"  Converged at epoch {epoch}")
                break

        return df

    def optimize(
        self,
        df: pd.DataFrame,
        max_iters: int = 50,
        tolerance: float = 1e-6,
        objective: Optional["OptimizationObjective"] = None,
        patience: int = 5,
    ) -> pd.DataFrame:
        """
        Main optimization loop combining structured adjustment and attention.

        Uses a named composite objective (Rec #5) for explicit acceptance criteria.
        This prevents silent metric drift when the system is modified.

        Every iteration builds a candidate from the best dataset so far and
        accepts it only if its objective score is strictly lower.

        Args:
            df: Input DataFrame to optimize
            max_iters: Maximum optimization iterations
            tolerance: Convergence tolerance for objective score
            objective: Custom optimization objective (uses default if None)
            patience: Number of consecutive rejected iterations after which
                the loop exits early (Rec #10). Must be at least 1.

        Returns:
            Optimized DataFrame with distributions matching targets

        Raises:
            ValueError: If ``patience`` is less than 1, or if an invariant
                check fails on a candidate dataset.
        """
        if patience < 1:
            raise ValueError("patience must be >= 1")

        # Rec #5: Use explicit named objective instead of implicit score = kl + tv
        if objective is None:
            objective = OptimizationObjective()

        self.log(f"\n[OBJECTIVE] {objective}")

        best_df = df.copy()
        best_metrics = self.evaluate(best_df)
        best_score = objective.compute(best_metrics)

        # Rec #10: Count consecutive rejections for early exit on no-op
        no_improvement_count = 0

        self.log(
            f"\n[START] KL={best_metrics.kl_divergence:.6f}, "
            f"TV={best_metrics.total_variation:.6f}, Score={best_score:.6f}"
        )

        for it in range(1, max_iters + 1):
            # Phase 1: Structured adjustment
            candidate = self.adjuster.adjust_with_constraints(best_df, max_iterations=5)

            # Phase 2: Attention-based refinement (if enabled)
            if self.use_attention:
                candidate = self.optimize_with_attention(candidate, n_epochs=20)

            # Rec #13: Invariant checks after adjustment
            self._check_invariants(candidate)

            # Evaluate using composite objective
            metrics = self.evaluate(candidate)
            score = objective.compute(metrics)

            # Track history
            self.history["mse"].append(metrics.mse)
            self.history["kl"].append(metrics.kl_divergence)
            self.history["tv"].append(metrics.total_variation)
            self.history["composite"].append(score)

            self.log(f"\n--- ITERATION {it} ---")
            self.log(
                f"  KL={metrics.kl_divergence:.6f}, TV={metrics.total_variation:.6f}, "
                f"Score={score:.6f}"
            )

            # Log conditional metrics if any violations
            if metrics.conditional_tv:
                violations = {
                    k: v
                    for k, v in metrics.conditional_tv.items()
                    if v > objective.conditional_tolerance
                }
                if violations:
                    self.log(f"  Conditional violations: {violations}")

            if score < best_score:
                self.log("  [ACCEPTED]")
                best_df = candidate
                best_metrics = metrics
                best_score = score
                no_improvement_count = 0
            else:
                self.log("  [REJECTED]")
                # Must actually advance the counter, otherwise the early exit
                # below can never trigger and every run uses all max_iters.
                no_improvement_count += 1

            # Rec #10: Early exit on no-op (no improvement for several iterations)
            if no_improvement_count >= patience:
                self.log(f"  [EARLY EXIT] No improvement for {patience} iterations")
                break

            if best_score <= tolerance:
                self.log("  [CONVERGED]")
                break

        self.log(
            f"\n[FINAL] KL={best_metrics.kl_divergence:.6f}, "
            f"TV={best_metrics.total_variation:.6f}, Score={best_score:.6f}"
        )
        self._print_final_summary(best_df, best_metrics)

        return best_df

    def _check_invariants(self, df: pd.DataFrame) -> None:
        """
        Assert invariants after each iteration (Rec #13).

        Fail fast on silent corruption rather than propagating errors.

        Raises:
            ValueError: If a configured column contains NaN, or any value
                other than its configured true/false value.
        """
        for col_cfg in self.config["columns"]:
            col_id = col_cfg["column_id"]
            true_val = col_cfg["values"]["true_value"]
            false_val = col_cfg["values"]["false_value"]
            column = df[col_id]

            # Check NaN first: NaN also fails the membership test below, so
            # checking it afterwards would make this message unreachable.
            if bool(column.isna().any()):
                raise ValueError(f"Invariant violation: {col_id} contains NaN values")

            # Check all values are valid binary
            valid_values = column.isin([true_val, false_val])
            if not bool(valid_values.all()):
                invalid_vals = df.loc[~valid_values, col_id].unique().tolist()
                raise ValueError(
                    f"Invariant violation: {col_id} contains invalid values. "
                    f"Expected {true_val} or {false_val}, got: {invalid_vals}"
                )

    def _print_final_summary(self, df: pd.DataFrame, metrics: "GeneratorMetrics"):
        """Log target vs. observed probability and absolute error per column."""
        self.log("\n[FINAL DISTRIBUTION SUMMARY]")
        for col_cfg in self.config["columns"]:
            col_id = col_cfg["column_id"]
            true_val = col_cfg["values"]["true_value"]
            target = col_cfg["distribution"]["probabilities"]["true_prob"]
            observed = (df[col_id] == true_val).mean()
            error = metrics.per_column_errors[col_id]
            self.log(
                f"  {col_id}: target={target:.3f}, observed={observed:.3f}, error={error:.4f}"
            )

    def generate(self, n_rows: int) -> pd.DataFrame:
        """
        Full generation pipeline.

        Args:
            n_rows: Number of rows to generate.

        Returns:
            Generated and optimized DataFrame.
        """
        self.log(f"\n[GENERATING {n_rows} ROWS]")

        # Step 1: Initial generation
        df = self.generate_initial_dataset(n_rows)

        # Step 2: Apply conditional dependencies
        df = self.adjuster._enforce_conditional_dependencies(df)

        # Step 3: Optimize
        df = self.optimize(df)

        return df


# ============================================================================
# Top-Level API Functions (Rec #15)
# ============================================================================


def optimize_dataset(
    df: pd.DataFrame,
    config: dict,
    objective: Optional[OptimizationObjective] = None,
    max_iters: int = 50,
    tolerance: float = 1e-6,
    use_attention: bool = True,
    seed: int = 42,
    verbose: bool = True,
) -> Tuple[pd.DataFrame, GeneratorMetrics]:
    """
    Repair an existing dataset so it matches the configured targets (Rec #15).

    Convenience wrapper: builds an ImprovedBinaryGenerator from the given
    settings, runs its ``optimize`` loop on ``df`` and evaluates the result.
    Prefer this entry point when a dataset already exists and only needs its
    distributions corrected.

    Args:
        df: Input DataFrame to optimize
        config: YAML configuration dictionary with target distributions
        objective: Custom optimization objective (uses default if None)
        max_iters: Maximum optimization iterations
        tolerance: Convergence tolerance for objective score
        use_attention: Whether to use attention-based proposal model
        seed: Random seed for reproducibility
        verbose: Whether to print progress messages

    Returns:
        Tuple of (optimized_df, final_metrics)

    Example:
        >>> config = load_config(CONFIG_YAML)
        >>> df_optimized, metrics = optimize_dataset(
        ...     df=my_dataset,
        ...     config=config,
        ...     objective=OptimizationObjective(weight_kl=0.5, weight_tv=0.5),
        ... )
        >>> print(f"Final KL: {metrics.kl_divergence:.6f}")
    """
    repairer = ImprovedBinaryGenerator(
        config=config, seed=seed, use_attention=use_attention, verbose=verbose
    )
    repaired = repairer.optimize(
        df=df, max_iters=max_iters, tolerance=tolerance, objective=objective
    )
    # Metrics are recomputed on the returned frame rather than reused from the loop.
    return repaired, repairer.evaluate(repaired)


# ============================================================================
# Utility Functions
# ============================================================================


def load_config(yaml_str: str) -> dict:
    """
    Parse a YAML configuration string and echo its basic metadata.

    Prints the config name, version and number of columns to stdout.

    Args:
        yaml_str: YAML document text.

    Returns:
        The object produced by ``yaml.safe_load``.
    """
    parsed = yaml.safe_load(yaml_str)

    print("\n[CONFIG LOADED]")
    meta = parsed["metadata"]
    print(f"  Name: {meta['name']}")
    print(f"  Version: {meta['version']}")
    n_columns = len(parsed["columns"])
    print(f"  Columns: {n_columns}")

    return parsed


def validate_distribution(
    df: pd.DataFrame, config: dict, tolerance: float = 0.05
) -> Dict:
    """
    Validate that generated data matches config probabilities.

    Args:
        df: Dataset containing every configured column.
        config: Configuration dictionary; each entry of ``config["columns"]``
            supplies ``column_id``, ``values.true_value`` and
            ``distribution.probabilities.true_prob``.
        tolerance: Maximum absolute difference between target and observed
            probability for a column to pass (exclusive). Defaults to 5%.

    Returns:
        Mapping of column id to a dict with keys ``target``, ``observed``,
        ``error`` (absolute difference) and ``passed``. ``observed`` and
        ``passed`` are plain Python ``float``/``bool`` values.
    """
    results = {}

    for col_cfg in config["columns"]:
        col_id = col_cfg["column_id"]
        true_val = col_cfg["values"]["true_value"]
        target = col_cfg["distribution"]["probabilities"]["true_prob"]
        # Cast away the numpy scalar so results serialize and compare cleanly.
        observed = float((df[col_id] == true_val).mean())
        error = abs(target - observed)

        results[col_id] = {
            "target": target,
            "observed": observed,
            "error": error,
            # An empty column gives NaN, which compares False and so fails.
            "passed": bool(error < tolerance),
        }

    return results


# ============================================================================
# Demo Configuration (same as notebook)
# ============================================================================

# Demo configuration passed to load_config(): a YAML document describing four
# binary columns. "has_loyalty_card" and "is_online_order" use the "bernoulli"
# distribution type; "discount_applied" and "is_return" use "conditional" and
# list per-condition probabilities under `dependencies.conditional_probs`.
#
# NOTE(review): the marginal `true_prob` of each conditional column disagrees
# with what its conditional table implies. For discount_applied the table gives
# 0.35 * 0.65 + 0.65 * 0.15 = 0.325, while the marginal target is 0.20. For
# is_return the implied marginal is about 0.06 if the two parent columns are
# independent, while the stated target is 0.08. Marginal and conditional
# targets therefore cannot both be met; confirm which one is authoritative.
CONFIG_YAML = """
      # Binary Configuration for Sector Retail Shop
      # Retail shop operational binary variables

      metadata:
        name: "sector_retail_shop_binary"
        version: "1.0.0"
        description: "Binary variables for retail shop operations"
        author: "Retail Analytics Team"
        created_date: "2025-01-19"
        modified_date: "2025-01-19"
        dataset_id: "sector_retail_shop"
        config_type: "binary"
        schema_version: "2.0"

      columns:
        # Loyalty membership flag
        - column_id: "has_loyalty_card"
          column_name: "HasLoyaltyCard"
          column_description: "Whether customer has a loyalty card membership"
          
          data_type:
            type: "binary"
            representation: "integer"
            
          values:
            true_value: 1
            false_value: 0
            labels:
              true_label: "Member"
              false_label: "Non-Member"
              
          distribution:
            type: "bernoulli"
            probabilities:
              true_prob: 0.35
              false_prob: 0.65
              
            mutation:
              enabled: false
              rate: 0.0
              
          generation:
            global: true
            missing_value_rate: 0.0
            missing_value: null
            
          validation:
            required: true
            unique: false
            balance_threshold: null
            
        # Discount applied flag
        - column_id: "discount_applied"
          column_name: "DiscountApplied"
          column_description: "Whether a discount was applied to the transaction"
          
          data_type:
            type: "binary"
            representation: "integer"
            
          values:
            true_value: 1
            false_value: 0
            labels:
              true_label: "Discounted"
              false_label: "Full Price"
              
          distribution:
            type: "conditional"
            
            probabilities:
              true_prob: 0.20
              false_prob: 0.80
              
            dependencies:
              depend_on: ["has_loyalty_card"]
              conditional_probs:
                "has_loyalty_card=1": { true_prob: 0.65, false_prob: 0.35 }
                "has_loyalty_card=0": { true_prob: 0.15, false_prob: 0.85 }
                
            mutation:
              enabled: false
              rate: 0.0
              
          generation:
            global: true
            missing_value_rate: 0.0
            missing_value: null
            
          validation:
            required: true
            unique: false
            balance_threshold: null
            
        # Online order flag
        - column_id: "is_online_order"
          column_name: "IsOnlineOrder"
          column_description: "Whether the order was placed online"
          
          data_type:
            type: "binary"
            representation: "integer"
            
          values:
            true_value: 1
            false_value: 0
            labels:
              true_label: "Online"
              false_label: "In-Store"
              
          distribution:
            type: "bernoulli"
            probabilities:
              true_prob: 0.42
              false_prob: 0.58
              
            mutation:
              enabled: false
              rate: 0.0
              
          generation:
            global: true
            missing_value_rate: 0.0
            missing_value: null
            
          validation:
            required: true
            unique: false
            balance_threshold: null
            
        # Return flag
        - column_id: "is_return"
          column_name: "IsReturn"
          column_description: "Whether the transaction is a return"
          
          data_type:
            type: "binary"
            representation: "integer"
            
          values:
            true_value: 1
            false_value: 0
            labels:
              true_label: "Return"
              false_label: "Purchase"
              
          distribution:
            type: "conditional"
            
            probabilities:
              true_prob: 0.08
              false_prob: 0.92
              
            dependencies:
              depend_on: ["is_online_order", "has_loyalty_card"]
              conditional_probs:
                "is_online_order=1, has_loyalty_card=1": { true_prob: 0.12, false_prob: 0.88 }
                "is_online_order=1, has_loyalty_card=0": { true_prob: 0.05, false_prob: 0.95 }
                "is_online_order=0, has_loyalty_card=1": { true_prob: 0.07, false_prob: 0.93 }
                "is_online_order=0, has_loyalty_card=0": { true_prob: 0.04, false_prob: 0.96 }
                
            mutation:
              enabled: false
              rate: 0.0
              
          generation:
            global: false
            missing_value_rate: 0.0
            missing_value: null
            
          validation:
            required: true
            unique: false
            balance_threshold: null

      template_config:
        validate_probabilities: true
        validate_logic: true
        validate_dependencies: true
        
        defaults:
          representation: "integer"
          true_value: 1
          false_value: 0
          missing_value: null
          mutation_rate: 0.01
          
        processing_hints:
          optimize_memory: true
          parallelize: true
          cache_logical: true
"""


# ============================================================================
# Main Entry Point
# ============================================================================

if __name__ == "__main__":
    # Size of the demo dataset
    ROWS = 1000

    # Parse the embedded demo YAML (also prints its metadata)
    config = load_config(CONFIG_YAML)

    # Build the hybrid generator and run the full pipeline
    generator = ImprovedBinaryGenerator(
        config=config,
        seed=42,
        use_attention=True,
        learning_rate=0.01,
        verbose=True,
    )
    final_df = generator.generate(ROWS)

    # First rows of the result
    print("\n[FINAL DATASET SAMPLE]")
    print(final_df.head(10))

    # Frequency table for every generated column
    print("\n[VALUE COUNTS PER COLUMN]")
    for col in final_df.columns:
        print(f"\n{col}:")
        print(final_df[col].value_counts())

    # Per-column pass/fail against the configured marginal targets
    print("\n[VALIDATION RESULTS]")
    validation = validate_distribution(final_df, config)
    for col_id, result in validation.items():
        if result["passed"]:
            status = "PASS"
        else:
            status = "FAIL"
        print(
            f"  {col_id}: target={result['target']:.3f}, observed={result['observed']:.3f}, [{status}]"
        )

    # Aggregate distance metrics of the final dataset
    print("\n[METRICS COMPARISON]")
    metrics = generator.evaluate(final_df)
    print(f"  MSE: {metrics.mse:.6f}")
    print(f"  KL Divergence: {metrics.kl_divergence:.6f}")
    print(f"  Total Variation: {metrics.total_variation:.6f}")
    print(f"  Max Deviation: {metrics.max_deviation:.6f}")



[CONFIG LOADED]
  Name: sector_retail_shop_binary
  Version: 1.0.0
  Columns: 4

[GENERATING 1000 ROWS]
[INIT] has_loyalty_card: target_prob=0.350, n_true=350
[INIT] discount_applied: target_prob=0.200, n_true=200
[INIT] is_online_order: target_prob=0.420, n_true=420
[INIT] is_return: target_prob=0.080, n_true=80

[OBJECTIVE] OptimizationObjective(kl=0.4, tv=0.4, max_dev=0.2, cond_tol=0.05)

[START] KL=0.010655, TV=0.036500, Score=0.044062

[ATTENTION PROPOSAL OPTIMIZATION]
  Epoch 0: loss=0.018759, KL=0.011089, TV=0.041500

--- ITERATION 1 ---
  KL=0.186301, TV=0.254500, Score=0.247520
  [REJECTED]

[ATTENTION PROPOSAL OPTIMIZATION]
  Epoch 0: loss=0.011107, KL=0.013743, TV=0.055250

--- ITERATION 2 ---
  KL=0.014969, TV=0.058250, Score=0.058688
  [REJECTED]

[ATTENTION PROPOSAL OPTIMIZATION]
  Epoch 0: loss=0.002741, KL=0.010657, TV=0.037000

--- ITERATION 3 ---
  KL=0.011585, TV=0.046750, Score=0.048534
  [REJECTED]

[ATTENTION PROPOSAL OPTIMIZATION]
  Epoch 0: loss=0.002741, KL=0.

In [None]:
# Unfinished scratch cell: the original `try:` / `except:` pair had no bodies,
# which is a SyntaxError. Kept as a runnable placeholder until real code is added.
try:
    pass  # TODO: put the statements that may raise here
except Exception:
    # Narrowed from a bare `except:` so KeyboardInterrupt and SystemExit still
    # propagate; re-raise until real handling is written.
    raise

In [1]:
import numpy as np
import pandas as pd


class absClass:
    """Empty demo class; used below to show that a class object's type is ``type``."""

    def __init__(self):
        # Nothing to initialise (the original body was a bare `self` expression).
        pass


def hello_world():
    """Print a greeting. Returns None, which the type demo below relies on."""
    print("Hello World")


# Sample values, one per type being demonstrated
x = 19
int_map = {1: 1, 2: 2}  # previously named `dict`, which shadowed the builtin
string = "Hello_World"
flo = 1.23456
arr = [1, 2, 3, 4, 5]

df = pd.DataFrame()

vec1 = np.array([1, 2, 3, 4])
vec2 = np.array([[1, 2, 3, 4],
                 [1, 1, 1, 1]])

log = True

chto = (1, 2, 3, 4, 5)

print(type(x))
print(type(int_map))
print(type(string))
print(type(flo))
print(type(absClass))
# Calling hello_world() prints its greeting first, then the type of its None result
print(type(hello_world()))
print(type(df))
print(type(arr))
print(type(vec1))
print(type(vec2))
print(type(log))
print(type(chto))

<class 'int'>
<class 'dict'>
<class 'str'>
<class 'float'>
<class 'type'>
Hello World
<class 'NoneType'>
<class 'pandas.core.frame.DataFrame'>
<class 'list'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'bool'>
<class 'tuple'>


In [None]:
# TODO: unfinished draft. The original line `with PDF_Reader` is not valid
# Python: it has no call or `as` target, no colon and no body, and no
# `PDF_Reader` is defined in the visible cells. Complete it along these
# lines once the reader is available:
#
# with PDF_Reader(...) as reader:
#     ...

In [17]:
# Sample strings of different lengths
words = ["abs", "cdbdsa", "qqqqqqq", "123123123"]

# Show each string followed by its character count
for word in words:
    print(f"{word} {len(word)}")

abs 3
cdbdsa 6
qqqqqqq 7
123123123 9


In [22]:
# Masked card numbers mapped to their account status
card_data = {
    "****_****_1234_4321": "active",
    "****_****_6712_0920": "inactive",
    "****_****_1221_0099": "active",
}

# Report each card's status; inactive cards get a top-up reminder.
# The loop only reads the mapping, so it iterates over it directly.
for card, status in card_data.items():
    print(
        card + " Your card is inactive please top-up your balance"
        if status == "inactive"
        else card + " Your card is active"
    )

****_****_1234_4321 Your card is active
****_****_6712_0920 Your card is inactive please top-up your balance
****_****_1221_0099 Your card is active
