In [None]:
from yellowbrick import target
import inspect

In [None]:
# 1) Collect every public attribute name (no leading underscore) exposed by
#    the yellowbrick.target module and display the resulting list.
all_items = list(filter(lambda attr: not attr.startswith('_'), dir(target)))
print("All items in yellowbrick.target module:", all_items, sep="\n")

In [None]:
# Sort the public names into buckets according to the kind of object each
# one resolves to on the module.
functions, classes, submodules = [], [], []

for name in all_items:
    obj = getattr(target, name)
    # The first matching check wins; objects that are none of the three
    # kinds are left out of every bucket.
    bucket = (
        functions if inspect.isfunction(obj)
        else classes if inspect.isclass(obj)
        else submodules if inspect.ismodule(obj)
        else None
    )
    if bucket is not None:
        bucket.append(name)

# Each report is a count headline followed by the names themselves.
print(f"Functions (quick methods): {len(functions)}", functions, sep="\n")
print(f"\nClasses (visualizers): {len(classes)}", classes, sep="\n")
print(f"\nSubmodules: {len(submodules)}", submodules, sep="\n")

In [None]:
# =============================================================================
# YELLOWBRICK TARGET MODULE OVERVIEW
# =============================================================================
#
# yellowbrick.target provides visualizers for target variable analysis:
# - Understand target distribution and class balance
# - Detect class imbalance issues before training
# - Analyze feature-target correlations
# - Guide binning decisions for regression-to-classification conversion
#
# All visualizers implement the scikit-learn API:
# - fit(y) or fit(X, y) - Analyze the target
# - show() - Display the visualization
#
# =============================================================================
# AVAILABLE VISUALIZERS (3 total):
# =============================================================================
#
# CLASSIFICATION VISUALIZERS:
# 1. ClassBalance - Class distribution bar chart (single or train/test compare)
# 2. FeatureCorrelation - Feature-target correlation (pearson, mutual info)
#
# REGRESSION/BINNING VISUALIZERS:
# 3. BalancedBinningReference - Histogram with optimal bin boundaries
#
# =============================================================================
# Summary banner: one headline plus one line per visualizer family.
print(
    "\n".join(
        (
            "Yellowbrick Target Module - 3 Visualizers Available",
            "  - 2 Classification-focused (ClassBalance, FeatureCorrelation)",
            "  - 1 Regression/Binning (BalancedBinningReference)",
        )
    )
)

In [None]:
# =============================================================================
# 1. CLASS BALANCE VISUALIZER
# =============================================================================
#
# Purpose: Visual inspection of target class distribution to understand
# class imbalance before training. Critical for fraud detection!
#
# Modes:
# - Balance Mode: fit(y) - Shows distribution of single dataset
# - Compare Mode: fit(y_train, y_test) - Compares train vs test distribution
#
# Use Case: Detect class imbalance that may require:
# - Resampling (SMOTE, undersampling)
# - Class weights in model
# - Different evaluation metrics (PR-AUC vs ROC-AUC)
#
# Best For:
# - Fraud Detection: Visualize fraud vs non-fraud ratio (~1-5% fraud)
# - Stratification Check: Ensure train/test have similar class ratios
# - Imbalance Quantification: See exact class support
#
# =============================================================================

class_balance_class = target.ClassBalance

# Keyword arguments for ClassBalance, listed with the default values this
# notebook documents for them.
class_balance_kwargs = dict(
    # --- Core configuration ---
    ax=None,          # matplotlib Axes object to draw on
    labels=None,      # class names for the x-axis (None = auto-detect);
                      # must be ordered lexicographically to match the
                      # target values
    # --- Color configuration ---
    colors=None,      # list of bar colors (overrides colormap)
    colormap=None,    # matplotlib colormap used for the class colors
)

# Quick method signature:
#   class_balance(y, ax=None, labels=None, colors=None, colormap=None,
#                 show=True, **kwargs)
#
# Compare mode (train vs test distribution):
#   viz = ClassBalance()
#   viz.fit(y_train, y_test)  # pass both targets to compare them
#   viz.show()

# Echo the configuration, one indented "key: value" line per entry.
print("ClassBalance kwargs:")
print("\n".join(f"  {key}: {value}" for key, value in class_balance_kwargs.items()))

In [None]:
# =============================================================================
# 2. FEATURE CORRELATION VISUALIZER
# =============================================================================
#
# Purpose: Plot correlation between features and the target variable.
# Helps identify which features have predictive power.
#
# Correlation Methods:
# - 'pearson': Pearson correlation coefficient (linear relationships)
#              Best for: continuous features, linear relationships
# - 'mutual_info-regression': Mutual information for regression targets
#              Best for: non-linear relationships, continuous target
# - 'mutual_info-classification': Mutual information for classification
#              Best for: non-linear relationships, discrete target (FRAUD!)
#
# Use Case: Feature selection, understanding feature importance before training.
#
# Best For:
# - Fraud Detection: Identify features most correlated with fraud
# - Feature Selection: Remove low-correlation features
# - Feature Engineering: Understand which features to combine/transform
#
# =============================================================================

feature_correlation_class = target.FeatureCorrelation

# Keyword arguments for FeatureCorrelation, listed with the default values
# this notebook documents for them.
feature_correlation_kwargs = dict(
    # --- Core configuration ---
    ax=None,               # matplotlib Axes object to draw on
    method="pearson",      # 'pearson', 'mutual_info-regression' or
                           # 'mutual_info-classification'
    labels=None,           # feature names (auto-populated from a DataFrame)
    sort=False,            # sort features by correlation value (ascending)
    # --- Feature selection ---
    feature_index=None,    # list of feature indices to include (None = all)
    feature_names=None,    # list of feature names to include
                           # (ignored when feature_index is set)
    # --- Appearance ---
    color=None,            # bar color specification
)

# Quick method signature:
#   feature_correlation(X, y, ax=None, method='pearson', labels=None,
#                       sort=False, feature_index=None, feature_names=None,
#                       color=None, show=True, **kwargs)
#
# IMPORTANT: for the mutual information methods, pass discrete_features to
# fit():
#   viz = FeatureCorrelation(method='mutual_info-classification')
#   viz.fit(X, y, discrete_features=[0, 1, 5])  # indices of discrete features

# Echo the configuration, one indented "key: value" line per entry.
print("FeatureCorrelation kwargs:")
print("\n".join(f"  {key}: {value}" for key, value in feature_correlation_kwargs.items()))

In [None]:
# =============================================================================
# 3. BALANCED BINNING REFERENCE VISUALIZER
# =============================================================================
#
# Purpose: Generate histogram with vertical lines showing recommended
# bin boundaries to create evenly distributed bins.
#
# Use Case: Convert continuous regression target to classification bins.
# Useful when you want to discretize a continuous target.
#
# Note: Less relevant for fraud detection (already binary classification)
# but useful for regression problems like ETA prediction.
#
# Best For:
# - Regression to Classification: Binning continuous targets
# - Target Discretization: Creating balanced ordinal categories
# - Distribution Analysis: Understanding target distribution
#
# =============================================================================

balanced_binning_class = target.BalancedBinningReference

# Keyword arguments for BalancedBinningReference, with the values this
# notebook uses as its baseline.
balanced_binning_kwargs = dict(
    ax=None,       # matplotlib Axes object to draw on
    target="y",    # name of the target variable, used for labeling
    bins=4,        # number of bins to generate
)

# Quick method signature:
#   balanced_binning_reference(y, ax=None, target='y', bins=4, show=True,
#                              **kwargs)

# Echo the configuration, one indented "key: value" line per entry.
print("BalancedBinningReference kwargs:")
print("\n".join(f"  {key}: {value}" for key, value in balanced_binning_kwargs.items()))

In [None]:
# =============================================================================
# RECOMMENDED CONFIGURATION FOR TRANSACTION FRAUD DETECTION (TFD)
# =============================================================================
#
# TFD Characteristics:
# - Binary classification (fraud=1, non-fraud=0)
# - Highly imbalanced (~1-5% fraud rate)
# - Many numerical features (transaction amounts, velocities)
# - Categorical features encoded as numerical
#
# Target Analysis Goals:
# 1. Quantify class imbalance (fraud vs non-fraud ratio)
# 2. Verify stratification in train/test split
# 3. Identify features most correlated with fraud
# 4. Guide sampling/weighting decisions
#
# =============================================================================

# The TFD target is binary: raw values first, then display names.
tfd_classes = [0, 1]  # or ["Non-Fraud", "Fraud"]
tfd_labels = ["Non-Fraud", "Fraud"]  # human-readable labels

# -----------------------------------------------------------------------------
# PRIMARY TARGET VISUALIZERS - Essential for fraud detection
# -----------------------------------------------------------------------------

primary_target_visualizers = dict(
    # ClassBalance: shows the fraud vs non-fraud ratio.
    ClassBalance=dict(
        labels=tfd_labels,                  # human-readable class names
        colors=["#2ecc71", "#e74c3c"],      # green = non-fraud, red = fraud
    ),
    # FeatureCorrelation driven by mutual information, chosen here because
    # it can capture non-linear relationships with a binary target.
    FeatureCorrelation=dict(
        method="mutual_info-classification",
        sort=True,                          # order the bars by correlation value
        color="#3498db",                    # blue bars
    ),
)

# Print each visualizer name followed by its indented settings.
print("Primary Target Visualizers for TFD:")
for name, kwargs in primary_target_visualizers.items():
    print(
        f"\n  {name}:",
        *(f"    {key}: {value}" for key, value in kwargs.items()),
        sep="\n",
    )

In [None]:
# -----------------------------------------------------------------------------
# SECONDARY TARGET VISUALIZERS - Additional analysis options
# -----------------------------------------------------------------------------

secondary_target_visualizers = dict(
    # Pearson variant of FeatureCorrelation: linear relationships only,
    # intended as a quick check before running mutual information.
    FeatureCorrelation_Pearson=dict(
        method="pearson",       # linear correlation
        sort=True,              # order the bars by correlation value
        color="#9b59b6",        # purple bars
    ),
    # BalancedBinningReference: not typically used for TFD, but handy for
    # bucketing a regression target such as ETA into time ranges.
    BalancedBinningReference=dict(
        target="is_fraud",      # target name used for labeling
        bins=2,                 # the target already has two classes
    ),
)

# Print each visualizer name followed by its indented settings.
print("Secondary Target Visualizers for TFD:")
for name, kwargs in secondary_target_visualizers.items():
    print(
        f"\n  {name}:",
        *(f"    {key}: {value}" for key, value in kwargs.items()),
        sep="\n",
    )

In [None]:
# =============================================================================
# CONSOLIDATED CONFIGURATION FOR NOTEBOOK 010 INTEGRATION
# =============================================================================

def yellowbrick_target_kwargs(
    project_name,
    metric_name,
    labels=None,
    features_list=None,
    verbose=True
):
    """
    Generate kwargs for a specific yellowbrick.target visualizer.

    Parameters:
    -----------
    project_name : str
        Project identifier (e.g., 'Transaction Fraud Detection' or 'TFD').
        When `labels` is not given, a name that contains the word "fraud"
        (any case) or the standalone token "TFD" selects the default
        ["Non-Fraud", "Fraud"] class labels.
    metric_name : str
        Visualizer name: 'ClassBalance', 'FeatureCorrelation',
        'FeatureCorrelation_Pearson', 'BalancedBinningReference'
    labels : list, optional
        Class labels (default: None, derived from project_name as described
        above, otherwise left as None)
    features_list : list, optional
        List of feature names, passed as `labels` to the FeatureCorrelation
        configurations (default: None)
    verbose : bool
        Print configuration details (default: True)

    Returns:
    --------
    dict : kwargs for the specified visualizer. An unknown `metric_name`
    yields an empty dict and emits a UserWarning.
    """
    import re
    import warnings

    # Default labels for classification. The match is case-insensitive and
    # also recognises the "TFD" abbreviation used throughout this notebook,
    # so yellowbrick_target_kwargs("TFD", ...) gets the fraud labels too.
    if labels is None:
        lowered = project_name.lower()
        is_fraud_project = (
            "fraud" in lowered or "tfd" in re.findall(r"[a-z0-9]+", lowered)
        )
        labels = ["Non-Fraud", "Fraud"] if is_fraud_project else None

    # Visualizer-specific configurations
    configs = {
        "ClassBalance": {
            "labels": labels,
            "colors": ["#2ecc71", "#e74c3c"],  # Green/Red for binary classification
        },
        "FeatureCorrelation": {
            "method": "mutual_info-classification",  # Best for classification
            "labels": features_list,
            "sort": True,
            "color": "#3498db",
        },
        "FeatureCorrelation_Pearson": {
            "method": "pearson",
            "labels": features_list,
            "sort": True,
            "color": "#9b59b6",
        },
        "BalancedBinningReference": {
            "target": "y",
            "bins": 4,
        },
    }

    if metric_name not in configs:
        # Keep the historical "return {}" contract, but make the miss
        # visible: an empty dict passed on without a metric name would
        # otherwise be treated as a ClassBalance configuration downstream.
        warnings.warn(
            f"Unknown visualizer {metric_name!r}; expected one of "
            f"{sorted(configs)}. Returning empty kwargs.",
            stacklevel=2,
        )
        return {}

    kwargs = configs[metric_name]

    if verbose:
        print(f"\n{metric_name} kwargs for {project_name}:")
        for key, value in kwargs.items():
            print(f"  {key}: {value}")

    return kwargs


def yellowbrick_target_visualizers(
    yb_target_kwargs,
    X,
    y,
    metric_name=None
):
    """
    Create and fit a yellowbrick.target visualizer.

    Parameters:
    -----------
    yb_target_kwargs : dict
        Output from yellowbrick_target_kwargs(); None is treated as an
        empty dict
    X : pd.DataFrame or np.ndarray
        Feature matrix (only needed for the FeatureCorrelation variants;
        may be None for the other visualizers)
    y : pd.Series or np.ndarray
        Target vector
    metric_name : str, optional
        Visualizer name. When omitted it is inferred from the kwargs:
        a "method" key means FeatureCorrelation, a "bins" key means
        BalancedBinningReference, anything else means ClassBalance.

    Returns:
    --------
    Visualizer object (fitted)

    Raises:
    -------
    ValueError
        If the visualizer name is unknown, or if a FeatureCorrelation
        variant is requested while X is None.
    """
    # Map metric names to the class name exported by yellowbrick.target.
    # Names are resolved to classes only after validation so that bad
    # arguments fail fast, before the yellowbrick import is paid for.
    class_names = {
        "ClassBalance": "ClassBalance",
        "FeatureCorrelation": "FeatureCorrelation",
        "FeatureCorrelation_Pearson": "FeatureCorrelation",
        "BalancedBinningReference": "BalancedBinningReference",
    }

    init_kwargs = yb_target_kwargs if yb_target_kwargs is not None else {}

    if metric_name is None:
        # Try to infer from kwargs
        if "method" in init_kwargs:
            metric_name = "FeatureCorrelation"
        elif "bins" in init_kwargs:
            metric_name = "BalancedBinningReference"
        else:
            metric_name = "ClassBalance"

    if metric_name not in class_names:
        raise ValueError(f"Unknown visualizer: {metric_name}")

    # Only the FeatureCorrelation variants are fitted on (X, y).
    needs_features = class_names[metric_name] == "FeatureCorrelation"
    if needs_features and X is None:
        raise ValueError(
            f"{metric_name} requires a feature matrix X, but X is None"
        )

    import yellowbrick.target as yb_target

    # Create visualizer
    visualizer_class = getattr(yb_target, class_names[metric_name])
    visualizer = visualizer_class(**init_kwargs)

    # Fit based on visualizer type: ClassBalance and
    # BalancedBinningReference are fitted on the target alone.
    if needs_features:
        visualizer.fit(X, y)
    else:
        visualizer.fit(y)

    return visualizer


# Announce the two helper functions defined above and the visualizers they
# support; the empty entry produces the blank separator line.
print(
    "\n".join(
        (
            "Functions defined for notebook 010 integration:",
            "  - yellowbrick_target_kwargs(project_name, metric_name, labels, features_list, verbose)",
            "  - yellowbrick_target_visualizers(kwargs, X, y, metric_name)",
            "",
            "ALL 3 TARGET VISUALIZERS SUPPORTED:",
            "  Classification: ClassBalance, FeatureCorrelation",
            "  Binning: BalancedBinningReference",
        )
    )
)

In [None]:
# =============================================================================
# KEY INSIGHTS FOR TFD TARGET ANALYSIS
# =============================================================================
#
# 1. VISUALIZER SELECTION STRATEGY:
#    - ALWAYS start with ClassBalance for fraud detection
#    - Use FeatureCorrelation (mutual_info) to rank features by importance
#    - Use FeatureCorrelation (pearson) for quick linear correlation check
#    - BalancedBinningReference is NOT useful for binary classification
#
# 2. PERFORMANCE CONSIDERATIONS:
#    | Visualizer              | Speed    | Requires X |
#    |-------------------------|----------|------------|
#    | ClassBalance            | Fast     | No         |
#    | FeatureCorrelation      | Moderate | YES        |
#    | BalancedBinningReference| Fast     | No         |
#
# 3. FRAUD DETECTION SPECIFIC:
#    - ClassBalance shows imbalance ratio (expect 1-5% fraud)
#    - Use compare mode to verify train/test stratification
#    - FeatureCorrelation helps identify fraud indicators
#    - mutual_info-classification captures non-linear fraud patterns
#
# 4. INTEGRATION WITH SKLEARN WORKFLOW:
#    - Run ClassBalance FIRST to understand imbalance
#    - Run FeatureCorrelation to guide feature selection
#    - Use insights to choose:
#      * Sampling strategy (SMOTE, undersampling)
#      * Class weights in model
#      * Evaluation metrics (PR-AUC for imbalanced data)
#
# 5. USAGE EXAMPLES:
#
#    # Class balance analysis:
#    kwargs = yellowbrick_target_kwargs("TFD", "ClassBalance")
#    viz = yellowbrick_target_visualizers(kwargs, X, y, "ClassBalance")
#    viz.show()
#
#    # Feature correlation with mutual info:
#    kwargs = yellowbrick_target_kwargs("TFD", "FeatureCorrelation", features_list=X.columns.tolist())
#    viz = yellowbrick_target_visualizers(kwargs, X, y, "FeatureCorrelation")
#    viz.show()
#
#    # Compare train/test class balance:
#    from yellowbrick.target import ClassBalance
#    viz = ClassBalance(labels=["Non-Fraud", "Fraud"])
#    viz.fit(y_train, y_test)  # Compare mode
#    viz.show()
#
# =============================================================================
# Closing summary: which visualizers to use for classification versus
# regression work. Empty entries produce the blank separator lines.
print(
    "\n".join(
        (
            "Key insights documented above.",
            "",
            "YELLOWBRICK TARGET SUMMARY:",
            "  - Total Visualizers: 3",
            "",
            "  FOR FRAUD DETECTION (Classification):",
            "    Primary: ClassBalance (imbalance detection)",
            "    Primary: FeatureCorrelation (mutual_info-classification)",
            "",
            "  FOR REGRESSION (ETA, etc.):",
            "    FeatureCorrelation (pearson or mutual_info-regression)",
            "    BalancedBinningReference (target discretization)",
        )
    )
)

In [None]:
# =============================================================================
# COMPLETE PARAMETER REFERENCE TABLE
# =============================================================================

# Render the whole reference table with a single print call. Every entry is
# one output line; empty entries are the blank lines between sections.
print(
    "\n".join(
        [
            # Boxed title
            "+" + "=" * 80 + "+",
            "|" + " YELLOWBRICK TARGET VISUALIZERS - COMPLETE PARAMETER REFERENCE ".center(80) + "|",
            "+" + "=" * 80 + "+",
            "",
            # ClassBalance
            "1. ClassBalance",
            "-" * 40,
            "  Purpose: Visualize class distribution and detect imbalance",
            "  Fit: fit(y) for single dataset, fit(y_train, y_test) for compare mode",
            "  Parameters:",
            "    ax          : matplotlib.axes.Axes  - Axes to plot on (default: None)",
            "    labels      : list                  - Class names for x-axis (default: None)",
            "    colors      : list                  - Bar colors per class (default: None)",
            "    colormap    : str/cmap              - Colormap for classes (default: None)",
            "",
            # FeatureCorrelation
            "2. FeatureCorrelation",
            "-" * 40,
            "  Purpose: Show correlation between features and target",
            "  Fit: fit(X, y) or fit(X, y, discrete_features=[...])",
            "  Parameters:",
            "    ax            : matplotlib.axes.Axes  - Axes to plot on (default: None)",
            "    method        : str                   - 'pearson', 'mutual_info-regression',",
            "                                            'mutual_info-classification' (default: 'pearson')",
            "    labels        : list                  - Feature names (default: None, auto)",
            "    sort          : bool                  - Sort by correlation (default: False)",
            "    feature_index : list                  - Feature indices to include (default: None)",
            "    feature_names : list                  - Feature names to include (default: None)",
            "    color         : str                   - Bar color (default: None)",
            "",
            # BalancedBinningReference
            "3. BalancedBinningReference",
            "-" * 40,
            "  Purpose: Show optimal bin boundaries for target discretization",
            "  Fit: fit(y)",
            "  Parameters:",
            "    ax     : matplotlib.axes.Axes  - Axes to plot on (default: None)",
            "    target : str                   - Target variable name (default: 'y')",
            "    bins   : int                   - Number of bins (default: 4)",
            "",
            # Closing border
            "+" + "=" * 80 + "+",
        ]
    )
)