In [None]:
from yellowbrick import classifier
import inspect

In [None]:
# 1) Collect every public attribute name exposed by yellowbrick.classifier
#    (names starting with an underscore are dropped).
all_items = list(filter(lambda attr: not attr.startswith('_'), dir(classifier)))
print("All items in yellowbrick.classifier module:", all_items, sep="\n")

In [None]:
# Sort the public names into three buckets according to what they refer to.
functions, classes, submodules = [], [], []

for name in all_items:
    obj = getattr(classifier, name)
    # The first predicate that matches wins; names matching none are ignored.
    for matches, bucket in (
        (inspect.isfunction, functions),
        (inspect.isclass, classes),
        (inspect.ismodule, submodules),
    ):
        if matches(obj):
            bucket.append(name)
            break

# Report each bucket as a "<heading>: <count>" line followed by its members.
for heading, members in (
    ("Functions (quick methods)", functions),
    ("\nClasses (visualizers)", classes),
    ("\nSubmodules", submodules),
):
    print(f"{heading}: {len(members)}")
    print(members)

In [None]:
# =============================================================================
# YELLOWBRICK CLASSIFIER MODULE OVERVIEW
# =============================================================================
#
# yellowbrick.classifier provides visualizers for classification model evaluation:
# - Confusion matrices and classification reports
# - ROC curves and AUC metrics
# - Precision-Recall curves
# - Class prediction errors
# - Threshold analysis for binary classifiers
#
# IMPORTANT: CatBoost Compatibility
# ---------------------------------
# YellowBrick visualizers expect sklearn-compatible estimators.
# CatBoost is NOT fully sklearn-compatible but has similar interface.
#
# Solutions:
# 1. Use `force_model=True` to bypass sklearn validation
# 2. Use `is_fitted=True` to skip fit checks
# 3. For some visualizers, use pre-computed predictions directly
#
# =============================================================================
# AVAILABLE VISUALIZERS (6 total):
# =============================================================================
#
# METRICS VISUALIZERS (use predictions):
# 1. ClassificationReport - Precision, Recall, F1 heatmap per class
# 2. ConfusionMatrix - Confusion matrix heatmap
# 3. ClassPredictionError - Bar chart showing prediction errors per class
#
# PROBABILITY-BASED VISUALIZERS (use predict_proba):
# 4. ROCAUC - ROC curve and AUC score
# 5. PrecisionRecallCurve - PR curve with optional ISO F1 curves
#
# THRESHOLD VISUALIZERS (binary only):
# 6. DiscriminationThreshold - Metrics across decision thresholds
#
# =============================================================================
# Emit the module overview in a single write; the empty entry is the blank
# separator line before the compatibility note.
print("\n".join((
    "Yellowbrick Classifier Module - 6 Visualizers Available",
    "  - 3 Metrics-based (ClassificationReport, ConfusionMatrix, ClassPredictionError)",
    "  - 2 Probability-based (ROCAUC, PrecisionRecallCurve)",
    "  - 1 Threshold-based (DiscriminationThreshold)",
    "",
    "CatBoost Compatibility: Use force_model=True and is_fitted=True",
)))

In [None]:
# =============================================================================
# CATBOOST COMPATIBILITY ANALYSIS
# =============================================================================
#
# YellowBrick uses sklearn's check_is_fitted() and is_classifier() functions
# which may fail with CatBoost. Here's how to handle each visualizer:
#
# | Visualizer              | CatBoost Compatible | Solution                    |
# |-------------------------|---------------------|-----------------------------|
# | ClassificationReport    | Yes (with workaround) | force_model=True, is_fitted=True |
# | ConfusionMatrix         | Yes (with workaround) | force_model=True, is_fitted=True |
# | ClassPredictionError    | Yes (with workaround) | force_model=True, is_fitted=True |
# | ROCAUC                  | Yes (with workaround) | force_model=True, is_fitted=True |
# | PrecisionRecallCurve    | Yes (with workaround) | force_model=True, is_fitted=True |
# | DiscriminationThreshold | PROBLEMATIC          | Requires multiple fit calls     |
#
# Key CatBoost methods that YellowBrick uses:
# - predict(X) -> Returns class labels (0 or 1)
# - predict_proba(X) -> Returns probability matrix [[p_0, p_1], ...]
# - classes_ -> Class labels (CatBoost has this after fit)
#
# =============================================================================
# Emit the CatBoost compatibility summary in a single write.
print("\n".join((
    "CatBoost Compatibility Summary:",
    "  - Most visualizers work with force_model=True",
    "  - DiscriminationThreshold may have issues (requires CV)",
    "  - Always use is_fitted=True with pre-trained CatBoost models",
)))

In [None]:
# =============================================================================
# 1. CLASSIFICATION REPORT VISUALIZER
# =============================================================================
#
# Purpose: Displays precision, recall, F1-score, and support for each class
# as a color-coded heatmap. Essential for understanding per-class performance.
#
# Use Case: Compare model performance across different classes,
# identify classes where model struggles (low recall/precision).
#
# Best For:
# - Fraud Detection: See precision/recall for fraud vs non-fraud separately
# - Multi-class: Compare performance across all classes
# - Imbalanced Data: Support column shows class distribution
#
# CatBoost Compatibility: YES with force_model=True, is_fitted=True
#
# =============================================================================

classification_report_class = classifier.ClassificationReport

# Constructor arguments for ClassificationReport, listed in display order.
classification_report_kwargs = dict(
    # --- Core configuration ---
    ax=None,                # matplotlib Axes object (default: None)
    classes=None,           # Class labels ordered by sorted class index
    cmap="YlOrRd",          # Colormap: 'YlOrRd', 'Blues', 'RdBu_r', etc.
    support="count",        # Support display: True, False, None, 'percent', 'count'
    encoder=None,           # LabelEncoder or dict for class name mapping
    colorbar=True,          # Show colorbar legend (default: True)
    fontsize=None,          # Font size for labels (default: None = auto)
    # --- CatBoost compatibility ---
    is_fitted=True,         # REQUIRED for CatBoost: skip fit check
    force_model=True,       # REQUIRED for CatBoost: bypass sklearn validation
)

# Quick method signature, for reference:
# classification_report(estimator, X_train, y_train, X_test=None, y_test=None,
#                       ax=None, classes=None, cmap='YlOrRd', support=None,
#                       encoder=None, is_fitted='auto', force_model=False,
#                       colorbar=True, fontsize=None, show=True, **kwargs)

# Dump the configuration, one "key: value" line per argument.
print("ClassificationReport kwargs:")
for key, value in classification_report_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# 2. CONFUSION MATRIX VISUALIZER
# =============================================================================
#
# Purpose: Visualizes the confusion matrix as a heatmap showing
# true vs predicted class distributions.
#
# Use Case: Understand where model confuses classes,
# see TP, TN, FP, FN distributions.
#
# Best For:
# - Fraud Detection: See false positives vs false negatives
# - Error Analysis: Identify which classes are confused
# - Model Comparison: Compare confusion patterns between models
#
# CatBoost Compatibility: YES with force_model=True, is_fitted=True
#
# =============================================================================

confusion_matrix_class = classifier.ConfusionMatrix

# Constructor arguments for ConfusionMatrix, listed in display order.
confusion_matrix_kwargs = dict(
    # --- Core configuration ---
    ax=None,                # matplotlib Axes object (default: None)
    classes=None,           # Class labels for display
    cmap="YlOrRd",          # Colormap for heatmap
    percent=False,          # Show percentages instead of counts
    fontsize=None,          # Font size for cell values
    encoder=None,           # LabelEncoder or dict for class names
    sample_weight=None,     # Sample weights for confusion_matrix
    # --- CatBoost compatibility ---
    is_fitted=True,         # REQUIRED for CatBoost
    force_model=True,       # REQUIRED for CatBoost
)

# Quick method signature, for reference:
# confusion_matrix(estimator, X_train, y_train, X_test=None, y_test=None,
#                  ax=None, sample_weight=None, percent=False, classes=None,
#                  encoder=None, cmap='YlOrRd', fontsize=None, is_fitted='auto',
#                  force_model=False, show=True, **kwargs)

# Dump the configuration, one "key: value" line per argument.
print("ConfusionMatrix kwargs:")
for key, value in confusion_matrix_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# 3. ROCAUC VISUALIZER
# =============================================================================
#
# Purpose: Plots Receiver Operating Characteristic curve and calculates
# Area Under the Curve (AUC) score.
#
# Use Case: Evaluate classifier's ability to discriminate between classes,
# compare models using AUC metric.
#
# Best For:
# - Fraud Detection: See trade-off between TPR and FPR
# - Model Selection: AUC is threshold-independent, but can look optimistic under heavy class imbalance (see PrecisionRecallCurve)
# - Threshold Selection: Identify optimal operating point
#
# CatBoost Compatibility: YES with force_model=True, is_fitted=True
# Requires: predict_proba() method (CatBoost has this)
#
# =============================================================================

rocauc_class = classifier.ROCAUC

# Constructor arguments for ROCAUC, listed in display order.
rocauc_kwargs = dict(
    # --- Core configuration ---
    ax=None,                # matplotlib Axes object (default: None)
    classes=None,           # Class labels for legend
    encoder=None,           # LabelEncoder or dict for class names
    # --- Curve options (for multi-class) ---
    micro=True,             # Show micro-average ROC (aggregated TP/FP)
    macro=True,             # Show macro-average ROC (average across classes)
    per_class=True,         # Show individual class ROC curves
    # --- Binary classification shortcut ---
    binary=False,           # If True, sets micro=macro=per_class=False
    # --- CatBoost compatibility ---
    is_fitted=True,         # REQUIRED for CatBoost
    force_model=True,       # REQUIRED for CatBoost
)

# Quick method signature, for reference:
# roc_auc(estimator, X_train, y_train, X_test=None, y_test=None,
#         ax=None, micro=True, macro=True, per_class=True, binary=False,
#         classes=None, encoder=None, is_fitted='auto', force_model=False,
#         show=True, **kwargs)

# Dump the configuration, one "key: value" line per argument.
print("ROCAUC kwargs:")
for key, value in rocauc_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# 4. PRECISION-RECALL CURVE VISUALIZER
# =============================================================================
#
# Purpose: Plots precision vs recall trade-off at different thresholds.
# Better than ROC for imbalanced datasets!
#
# Use Case: Evaluate model on imbalanced data where precision matters,
# understand threshold effects on precision/recall trade-off.
#
# Best For:
# - Fraud Detection: CRITICAL - fraud is rare, PR curve is more informative
# - Imbalanced Data: PR curve doesn't inflate with majority class
# - Threshold Tuning: Find optimal precision/recall balance
#
# CatBoost Compatibility: YES with force_model=True, is_fitted=True
# Requires: predict_proba() method (CatBoost has this)
#
# =============================================================================

precision_recall_class = classifier.PrecisionRecallCurve

# Constructor arguments for PrecisionRecallCurve, listed in display order.
precision_recall_kwargs = dict(
    # --- Core configuration ---
    ax=None,                # matplotlib Axes object (default: None)
    classes=None,           # Class labels for legend
    encoder=None,           # LabelEncoder or dict for class names
    # --- Curve options ---
    fill_area=True,         # Fill area under curve
    fill_opacity=0.2,       # Fill transparency (0-1)
    line_opacity=0.8,       # Line transparency (0-1)
    ap_score=True,          # Show Average Precision score
    # --- Multi-class options ---
    micro=True,             # Micro-average PR curve
    per_class=False,        # Individual class curves
    # --- ISO F1 curves (optional) ---
    iso_f1_curves=False,    # Show ISO F1 score curves
    iso_f1_values=(0.2, 0.4, 0.6, 0.8),  # F1 values for ISO curves
    # --- Color options ---
    colors=None,            # Custom colors for per_class curves
    cmap=None,              # Colormap for per_class curves
    # --- CatBoost compatibility ---
    is_fitted=True,         # REQUIRED for CatBoost
    force_model=True,       # REQUIRED for CatBoost
)

# Quick method signature, for reference:
# precision_recall_curve(estimator, X_train, y_train, X_test=None, y_test=None,
#                        ax=None, classes=None, colors=None, cmap=None,
#                        encoder=None, fill_area=True, ap_score=True,
#                        micro=True, iso_f1_curves=False, iso_f1_values=(0.2,0.4,0.6,0.8),
#                        per_class=False, fill_opacity=0.2, line_opacity=0.8,
#                        is_fitted='auto', force_model=False, show=True, **kwargs)

# Dump the configuration, one "key: value" line per argument.
print("PrecisionRecallCurve kwargs:")
for key, value in precision_recall_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# 5. CLASS PREDICTION ERROR VISUALIZER
# =============================================================================
#
# Purpose: Bar chart showing both support and prediction errors per class.
# Alternative to confusion matrix that's easier to read.
#
# Use Case: Quickly see which classes have most errors,
# understand class-level prediction patterns.
#
# Best For:
# - Multi-class: See error distribution across classes
# - Imbalanced Data: Visualize how minority class is predicted
# - Quick Analysis: Faster to interpret than confusion matrix
#
# CatBoost Compatibility: YES with force_model=True, is_fitted=True
#
# =============================================================================

class_prediction_error_class = classifier.ClassPredictionError

# Constructor arguments for ClassPredictionError, listed in display order.
class_prediction_error_kwargs = dict(
    # --- Core configuration ---
    ax=None,                # matplotlib Axes object (default: None)
    classes=None,           # Class labels for legend
    encoder=None,           # LabelEncoder or dict for class names
    # --- CatBoost compatibility ---
    is_fitted=True,         # REQUIRED for CatBoost
    force_model=True,       # REQUIRED for CatBoost
)

# Quick method signature, for reference:
# class_prediction_error(estimator, X_train, y_train, X_test=None, y_test=None,
#                        ax=None, classes=None, encoder=None,
#                        is_fitted='auto', force_model=False, show=True, **kwargs)

# Dump the configuration, one "key: value" line per argument.
print("ClassPredictionError kwargs:")
for key, value in class_prediction_error_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# 6. DISCRIMINATION THRESHOLD VISUALIZER
# =============================================================================
#
# Purpose: Shows precision, recall, F1, and queue rate across all
# decision thresholds. Helps find optimal threshold.
#
# Use Case: Tune decision threshold for binary classifier,
# understand trade-offs at different operating points.
#
# Best For:
# - Fraud Detection: Find threshold balancing false positives/negatives
# - Cost-Sensitive: When FP and FN have different costs
# - Threshold Optimization: Data-driven threshold selection
#
# CatBoost Compatibility: PROBLEMATIC
# - Requires multiple fit calls with cross-validation
# - May not work reliably with CatBoost
# - Alternative: Use sklearn's precision_recall_curve manually
#
# =============================================================================

discrimination_threshold_class = classifier.DiscriminationThreshold

# Constructor arguments for DiscriminationThreshold, listed in display order.
discrimination_threshold_kwargs = dict(
    # --- Core configuration ---
    ax=None,                    # matplotlib Axes object (default: None)
    # --- Cross-validation settings ---
    n_trials=50,                # Number of shuffle/split trials
    cv=0.1,                     # Test split fraction or CV generator
    # --- Metric options ---
    fbeta=1.0,                  # Beta for F-beta score (1.0 = F1)
    argmax="fscore",            # Metric to highlight: 'precision', 'recall', 'queue_rate', 'fscore'
    exclude=None,               # Metrics to exclude from plot
    # --- Visualization options ---
    quantiles=(0.1, 0.5, 0.9),  # Quantiles for uncertainty bands
    random_state=42,            # Reproducibility seed
    # --- CatBoost compatibility (may not work) ---
    is_fitted=False,            # Must be False - needs to fit multiple times
    force_model=True,           # Try to bypass validation
)

# Quick method signature, for reference:
# discrimination_threshold(estimator, X, y, ax=None, n_trials=50, cv=0.1,
#                          fbeta=1.0, argmax='fscore', exclude=None,
#                          quantiles=(0.1, 0.5, 0.9), random_state=None,
#                          is_fitted='auto', force_model=False, show=True, **kwargs)

# Dump the configuration (after a compatibility warning), one line per argument.
print("DiscriminationThreshold kwargs:")
print("  WARNING: May not work with CatBoost (requires CV refitting)")
for key, value in discrimination_threshold_kwargs.items():
    print("  {}: {}".format(key, value))

In [None]:
# =============================================================================
# RECOMMENDED CONFIGURATION FOR TRANSACTION FRAUD DETECTION (TFD)
# =============================================================================
#
# TFD Characteristics:
# - Binary classification (fraud=1, non-fraud=0)
# - Highly imbalanced (~1-5% fraud rate)
# - Model: CatBoost (not sklearn-native)
#
# Classification Goals:
# 1. High Recall (catch most fraud)
# 2. Acceptable Precision (minimize false alarms)
# 3. Understand trade-offs at different thresholds
#
# =============================================================================

# Display labels for the two TFD classes (index 0 = non-fraud, index 1 = fraud).
tfd_classes = ["Non-Fraud", "Fraud"]

# -----------------------------------------------------------------------------
# PRIMARY CLASSIFIER VISUALIZERS - Most useful for fraud detection
# -----------------------------------------------------------------------------
# Maps visualizer name -> constructor kwargs. Every entry shares the same
# `tfd_classes` list object and the CatBoost flags is_fitted/force_model.
primary_classifier_visualizers = dict(
    # See TP, TN, FP, FN distribution - CRITICAL for understanding
    # fraud detection performance.
    ConfusionMatrix=dict(
        classes=tfd_classes,
        cmap="Blues",               # Blue heatmap (professional look)
        percent=True,               # Show percentages (better for imbalanced)
        is_fitted=True,
        force_model=True,
    ),
    # Per-class precision, recall, F1 - separate performance for
    # fraud vs non-fraud.
    ClassificationReport=dict(
        classes=tfd_classes,
        cmap="YlOrRd",              # Yellow-Orange-Red (default)
        support="percent",          # Show class distribution as %
        colorbar=True,
        is_fitted=True,
        force_model=True,
    ),
    # ROC curve for binary classification; AUC is a threshold-independent metric.
    ROCAUC=dict(
        classes=tfd_classes,
        binary=True,                # Binary mode for cleaner plot
        is_fitted=True,
        force_model=True,
    ),
    # BEST for imbalanced data - more informative than ROC for fraud detection.
    PrecisionRecallCurve=dict(
        classes=tfd_classes,
        fill_area=True,
        ap_score=True,              # Show Average Precision
        iso_f1_curves=True,         # Show F1 score reference curves
        is_fitted=True,
        force_model=True,
    ),
)

# Print each visualizer name followed by its indented settings.
print("Primary Classifier Visualizers for TFD:")
for name, kwargs in primary_classifier_visualizers.items():
    print("\n  {}:".format(name))
    for key, value in kwargs.items():
        print("    {}: {}".format(key, value))

In [None]:
# -----------------------------------------------------------------------------
# SECONDARY CLASSIFIER VISUALIZERS - Additional analysis
# -----------------------------------------------------------------------------

# Maps visualizer name -> constructor kwargs for the supplementary plots.
secondary_classifier_visualizers = dict(
    # Bar chart of prediction errors - alternative to confusion matrix.
    ClassPredictionError=dict(
        classes=tfd_classes,
        is_fitted=True,
        force_model=True,
    ),
    # DiscriminationThreshold: Threshold optimization
    # WARNING: May not work with CatBoost - intentionally left disabled.
    # DiscriminationThreshold=dict(
    #     n_trials=10,              # Reduce trials for speed
    #     cv=0.2,                   # 20% test split
    #     argmax="fscore",          # Highlight best F1 threshold
    #     random_state=42,
    #     force_model=True,
    # ),
)

# Print each visualizer name followed by its indented settings.
print("Secondary Classifier Visualizers for TFD:")
for name, kwargs in secondary_classifier_visualizers.items():
    print("\n  {}:".format(name))
    for key, value in kwargs.items():
        print("    {}: {}".format(key, value))

In [None]:
# =============================================================================
# CONSOLIDATED CONFIGURATION FOR NOTEBOOK 010 INTEGRATION
# =============================================================================

def yellowbrick_classification_kwargs(
    project_name,
    metric_name,
    y_train=None,
    classes=None,
    verbose=True
):
    """
    Generate kwargs for a specific yellowbrick.classifier visualizer.

    Reference: https://www.scikit-yb.org/en/latest/api/classifier/index.html

    Parameters:
    -----------
    project_name : str
        Project identifier (e.g., 'Transaction Fraud Detection' or 'TFD').
        Used to choose the default class labels and in the verbose printout.
    metric_name : str
        Visualizer name: 'ConfusionMatrix', 'ClassificationReport', 'ROCAUC',
        'PrecisionRecallCurve', 'ClassPredictionError' or
        'DiscriminationThreshold'.
    y_train : array-like, optional
        Currently unused; accepted so existing call sites keep working.
    classes : list, optional
        Class labels. When None, fraud projects default to
        ['Non-Fraud', 'Fraud'] and every other project gets None. A project
        counts as a fraud project when its name contains "fraud" (any case)
        or the standalone word "TFD".
    verbose : bool
        Print configuration details (default: True)

    Returns:
    --------
    dict : kwargs for the specified visualizer, or an empty dict when
        `metric_name` is not one of the supported names.

    CatBoost Compatibility:
    -----------------------
    Every configuration sets force_model=True. All set is_fitted=True except
    DiscriminationThreshold, which sets is_fitted=False.
    """
    # Default classes for fraud detection
    if classes is None:
        # Match case-insensitively and accept the "TFD" abbreviation as a
        # whole word, so short project names resolve to the same labels.
        lowered = str(project_name or "").lower()
        words = lowered.replace("-", " ").replace("_", " ").split()
        is_fraud_project = "fraud" in lowered or "tfd" in words
        classes = ["Non-Fraud", "Fraud"] if is_fraud_project else None

    # Flags shared by every visualizer that receives a pre-fitted estimator.
    prefitted = {"is_fitted": True, "force_model": True}

    # Visualizer-specific configurations optimized for CatBoost + fraud detection
    configs = {
        # ConfusionMatrix: Essential for fraud detection
        "ConfusionMatrix": {
            "classes": classes,
            "cmap": "Blues",
            "percent": True,        # Percentages better for imbalanced data
            **prefitted,
        },

        # ClassificationReport: Per-class metrics heatmap
        "ClassificationReport": {
            "classes": classes,
            "cmap": "YlOrRd",
            "support": "percent",   # Show class distribution
            "colorbar": True,
            **prefitted,
        },

        # ROCAUC: ROC curve with AUC score
        "ROCAUC": {
            "classes": classes,
            "binary": True,         # Binary mode for TFD
            **prefitted,
        },

        # PrecisionRecallCurve: BEST for imbalanced data
        "PrecisionRecallCurve": {
            "classes": classes,
            "fill_area": True,
            "ap_score": True,
            "iso_f1_curves": True,  # Show F1 reference curves
            **prefitted,
        },

        # ClassPredictionError: Bar chart of errors
        "ClassPredictionError": {
            "classes": classes,
            **prefitted,
        },

        # DiscriminationThreshold: NOT recommended for CatBoost
        # Requires multiple fit calls which may fail
        "DiscriminationThreshold": {
            "n_trials": 10,
            "cv": 0.2,
            "argmax": "fscore",
            "random_state": 42,
            "is_fitted": False,     # Must be False for CV
            "force_model": True,
        },
    }

    # Unknown names deliberately fall back to an empty configuration.
    kwargs = configs.get(metric_name, {})

    if verbose and kwargs:
        print(f"\n{metric_name} kwargs for {project_name}:")
        for key, value in kwargs.items():
            print(f"  {key}: {value}")

    return kwargs


def yellowbrick_classification_visualizers(
    yb_classification_kwargs,
    estimator,
    X_train,
    X_test,
    y_train,
    y_test,
    metric_name=None
):
    """
    Create, fit and score a yellowbrick.classifier visualizer.

    Reference: https://www.scikit-yb.org/en/latest/api/classifier/index.html

    Parameters:
    -----------
    yb_classification_kwargs : dict
        Constructor kwargs, normally the output of
        yellowbrick_classification_kwargs(). A mapping keyed by visualizer
        name (e.g. {"ConfusionMatrix": {...}}) is also accepted; the entry
        for the selected visualizer is then used as the kwargs.
    estimator : fitted model
        CatBoost or sklearn classifier (must be pre-fitted)
    X_train, X_test : array-like
        Training and test features
    y_train, y_test : array-like
        Training and test labels
    metric_name : str, optional
        Visualizer name. When omitted it is inferred: first from a key that
        names a visualizer (nested form), otherwise from constructor
        parameters that only one visualizer accepts (e.g. 'percent' ->
        ConfusionMatrix). ClassPredictionError has no such distinguishing
        parameter, so it must always be named explicitly.

    Returns:
    --------
    Visualizer object (fitted and scored)

    Raises:
    -------
    ValueError
        If `metric_name` is not a supported visualizer, or it was omitted and
        cannot be inferred unambiguously from the kwargs.

    Fit Methods:
    ------------
    Every visualizer is driven with fit(X_train, y_train) followed by
    score(X_test, y_test).
    """
    supported = (
        "ConfusionMatrix",
        "ClassificationReport",
        "ROCAUC",
        "PrecisionRecallCurve",
        "ClassPredictionError",
        "DiscriminationThreshold",
    )

    # Constructor parameters accepted by exactly one of the visualizers; seeing
    # any of them in flat kwargs identifies that visualizer.
    distinguishing_params = {
        "ConfusionMatrix": {"percent", "sample_weight"},
        "ClassificationReport": {"support", "colorbar"},
        "ROCAUC": {"binary", "macro"},
        "PrecisionRecallCurve": {
            "fill_area", "fill_opacity", "line_opacity", "ap_score",
            "iso_f1_curves", "iso_f1_values", "colors",
        },
        "DiscriminationThreshold": {
            "n_trials", "cv", "fbeta", "argmax", "exclude", "quantiles",
            "random_state",
        },
    }

    kwargs = yb_classification_kwargs or {}

    if metric_name is None:
        # Nested form: the first key that is itself a visualizer name.
        metric_name = next((key for key in kwargs if key in supported), None)

    if metric_name is None:
        # Flat form: look for parameters unique to a single visualizer.
        candidates = [
            name
            for name, params in distinguishing_params.items()
            if params.intersection(kwargs)
        ]
        if len(candidates) != 1:
            raise ValueError(
                "Cannot infer the visualizer from kwargs keys "
                f"{sorted(kwargs)}; pass metric_name explicitly "
                f"(one of: {', '.join(supported)})"
            )
        metric_name = candidates[0]

    if metric_name not in supported:
        raise ValueError(f"Unknown visualizer: {metric_name}")

    # Unwrap a mapping keyed by visualizer name so that only the selected
    # visualizer's own settings reach its constructor.
    nested = kwargs.get(metric_name)
    if isinstance(nested, dict):
        kwargs = nested

    # Imported only after the name is validated, so bad arguments are reported
    # without needing yellowbrick to be importable.
    from yellowbrick import classifier as yb_classifier

    visualizer_class = getattr(yb_classifier, metric_name)

    # Create visualizer with estimator
    visualizer = visualizer_class(estimator, **kwargs)

    # Fit and score
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    return visualizer


# Summarize the two integration helpers and the visualizers they support.
# The second signature lists metric_name, which selects the visualizer class.
print("Functions defined for notebook 010 integration:")
print("  - yellowbrick_classification_kwargs(project_name, metric_name, y_train, classes, verbose)")
print("  - yellowbrick_classification_visualizers(kwargs, estimator, X_train, X_test, y_train, y_test, metric_name)")
print()
print("ALL 6 CLASSIFIER VISUALIZERS SUPPORTED:")
print("  Metrics: ConfusionMatrix, ClassificationReport, ClassPredictionError")
print("  Probability: ROCAUC, PrecisionRecallCurve")
print("  Threshold: DiscriminationThreshold (may not work with CatBoost)")

In [None]:
# =============================================================================
# KEY INSIGHTS FOR TFD CLASSIFICATION ANALYSIS
# =============================================================================
#
# 1. VISUALIZER SELECTION STRATEGY:
#    - Start with ConfusionMatrix to see TP/TN/FP/FN
#    - Use ClassificationReport for per-class metrics
#    - Use PrecisionRecallCurve (better than ROC for imbalanced data!)
#    - Use ROCAUC for threshold-independent comparison
#
# 2. CATBOOST COMPATIBILITY:
#    | Visualizer              | Works? | Notes                           |
#    |-------------------------|--------|--------------------------------|
#    | ConfusionMatrix         | YES    | force_model=True, is_fitted=True |
#    | ClassificationReport    | YES    | force_model=True, is_fitted=True |
#    | ROCAUC                  | YES    | force_model=True, is_fitted=True |
#    | PrecisionRecallCurve    | YES    | force_model=True, is_fitted=True |
#    | ClassPredictionError    | YES    | force_model=True, is_fitted=True |
#    | DiscriminationThreshold | MAYBE  | Requires CV refitting          |
#
# 3. FRAUD DETECTION SPECIFIC:
#    - PrecisionRecallCurve is MORE INFORMATIVE than ROCAUC for imbalanced data
#    - ConfusionMatrix with percent=True shows relative performance
#    - ClassificationReport shows support to understand class distribution
#    - iso_f1_curves=True helps visualize F1 trade-offs
#
# 4. PERFORMANCE CONSIDERATIONS:
#    | Visualizer              | Speed    | Requires predict_proba |
#    |-------------------------|----------|------------------------|
#    | ConfusionMatrix         | Fast     | No                     |
#    | ClassificationReport    | Fast     | No                     |
#    | ClassPredictionError    | Fast     | No                     |
#    | ROCAUC                  | Moderate | YES                    |
#    | PrecisionRecallCurve    | Moderate | YES                    |
#    | DiscriminationThreshold | SLOW     | YES (+ CV)             |
#
# 5. USAGE EXAMPLES:
#
#    # With CatBoost model:
#    from catboost import CatBoostClassifier
#    model = CatBoostClassifier().fit(X_train, y_train)
#
#    # Confusion Matrix
#    kwargs = yellowbrick_classification_kwargs("Transaction Fraud Detection", "ConfusionMatrix")
#    viz = yellowbrick_classification_visualizers(
#        kwargs, model, X_train, X_test, y_train, y_test, metric_name="ConfusionMatrix")
#    viz.show()
#
#    # Precision-Recall Curve (best for fraud detection)
#    kwargs = yellowbrick_classification_kwargs("Transaction Fraud Detection", "PrecisionRecallCurve")
#    viz = yellowbrick_classification_visualizers(
#        kwargs, model, X_train, X_test, y_train, y_test, metric_name="PrecisionRecallCurve")
#    viz.show()
#
# =============================================================================
# Emit the closing summary in a single write; empty entries are blank lines.
print("\n".join((
    "Key insights documented above.",
    "",
    "YELLOWBRICK CLASSIFIER SUMMARY:",
    "  - Total Visualizers: 6",
    "",
    "  FOR FRAUD DETECTION (Classification):",
    "    Primary: ConfusionMatrix, ClassificationReport",
    "    Primary: PrecisionRecallCurve (BEST for imbalanced!)",
    "    Primary: ROCAUC",
    "",
    "  SECONDARY:",
    "    ClassPredictionError (alternative to ConfusionMatrix)",
    "    DiscriminationThreshold (may not work with CatBoost)",
)))

In [None]:
# =============================================================================
# COMPLETE PARAMETER REFERENCE TABLE
# =============================================================================

# Banner: an 80-column centered title between two rule lines, then a blank line.
print("\n".join((
    "+{}+".format("=" * 80),
    "|{}|".format(" YELLOWBRICK CLASSIFIER VISUALIZERS - COMPLETE PARAMETER REFERENCE ".center(80)),
    "+{}+".format("=" * 80),
    "",
)))

# ConfusionMatrix reference entry (the trailing empty item is the blank separator).
print("\n".join((
    "1. ConfusionMatrix",
    "-" * 40,
    "  Purpose: Heatmap of true vs predicted class distributions",
    "  CatBoost: YES (force_model=True, is_fitted=True)",
    "  Parameters:",
    "    classes      : list         - Class labels (default: None)",
    "    cmap         : str          - Colormap (default: 'YlOrRd')",
    "    percent      : bool         - Show percentages (default: False)",
    "    fontsize     : int          - Font size (default: None)",
    "    sample_weight: array        - Sample weights (default: None)",
    "    is_fitted    : bool/str     - Pre-fitted estimator (default: 'auto')",
    "    force_model  : bool         - Bypass sklearn check (default: False)",
    "",
)))

# ClassificationReport reference entry (the trailing empty item is the blank separator).
print("\n".join((
    "2. ClassificationReport",
    "-" * 40,
    "  Purpose: Precision, recall, F1 heatmap per class",
    "  CatBoost: YES (force_model=True, is_fitted=True)",
    "  Parameters:",
    "    classes      : list         - Class labels (default: None)",
    "    cmap         : str          - Colormap (default: 'YlOrRd')",
    "    support      : str/bool     - Show support: True/False/'percent'/'count' (default: None)",
    "    colorbar     : bool         - Show colorbar (default: True)",
    "    fontsize     : int          - Font size (default: None)",
    "    is_fitted    : bool/str     - Pre-fitted estimator (default: 'auto')",
    "    force_model  : bool         - Bypass sklearn check (default: False)",
    "",
)))

# ROCAUC
print("3. ROCAUC")
print("-" * 40)
print("  Purpose: ROC curve with AUC score")
print("  CatBoost: YES (force_model=True, is_fitted=True)")
print("  Requires: predict_proba() method")
print("  Parameters:")
print("    classes      : list         - Class labels (default: None)")
print("    micro        : bool         - Micro-average curve (default: True)")
print("    macro        : bool         - Macro-average curve (default: True)")
print("    per_class    : bool         - Per-class curves (default: True)")
print("    binary       : bool         - Binary mode (sets above to False) (default: False)")
print("    is_fitted    : bool/str     - Pre-fitted estimator (default: 'auto')")
print("    force_model  : bool         - Bypass sklearn check (default: False)")
print()

# PrecisionRecallCurve
print("4. PrecisionRecallCurve")
print("-" * 40)
print("  Purpose: Precision vs recall trade-off (BEST for imbalanced data!)")
print("  CatBoost: YES (force_model=True, is_fitted=True)")
print("  Requires: predict_proba() method")
print("  Parameters:")
print("    classes       : list        - Class labels (default: None)")
print("    fill_area     : bool        - Fill under curve (default: True)")
print("    fill_opacity  : float       - Fill transparency (default: 0.2)")
print("    line_opacity  : float       - Line transparency (default: 0.8)")
print("    ap_score      : bool        - Show Average Precision (default: True)")
print("    micro         : bool        - Micro-average curve (default: True)")
print("    per_class     : bool        - Per-class curves (default: False)")
print("    iso_f1_curves : bool        - Show ISO F1 curves (default: False)")
print("    iso_f1_values : tuple       - F1 values for ISO curves (default: (0.2,0.4,0.6,0.8))")
print("    is_fitted     : bool/str    - Pre-fitted estimator (default: 'auto')")
print("    force_model   : bool        - Bypass sklearn check (default: False)")
print()

# ClassPredictionError
print("5. ClassPredictionError")
print("-" * 40)
print("  Purpose: Bar chart showing prediction errors per class")
print("  CatBoost: YES (force_model=True, is_fitted=True)")
print("  Parameters:")
print("    classes      : list         - Class labels (default: None)")
print("    is_fitted    : bool/str     - Pre-fitted estimator (default: 'auto')")
print("    force_model  : bool         - Bypass sklearn check (default: False)")
print()

# DiscriminationThreshold
print("6. DiscriminationThreshold")
print("-" * 40)
print("  Purpose: Metrics across decision thresholds (binary only)")
print("  CatBoost: PROBLEMATIC (requires CV refitting)")
print("  Requires: predict_proba() method + CV")
print("  Parameters:")
print("    n_trials     : int          - Number of CV trials (default: 50)")
print("    cv           : float/CV     - Test split or CV generator (default: 0.1)")
print("    fbeta        : float        - F-beta weight (default: 1.0 = F1)")
print("    argmax       : str          - Metric to highlight (default: 'fscore')")
print("    exclude      : list         - Metrics to exclude (default: None)")
print("    quantiles    : tuple        - Uncertainty bands (default: (0.1, 0.5, 0.9))")
print("    random_state : int          - Reproducibility seed (default: None)")
print("    is_fitted    : bool/str     - Pre-fitted estimator (default: 'auto')")
print("    force_model  : bool         - Bypass sklearn check (default: False)")
print()
print("+" + "=" * 80 + "+")