In [None]:
from yellowbrick import model_selection
from sklearn.model_selection import StratifiedKFold
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
import inspect

In [None]:
# 1) Collect every public (non-underscore) attribute name exposed by
#    yellowbrick.model_selection, in the order dir() reports them.
all_items = list(filter(lambda attr: not attr.startswith('_'), dir(model_selection)))
print("All items in yellowbrick.model_selection module:")
print(all_items)

In [None]:
# Sort the public names into three buckets according to what they refer to.
# Names that are neither a function, a class, nor a module are ignored.
functions, classes, submodules = [], [], []

for name in all_items:
    obj = getattr(model_selection, name)
    # First matching kind wins, mirroring a function -> class -> module precedence.
    bucket = (
        functions if inspect.isfunction(obj)
        else classes if inspect.isclass(obj)
        else submodules if inspect.ismodule(obj)
        else None
    )
    if bucket is not None:
        bucket.append(name)

# Report each bucket: a count line followed by the names themselves.
print(f"Functions (quick methods): {len(functions)}")
print(functions)
print(f"\nClasses (visualizers): {len(classes)}")
print(classes)
print(f"\nSubmodules: {len(submodules)}")
print(submodules)

In [None]:
# =============================================================================
# YELLOWBRICK MODEL SELECTION MODULE OVERVIEW
# =============================================================================
#
# yellowbrick.model_selection provides visualizers for model evaluation:
# - Hyperparameter tuning (ValidationCurve)
# - Dataset size impact (LearningCurve)
# - Cross-validation performance (CVScores)
# - Feature importance analysis (FeatureImportances)
# - Feature selection (RFECV, DroppingCurve)
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/index.html
#
# =============================================================================
# AVAILABLE VISUALIZERS (6 total):
# =============================================================================
#
# 1. ValidationCurve     - Hyperparameter tuning (bias-variance tradeoff)
# 2. LearningCurve       - Training size vs performance
# 3. CVScores            - Cross-validation scores bar chart
# 4. FeatureImportances  - Feature importance ranking
# 5. RFECV               - Recursive Feature Elimination with CV
# 6. DroppingCurve       - Feature dropping impact analysis
#
# =============================================================================
# CATBOOST COMPATIBILITY ANALYSIS
# =============================================================================
#
# CatBoost is NOT sklearn-compatible:
# - Does NOT inherit from sklearn.base.BaseEstimator
# - Cannot be cloned with sklearn.base.clone()
# - Missing __sklearn_tags__ attribute
#
# BUT CatBoost HAS:
# - feature_importances_ attribute (for FeatureImportances)
# - classes_ attribute
# - fit(), predict(), predict_proba() methods
#
# SOLUTION: Use CatBoostWrapperCV for CV-based visualizers
# (see apps/sklearn/functions.py for implementation)
#
# =============================================================================
# Emit the four-line overview in a single call; one argument per output line.
print(
    "Yellowbrick Model Selection Module - 6 Visualizers Available",
    "  - ALL require an estimator",
    "  - Most require CV (cloning) - need CatBoostWrapperCV",
    "  - FeatureImportances works with pre-fitted CatBoost (has feature_importances_)",
    sep="\n",
)

In [None]:
# =============================================================================
# CATBOOST SKLEARN-COMPATIBLE WRAPPER
# =============================================================================
# Required for all CV-based visualizers (ValidationCurve, LearningCurve, etc.)
# These visualizers clone the estimator for each CV fold.
# =============================================================================

class CatBoostWrapperCV(BaseEstimator, ClassifierMixin):
    """CatBoost wrapper for CV-based visualizers (can be cloned and re-fitted).

    This wrapper inherits from sklearn's BaseEstimator and ClassifierMixin,
    making it compatible with sklearn's clone() function and YellowBrick
    visualizers that require cross-validation.

    Parameters
    ----------
    iterations, depth, learning_rate, auto_class_weights :
        Forwarded unchanged to ``CatBoostClassifier`` when ``fit`` is called.
    random_state : int
        Forwarded to ``CatBoostClassifier`` as ``random_seed``.

    Attributes (available only after ``fit``)
    -----------------------------------------
    model_ : the fitted ``CatBoostClassifier``
    classes_ : ndarray built from ``model_.classes_``
    feature_importances_ : taken from ``model_.feature_importances_``
    """
    _estimator_type = 'classifier'

    def __init__(self, iterations=100, depth=6, learning_rate=0.1,
                 auto_class_weights='Balanced', random_state=42):
        # Only hyperparameters are stored here. Fitted state (names ending in
        # "_") is deliberately NOT pre-created: sklearn's check_is_fitted()
        # treats the mere presence of such attributes as "already fitted".
        self.iterations = iterations
        self.depth = depth
        self.learning_rate = learning_rate
        self.auto_class_weights = auto_class_weights
        self.random_state = random_state

    def fit(self, X, y):
        """Train a fresh CatBoostClassifier on (X, y) and return ``self``."""
        # Imported lazily so the class can be defined without catboost installed.
        from catboost import CatBoostClassifier
        self.model_ = CatBoostClassifier(
            iterations=self.iterations,
            depth=self.depth,
            learning_rate=self.learning_rate,
            auto_class_weights=self.auto_class_weights,
            random_seed=self.random_state,
            verbose=0
        )
        self.model_.fit(X, y)
        self.classes_ = np.array(self.model_.classes_)
        self.feature_importances_ = self.model_.feature_importances_
        return self

    def predict(self, X):
        """Return the underlying model's predictions for X, flattened to 1-D.

        Raises sklearn's NotFittedError when called before ``fit``.
        """
        from sklearn.utils.validation import check_is_fitted
        check_is_fitted(self, "model_")
        return self.model_.predict(X).flatten()

    def predict_proba(self, X):
        """Return the underlying model's ``predict_proba`` output for X.

        Raises sklearn's NotFittedError when called before ``fit``.
        """
        from sklearn.utils.validation import check_is_fitted
        check_is_fitted(self, "model_")
        return self.model_.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        """Return the accuracy of ``predict(X)`` against ``y``.

        ``sample_weight`` is optional and is passed through to
        ``accuracy_score``; omitting it keeps the unweighted accuracy.
        """
        from sklearn.metrics import accuracy_score
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)


# Smoke test: an unfitted wrapper must survive sklearn's clone().
from sklearn.base import clone
wrapper = CatBoostWrapperCV(iterations=10)
try:
    cloned = clone(wrapper)
    clone_report = "CatBoostWrapperCV can be cloned"
except Exception as e:
    # Report the failure instead of aborting the notebook cell.
    clone_report = f"Cloning failed: {e}"
print(clone_report)

In [None]:
# =============================================================================
# 1. VALIDATION CURVE VISUALIZER
# =============================================================================
#
# Purpose: Examines how adjusting a hyperparameter affects training and test
# scores, helping identify the optimal value and detect overfitting.
#
# Use Case: Tune hyperparameters by visualizing bias-variance tradeoff.
#
# CatBoost Compatibility: REQUIRES CatBoostWrapperCV
# - Clones estimator for each param value x CV fold
# - Very computationally expensive
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/validation_curve.html
# =============================================================================

# Visualizer class used for hyperparameter sweeps.
validation_curve_class = model_selection.ValidationCurve

# Reference kwargs for ValidationCurve; insertion order drives the listing below.
validation_curve_kwargs = dict(
    # --- required ---
    estimator=None,              # placeholder: supply a sklearn-compatible estimator (CatBoostWrapperCV)
    param_name="depth",          # hyperparameter being swept
    param_range=[2, 4, 6, 8],    # candidate values for that hyperparameter
    # --- cross-validation ---
    cv=None,                     # CV strategy; None leaves the library default in place
    scoring="f1",                # metric name, e.g. 'f1', 'accuracy', 'roc_auc'
    # --- display / execution ---
    ax=None,                     # matplotlib Axes to draw on
    logx=False,                  # log-scale x-axis toggle
    n_jobs=1,                    # parallel jobs (-1 for all cores)
)

print("ValidationCurve kwargs:")
for key, value in validation_curve_kwargs.items():
    print(f"  {key}: {value}")

# Cost illustration: one fit per (parameter value, fold) pair, assuming 5 folds.
n_param_values = len(validation_curve_kwargs["param_range"])
print("\nWARNING: ValidationCurve is VERY SLOW!")
print("  Fits: n_params * n_splits models")
print(f"  Example: {n_param_values} params * 5 folds = {n_param_values * 5} fits")

In [None]:
# =============================================================================
# 2. LEARNING CURVE VISUALIZER
# =============================================================================
#
# Purpose: Shows how model performance changes with training set size,
# helping diagnose high variance (overfitting) vs high bias (underfitting).
#
# Use Case: Determine if more data would help improve performance.
#
# CatBoost Compatibility: REQUIRES CatBoostWrapperCV
# - Clones estimator for each train_size x CV fold
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/learning_curve.html
# =============================================================================

# Visualizer class that plots score against training-set size.
learning_curve_class = model_selection.LearningCurve

# Reference kwargs for LearningCurve; insertion order drives the listing below.
learning_curve_kwargs = dict(
    # --- required ---
    estimator=None,                            # placeholder: supply a sklearn-compatible estimator
    # --- training-set fractions: five evenly spaced values from 0.1 to 1.0 ---
    train_sizes=np.linspace(0.1, 1.0, 5),
    # --- cross-validation ---
    cv=None,                                   # CV strategy; None leaves the library default in place
    scoring="f1",                              # metric name
    # --- display / execution ---
    ax=None,                                   # matplotlib Axes to draw on
    n_jobs=1,                                  # parallel jobs
    random_state=42,                           # seed for reproducibility
)

print("LearningCurve kwargs:")
for key, value in learning_curve_kwargs.items():
    print(f"  {key}: {value}")

# How to read the resulting plot.
print(
    "\nInterpretation:",
    "  - Training score >> Test score: Overfitting (high variance)",
    "  - Both scores low: Underfitting (high bias)",
    "  - Both scores high and close: Good fit",
    sep="\n",
)

In [None]:
# =============================================================================
# 3. CV SCORES VISUALIZER
# =============================================================================
#
# Purpose: Displays cross-validated scores as a bar chart with mean line,
# showing performance consistency across folds.
#
# Use Case: Evaluate model stability and identify problematic folds.
#
# CatBoost Compatibility: REQUIRES CatBoostWrapperCV
# - Wraps sklearn.model_selection.cross_val_score
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/cross_validation.html
# =============================================================================

# Visualizer class that charts per-fold cross-validation scores.
cv_scores_class = model_selection.CVScores

# Reference kwargs for CVScores; insertion order drives the listing below.
cv_scores_kwargs = dict(
    # --- required ---
    estimator=None,      # placeholder: supply a sklearn-compatible estimator
    # --- cross-validation ---
    cv=None,             # CV strategy (int or cross-validator object)
    scoring="f1",        # metric name
    # --- display ---
    ax=None,             # matplotlib Axes to draw on
    color=None,          # bar colour
)

print("CVScores kwargs:")
for key, value in cv_scores_kwargs.items():
    print(f"  {key}: {value}")

# Project guidance for the fraud-detection (TFD) use case.
print(
    "\nBest practices for TFD:",
    "  - Use StratifiedKFold for imbalanced data",
    "  - Use scoring='f1' or 'average_precision' (not 'accuracy')",
    sep="\n",
)

In [None]:
# =============================================================================
# 4. FEATURE IMPORTANCES VISUALIZER
# =============================================================================
#
# Purpose: Displays feature importances as a horizontal bar chart,
# showing which features the model relies on most.
#
# Use Case: Model interpretation, feature selection, debugging.
#
# CatBoost Compatibility: WORKS with pre-fitted CatBoost!
# - CatBoost HAS feature_importances_ attribute
# - Use is_fitted=True to skip refitting
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/importances.html
# =============================================================================

# Visualizer class that ranks features by importance.
feature_importances_class = model_selection.FeatureImportances

# Reference kwargs for FeatureImportances; insertion order drives the listing below.
feature_importances_kwargs = dict(
    # --- required ---
    estimator=None,      # placeholder: supply an estimator exposing feature_importances_
    # --- display ---
    ax=None,             # matplotlib Axes to draw on
    labels=None,         # feature names
    relative=True,       # show as % of the max importance
    absolute=False,      # use absolute values
    stack=False,         # stack bars for multi-class
    topn=None,           # show only the top N features
    colors=None,         # bar colours
    colormap=None,       # colormap
    # --- CatBoost compatibility ---
    is_fitted=True,      # IMPORTANT: skip refitting so a pre-fitted CatBoost model is used as-is
)

print("FeatureImportances kwargs:")
for key, value in feature_importances_kwargs.items():
    print(f"  {key}: {value}")

# Reminder of how the pre-fitted CatBoost model is consumed.
print(
    "\nCatBoost Note:",
    "  - Set is_fitted=True to use pre-fitted CatBoost model",
    "  - CatBoost provides feature_importances_ after fit()",
    sep="\n",
)

In [None]:
# =============================================================================
# 5. RFECV VISUALIZER (Recursive Feature Elimination with CV)
# =============================================================================
#
# Purpose: Progressively removes features and plots CV scores,
# finding the optimal number of features.
#
# Use Case: Automated feature selection with performance optimization.
#
# CatBoost Compatibility: REQUIRES CatBoostWrapperCV
# - Requires coef_ OR feature_importances_
# - CatBoostWrapperCV exposes feature_importances_
#
# WARNING: EXTREMELY SLOW! Fits n_features * n_splits models.
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/rfecv.html
# =============================================================================

# Visualizer class for recursive feature elimination with cross-validation.
rfecv_class = model_selection.RFECV

# Reference kwargs for RFECV; insertion order drives the listing below.
rfecv_kwargs = dict(
    # --- required ---
    estimator=None,              # placeholder: supply an estimator with coef_ or feature_importances_
    # --- feature elimination ---
    step=1,                      # features removed per iteration
    min_features_to_select=1,    # lower bound on features kept
    # --- cross-validation ---
    cv=None,                     # CV strategy
    scoring="f1",                # metric name
    groups=None,                 # group labels for GroupKFold
    # --- display ---
    ax=None,                     # matplotlib Axes to draw on
)

print("RFECV kwargs:")
for key, value in rfecv_kwargs.items():
    print(f"  {key}: {value}")

# Cost illustration for the 16-feature, 5-fold case.
print(
    "\nWARNING: RFECV is EXTREMELY SLOW!",
    "  For 16 features with 5-fold CV:",
    "  16 * 5 = 80 model fits minimum",
    "  Consider using step=2 or higher to reduce fits",
    sep="\n",
)

In [None]:
# =============================================================================
# 6. DROPPING CURVE VISUALIZER
# =============================================================================
#
# Purpose: Evaluates performance while randomly removing feature subsets,
# showing how sensitive the model is to feature count.
#
# Use Case: Understand feature redundancy and model robustness.
#
# CatBoost Compatibility: REQUIRES CatBoostWrapperCV
# - Similar to LearningCurve but varies feature count
#
# Reference: https://www.scikit-yb.org/en/latest/api/model_selection/dropping_curve.html
# =============================================================================

# Visualizer class that plots score against the fraction of features kept.
dropping_curve_class = model_selection.DroppingCurve

# Reference kwargs for DroppingCurve; insertion order drives the listing below.
dropping_curve_kwargs = dict(
    # --- required ---
    estimator=None,                              # placeholder: supply a sklearn-compatible estimator
    # --- feature fractions: five evenly spaced values from 0.1 to 1.0 ---
    feature_sizes=np.linspace(0.1, 1.0, 5),
    # --- cross-validation ---
    cv=None,                                     # CV strategy; None leaves the library default in place
    scoring="f1",                                # metric name
    # --- display / execution ---
    ax=None,                                     # matplotlib Axes to draw on
    n_jobs=1,                                    # parallel jobs
    random_state=42,                             # seed for reproducibility
)

print("DroppingCurve kwargs:")
for key, value in dropping_curve_kwargs.items():
    print(f"  {key}: {value}")

# How to read the resulting plot.
print(
    "\nInterpretation:",
    "  - Score stable across feature counts: Redundant features",
    "  - Score drops sharply: Important features being removed",
    sep="\n",
)

In [None]:
# =============================================================================
# CATBOOST COMPATIBILITY SUMMARY
# =============================================================================

# Print the compatibility table and the TFD recommendations.
# Each argument is one output line; "" produces a blank separator line.
print(
    "="*72,
    " CATBOOST COMPATIBILITY SUMMARY ".center(72),
    "="*72,
    "",
    "| Visualizer          | CatBoost Direct | With Wrapper | Speed     |",
    "|---------------------|-----------------|--------------|-----------|",
    "| ValidationCurve     | NO (needs CV)   | YES          | VERY SLOW |",
    "| LearningCurve       | NO (needs CV)   | YES          | SLOW      |",
    "| CVScores            | NO (needs CV)   | YES          | MODERATE  |",
    "| FeatureImportances  | YES (is_fitted) | N/A          | FAST      |",
    "| RFECV               | NO (needs CV)   | YES          | VERY SLOW |",
    "| DroppingCurve       | NO (needs CV)   | YES          | SLOW      |",
    "",
    "RECOMMENDATION FOR TFD:",
    "  1. FeatureImportances - ALWAYS USE (fast, works with CatBoost)",
    "  2. CVScores - USE for model stability assessment",
    "  3. LearningCurve - USE if time permits",
    "  4. ValidationCurve - SKIP unless hyperparameter tuning needed",
    "  5. DroppingCurve - OPTIONAL for feature redundancy analysis",
    "  6. RFECV - SKIP (too slow, use FeatureImportances instead)",
    sep="\n",
)

In [None]:
# =============================================================================
# BEST CONFIGURATION FOR TRANSACTION FRAUD DETECTION (TFD)
# =============================================================================

def yellowbrick_model_selection_kwargs(
    classes=None,
    feature_names=None,
    verbose=True
):
    """
    Generate kwargs for the enabled yellowbrick.model_selection visualizers (TFD optimized).

    Parameters:
    -----------
    classes : list, optional
        Class labels (default: ["Non-Fraud", "Fraud"]). Only used in the
        printed summary; it is not placed in any visualizer's kwargs.
    feature_names : sequence, optional
        Feature names, passed as ``labels`` to FeatureImportances
        (default: None). Lists, NumPy arrays and pandas Index objects are
        all accepted.
    verbose : bool
        Print configuration details (default: True)

    Returns:
    --------
    dict : Dictionary of visualizer name -> kwargs. Contains entries for
        FeatureImportances, CVScores, LearningCurve and DroppingCurve;
        ValidationCurve and RFECV are left commented out below.

    Note:
    -----
    All CV-based visualizers require CatBoostWrapperCV.
    FeatureImportances works with pre-fitted CatBoost (is_fitted=True).
    """
    if classes is None:
        classes = ["Non-Fraud", "Fraud"]

    kwargs = {
        # =================================================================
        # PRIMARY: FeatureImportances (FAST, works with CatBoost directly)
        # =================================================================
        "FeatureImportances": {
            "labels": feature_names,
            "relative": True,              # Show as % of max
            "absolute": False,
            "topn": None,                  # Show all features
            "is_fitted": True,             # CatBoost compatibility
        },

        # =================================================================
        # SECONDARY: CVScores (moderate speed with CatBoostWrapperCV)
        # =================================================================
        "CVScores": {
            "cv": 5,                       # 5-fold CV
            "scoring": "f1",               # F1 score for imbalanced data
        },

        # =================================================================
        # OPTIONAL: LearningCurve (slow but insightful)
        # =================================================================
        "LearningCurve": {
            "train_sizes": np.linspace(0.1, 1.0, 5),
            "cv": 3,                       # Reduce folds for speed
            "scoring": "f1",
            "random_state": 42,
        },

        # =================================================================
        # OPTIONAL: DroppingCurve (slow, for feature redundancy)
        # =================================================================
        "DroppingCurve": {
            "feature_sizes": np.linspace(0.1, 1.0, 5),
            "cv": 3,
            "scoring": "f1",
            "random_state": 42,
        },

        # =================================================================
        # SLOW: ValidationCurve (only for hyperparameter tuning)
        # Uncomment if needed for specific hyperparameter analysis
        # =================================================================
        # "ValidationCurve": {
        #     "param_name": "depth",
        #     "param_range": [4, 6, 8, 10],
        #     "cv": 3,
        #     "scoring": "f1",
        # },

        # =================================================================
        # VERY SLOW: RFECV (skip for TFD, use FeatureImportances instead)
        # =================================================================
        # "RFECV": {
        #     "step": 2,                   # Remove 2 features per iteration
        #     "cv": 3,
        #     "scoring": "f1",
        # },
    }

    if verbose:
        # Count via len() rather than truthiness: `if feature_names` raises
        # ValueError for NumPy arrays / pandas Index objects with more than
        # one element, which is what X.columns typically is.
        n_features = 0 if feature_names is None else len(feature_names)
        features_desc = f"{n_features} features" if n_features else "auto-detect"

        print()
        print("+" + "=" * 72 + "+")
        print("|" + " YELLOWBRICK MODEL SELECTION CONFIGURATION ".center(72) + "|")
        print("+" + "=" * 72 + "+")
        print(f"| {'Classes:':<20} {str(classes):<50} |")
        print(f"| {'Features:':<20} {features_desc:<50} |")
        print("+" + "-" * 72 + "+")
        print(f"| {'Configured ' + str(len(kwargs)) + ' visualizers:':<72} |")
        print("+" + "-" * 72 + "+")

        # Rough cost label shown next to each configured visualizer.
        speed_map = {
            "FeatureImportances": "FAST",
            "CVScores": "MODERATE",
            "LearningCurve": "SLOW",
            "DroppingCurve": "SLOW",
            "ValidationCurve": "VERY SLOW",
            "RFECV": "VERY SLOW",
        }

        for i, name in enumerate(kwargs, 1):
            speed = speed_map.get(name, "UNKNOWN")
            # Only FeatureImportances consumes the pre-fitted model directly.
            wrapper = "CatBoost direct" if name == "FeatureImportances" else "CatBoostWrapperCV"
            print(f"|   {i:>2}. {name:<25} [{speed:<10}] {wrapper:<20} |")

        print("+" + "=" * 72 + "+")
        print()

    return kwargs


# Smoke-test the configuration builder with its defaults spelled out
# (verbose=True, so the configuration summary is printed).
test_kwargs = yellowbrick_model_selection_kwargs(
    classes=None,
    feature_names=None,
    verbose=True,
)

In [None]:
# =============================================================================
# VISUALIZERS EXECUTION FUNCTION
# =============================================================================

def yellowbrick_model_selection_visualizers(
    yb_model_selection_kwargs,
    estimator,
    X_train,
    X_test,
    y_train,
    y_test,
    YELLOWBRICK_PATH,
    verbose=True
):
    """
    Run the configured model selection visualizers and save each one as a PNG.

    Parameters:
    -----------
    yb_model_selection_kwargs : dict
        Mapping of visualizer name -> kwargs, e.g. the output of
        yellowbrick_model_selection_kwargs()
    estimator : fitted model
        Pre-fitted CatBoost model (for FeatureImportances)
    X_train, X_test : array-like
        Training and test features (must expose ``.shape``)
    y_train, y_test : array-like
        Training and test labels
    YELLOWBRICK_PATH : str
        Base directory; images go to ``<YELLOWBRICK_PATH>/model_selection/``
    verbose : bool
        Print progress details (default: True)

    Returns:
    --------
    dict : ``{"successful": int, "failed": int, "total_time": float,
        "results": list}`` where each result holds the visualizer's
        ``name``, ``status`` ("OK"/"FAILED"), ``time``, ``output`` path
        and ``error`` message.

    CatBoost Handling:
    ------------------
    - FeatureImportances: Uses pre-fitted estimator and the training split
    - All others: Creates CatBoostWrapperCV and fits on train+test combined

    A failure in one visualizer is recorded in the results and does not
    stop the remaining ones.
    """
    import matplotlib.pyplot as plt
    import os
    import time
    from datetime import datetime
    import pandas as pd
    from yellowbrick.model_selection import (
        FeatureImportances,
        CVScores,
        LearningCurve,
        DroppingCurve,
        ValidationCurve,
        RFECV,
    )

    os.makedirs(f"{YELLOWBRICK_PATH}/model_selection", exist_ok=True)

    # Map visualizer names to classes
    visualizer_map = {
        "FeatureImportances": FeatureImportances,
        "CVScores": CVScores,
        "LearningCurve": LearningCurve,
        "DroppingCurve": DroppingCurve,
        "ValidationCurve": ValidationCurve,
        "RFECV": RFECV,
    }

    total = len(yb_model_selection_kwargs)
    results = []
    start_total = time.time()

    if verbose:
        print()
        print("+" + "=" * 72 + "+")
        print("|" + " YELLOWBRICK MODEL SELECTION ANALYSIS ".center(72) + "|")
        print("+" + "=" * 72 + "+")
        print(f"| {'Started:':<15} {datetime.now().strftime('%Y-%m-%d %H:%M:%S'):<55} |")
        print(f"| {'Train shape:':<15} {str(X_train.shape[0]) + ' samples':<55} |")
        print(f"| {'Test shape:':<15} {str(X_test.shape[0]) + ' samples':<55} |")
        print(f"| {'Model:':<15} {type(estimator).__name__:<55} |")
        print(f"| {'Output:':<15} {YELLOWBRICK_PATH + '/model_selection/':<55} |")
        print(f"| {'Visualizers:':<15} {str(total) + ' to process':<55} |")
        print("+" + "=" * 72 + "+")
        print()

    # Combine data for CV-based visualizers.
    # pandas objects are detected with isinstance: they have no `.concat`
    # attribute (pd.concat is a module-level function), so an attribute probe
    # would always fall through to NumPy and drop column names and dtypes.
    pandas_types = (pd.DataFrame, pd.Series)
    if isinstance(X_train, pandas_types) and isinstance(X_test, pandas_types):
        X_full = pd.concat([X_train, X_test], ignore_index=True)
    else:
        X_full = np.vstack([X_train, X_test])
    if isinstance(y_train, pandas_types) and isinstance(y_test, pandas_types):
        y_full = pd.concat([y_train, y_test], ignore_index=True)
    else:
        y_full = np.hstack([y_train, y_test])

    for idx, (visualizer_name, kwargs) in enumerate(yb_model_selection_kwargs.items(), 1):
        start_time = time.time()
        status = "OK"
        error_msg = None
        output_path = None

        if verbose:
            progress = f"[{idx}/{total}]"
            bar_width = 20
            filled = int(bar_width * idx / total)
            bar = "\u2588" * filled + "\u2591" * (bar_width - filled)
            print(f"{progress} |{bar}| {visualizer_name}...", end=" ", flush=True)

        try:
            vis_class = visualizer_map.get(visualizer_name)
            if vis_class is None:
                raise ValueError(f"Unknown visualizer: {visualizer_name}")

            if visualizer_name == "FeatureImportances":
                # FeatureImportances works with pre-fitted CatBoost
                visualizer = vis_class(estimator, **kwargs)
                visualizer.fit(X_train, y_train)
            else:
                # All other visualizers need CatBoostWrapperCV
                cv_wrapper = CatBoostWrapperCV(
                    iterations=100,
                    depth=6,
                    learning_rate=0.1,
                )
                visualizer = vis_class(cv_wrapper, **kwargs)
                visualizer.fit(X_full, y_full)

            visualizer.show()
            output_path = f"{YELLOWBRICK_PATH}/model_selection/{visualizer_name}.png"
            visualizer.fig.savefig(output_path, dpi=150, bbox_inches="tight")

        except Exception as e:
            # Best-effort run: record the failure and carry on with the rest.
            status = "FAILED"
            error_msg = str(e)
            # A path set before savefig failed must not be reported as saved.
            output_path = None

        finally:
            # Release figure resources whether or not the visualizer succeeded.
            plt.clf()
            plt.close('all')

        elapsed = time.time() - start_time
        results.append({
            "name": visualizer_name,
            "status": status,
            "time": elapsed,
            "output": output_path,
            "error": error_msg
        })

        if verbose:
            if status == "OK":
                print(f"\u2713 ({elapsed:.2f}s)")
            else:
                print(f"\u2717 FAILED ({elapsed:.2f}s)")
                # Truncate long messages so the progress log stays readable.
                err_display = error_msg[:60] + "..." if len(str(error_msg)) > 60 else error_msg
                print(f"         \u2514\u2500 Error: {err_display}")

    total_time = time.time() - start_total
    successful = sum(1 for r in results if r["status"] == "OK")
    failed = sum(1 for r in results if r["status"] == "FAILED")

    if verbose:
        print()
        print("+" + "=" * 72 + "+")
        print("|" + " SUMMARY ".center(72) + "|")
        print("+" + "=" * 72 + "+")
        print(f"| {'Completed:':<15} {datetime.now().strftime('%Y-%m-%d %H:%M:%S'):<55} |")
        print(f"| {'Total time:':<15} {f'{total_time:.2f} seconds':<55} |")
        print(f"| {'Successful:':<15} {f'{successful}/{total} visualizers':<55} |")
        if failed > 0:
            print(f"| {'Failed:':<15} {f'{failed}/{total} visualizers':<55} |")
        print("+" + "-" * 72 + "+")
        print("|" + " TIMING BREAKDOWN ".center(72) + "|")
        print("+" + "-" * 72 + "+")

        # Slowest first; bars are scaled against the slowest visualizer.
        sorted_results = sorted(results, key=lambda x: x["time"], reverse=True)
        time_bar_width = 20
        max_time = max(r["time"] for r in results) if results else 1
        for r in sorted_results:
            status_icon = "\u2713" if r["status"] == "OK" else "\u2717"
            filled = int(time_bar_width * r["time"] / max_time) if max_time > 0 else 0
            time_bar = "\u2593" * filled + "\u2591" * (time_bar_width - filled)
            print(f"| {status_icon} {r['name']:<30} |{time_bar}| {r['time']:>8.2f}s |")

        print("+" + "=" * 72 + "+")

        if successful > 0:
            print()
            print("Saved visualizations:")
            for r in results:
                if r["status"] == "OK" and r["output"]:
                    print(f"  \u2192 {r['output']}")
        print()

    return {
        "successful": successful,
        "failed": failed,
        "total_time": total_time,
        "results": results
    }


# Confirm the runner is defined and recap which estimator each path uses.
print(
    "yellowbrick_model_selection_visualizers() function defined",
    "  - FeatureImportances: Uses pre-fitted CatBoost directly",
    "  - All others: Uses CatBoostWrapperCV for CV-based training",
    sep="\n",
)

In [None]:
# =============================================================================
# KEY INSIGHTS FOR TFD MODEL SELECTION
# =============================================================================
#
# 1. VISUALIZER PRIORITY FOR TFD:
#    - FeatureImportances: ESSENTIAL (understand model decisions)
#    - CVScores: RECOMMENDED (validate model stability)
#    - LearningCurve: OPTIONAL (check if more data helps)
#    - DroppingCurve: OPTIONAL (feature redundancy analysis)
#    - ValidationCurve: SKIP (use Optuna for hyperparameter tuning)
#    - RFECV: SKIP (too slow, use FeatureImportances + domain knowledge)
#
# 2. CATBOOST COMPATIBILITY:
#    | Visualizer          | Works with CatBoost | Wrapper Needed |
#    |---------------------|---------------------|----------------|
#    | FeatureImportances  | YES (is_fitted=True)| NO             |
#    | CVScores            | NO                  | CatBoostWrapperCV |
#    | LearningCurve       | NO                  | CatBoostWrapperCV |
#    | DroppingCurve       | NO                  | CatBoostWrapperCV |
#    | ValidationCurve     | NO                  | CatBoostWrapperCV |
#    | RFECV               | NO                  | CatBoostWrapperCV |
#
# 3. PERFORMANCE CONSIDERATIONS:
#    - FeatureImportances: O(1) - just reads model attributes
#    - CVScores: O(n_folds) - moderate
#    - LearningCurve: O(n_sizes * n_folds) - slow
#    - DroppingCurve: O(n_sizes * n_folds) - slow
#    - ValidationCurve: O(n_params * n_folds) - very slow
#    - RFECV: O(n_features * n_folds) - extremely slow
#
# 4. BEST PRACTICES:
#    - Always use FeatureImportances after training
#    - Use CVScores with StratifiedKFold for imbalanced data
#    - Use scoring='f1' or 'average_precision' (not 'accuracy')
#    - Reduce CV folds (cv=3) for slow visualizers
#    - Sample data for very slow visualizers
#
# =============================================================================

# Closing recap: one argument per output line, "" for the blank separators.
print(
    "KEY INSIGHTS DOCUMENTED ABOVE",
    "",
    "YELLOWBRICK MODEL SELECTION SUMMARY:",
    "  Total Visualizers: 6",
    "",
    "  RECOMMENDED FOR TFD:",
    "    1. FeatureImportances (FAST, CatBoost-native)",
    "    2. CVScores (MODERATE, model stability)",
    "",
    "  OPTIONAL:",
    "    3. LearningCurve (SLOW, data sufficiency)",
    "    4. DroppingCurve (SLOW, feature redundancy)",
    "",
    "  SKIP FOR TFD:",
    "    5. ValidationCurve (use Optuna instead)",
    "    6. RFECV (too slow, use FeatureImportances)",
    sep="\n",
)