In [1]:
from yellowbrick import features
from sklearn.model_selection import StratifiedKFold
import inspect

In [2]:
# 1) Collect every public name (no leading underscore) exposed by yellowbrick.features
all_items = list(filter(lambda attr: not attr.startswith('_'), dir(features)))
print("All items in yellowbrick.features module:", all_items, sep="\n")

All items in yellowbrick.features module:
['FeatureImportances', 'JointPlot', 'JointPlotVisualizer', 'Manifold', 'PCA', 'PCADecomposition', 'ParallelCoordinates', 'RFECV', 'RadViz', 'RadialVisualizer', 'Rank1D', 'Rank2D', 'TargetType', 'base', 'feature_importances', 'joint_plot', 'jointplot', 'manifold', 'manifold_embedding', 'parallel_coordinates', 'pca', 'pca_decomposition', 'pcoords', 'projection', 'radviz', 'rank1d', 'rank2d', 'rankd', 'rfecv']


In [3]:
# Sort every public name into one of three buckets according to object kind
functions, classes, submodules = [], [], []

# Tried top to bottom: a name lands in the first bucket whose predicate
# passes, and is left out entirely when none of them pass.
kind_buckets = (
    (inspect.isfunction, functions),
    (inspect.isclass, classes),
    (inspect.ismodule, submodules),
)

for name in all_items:
    obj = getattr(features, name)
    target = next((bucket for is_kind, bucket in kind_buckets if is_kind(obj)), None)
    if target is not None:
        target.append(name)

# Report each bucket: a count line followed by the names themselves
for heading, members in (
    ("Functions (quick methods)", functions),
    ("\nClasses (visualizers)", classes),
    ("\nSubmodules", submodules),
):
    print(f"{heading}: {len(members)}")
    print(members)

Functions (quick methods): 9
['feature_importances', 'joint_plot', 'manifold_embedding', 'parallel_coordinates', 'pca_decomposition', 'radviz', 'rank1d', 'rank2d', 'rfecv']

Classes (visualizers): 13
['FeatureImportances', 'JointPlot', 'JointPlotVisualizer', 'Manifold', 'PCA', 'PCADecomposition', 'ParallelCoordinates', 'RFECV', 'RadViz', 'RadialVisualizer', 'Rank1D', 'Rank2D', 'TargetType']

Submodules: 7
['base', 'jointplot', 'manifold', 'pca', 'pcoords', 'projection', 'rankd']


In [None]:
# =============================================================================
# YELLOWBRICK FEATURES MODULE OVERVIEW
# =============================================================================
#
# yellowbrick.features provides visualizers for feature analysis, helping to:
# - Understand feature distributions and relationships
# - Detect class separability in feature space
# - Identify feature correlations and covariance
# - Perform dimensionality reduction visualization
# - Analyze feature importances from trained models
#
# All visualizers implement the scikit-learn Transformer API:
# - fit(X, y) - Train the visualizer
# - transform(X) - Transform data (some visualizers)
# - fit_transform(X, y) - Fit and transform in one step
# - show() - Display the visualization
#
# =============================================================================
# AVAILABLE VISUALIZERS (9 total):
# =============================================================================
#
# DATA-ONLY VISUALIZERS (no estimator required):
# 1. ParallelCoordinates - Multi-dimensional line plot
# 2. RadViz - Radial visualization on circle circumference
# 3. Rank1D - Single feature ranking (bar plot)
# 4. Rank2D - Pairwise feature ranking (heatmap)
# 5. PCA - Principal Component Analysis projection
# 6. Manifold - Non-linear dimensionality reduction (t-SNE, MDS, etc.)
# 7. JointPlot - 2D feature correlation with histograms
#
# ESTIMATOR-BASED VISUALIZERS (require sklearn estimator):
# 8. FeatureImportances - Feature importance from models exposing feature_importances_ or coef_
# 9. RFECV - Recursive Feature Elimination with Cross-Validation
#
# =============================================================================
# One-glance summary of the catalogue described in the comment block above
for summary_line in (
    "Yellowbrick Features Module - 9 Visualizers Available",
    "  - 7 Data-only (no model needed)",
    "  - 2 Estimator-based (require trained model)",
):
    print(summary_line)

In [5]:
# =============================================================================
# 1. PARALLEL COORDINATES VISUALIZER
# =============================================================================
#
# Purpose: Multi-dimensional feature visualization where each feature is a
# vertical axis and instances are line segments connecting feature values.
#
# Use Case: Detect "braids" of similar instances, visualize class separability,
# identify which features best separate classes.
#
# Best For:
# - Fraud Detection: See how fraudulent vs legitimate transactions differ
# - Feature Selection: Identify features that show class separation
# - Pattern Discovery: Find groups of instances with similar profiles
#
# =============================================================================

parallel_coordinates_class = features.ParallelCoordinates

# Default constructor arguments for ParallelCoordinates, grouped by concern.
parallel_coordinates_kwargs = dict(
    # -- appearance / labelling --
    ax=None,            # matplotlib Axes to draw on (default: None)
    features=None,      # feature names (default: None)
    classes=None,       # class names shown in the legend (default: None)
    colors=None,        # one color, or a list with one per class (default: None)
    colormap=None,      # colormap name or matplotlib colormap (default: None)

    # -- data handling before drawing --
    normalize=None,     # 'minmax', 'maxabs', 'standard', 'l1', 'l2', or None
    sample=1.0,         # float between 0 and 1, or an int (default: 1.0)
    shuffle=False,      # randomize which rows are sampled (default: False)
    random_state=None,  # seed for reproducible sampling (default: None)

    # -- rendering --
    alpha=None,         # None resolves to 0.5 in fast mode, 0.25 otherwise
    fast=False,         # fast drawing mode (default: False)
    vlines=True,        # draw the vertical axis lines (default: True)
    vlines_kwds=None,   # dict of style options for the vertical lines
)

# Equivalent one-call quick method:
# parallel_coordinates(X, y, ax=None, features=None, classes=None, colors=None,
#                      colormap=None, normalize=None, sample=1.0, shuffle=False,
#                      random_state=None, alpha=None, fast=False, vlines=True,
#                      vlines_kwds=None, show=True, **kwargs)

print("ParallelCoordinates kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in parallel_coordinates_kwargs.items()))

ParallelCoordinates kwargs:
  ax: None
  features: None
  classes: None
  colors: None
  colormap: None
  normalize: None
  sample: 1.0
  shuffle: False
  random_state: None
  alpha: None
  fast: False
  vlines: True
  vlines_kwds: None


In [6]:
# =============================================================================
# 2. RADVIZ (RadialVisualizer) VISUALIZER
# =============================================================================
#
# Purpose: Plots features uniformly around a circle circumference, then plots
# data points in the interior based on normalized feature values.
#
# How it works: Each point is positioned as if springs connect it to each
# feature anchor on the circle, with spring strength = feature value.
#
# Use Case: Detect class separability in high-dimensional data,
# visualize clustering potential.
#
# Best For:
# - Classification: See if classes form distinct regions
# - Feature Engineering: Understand feature influence on class separation
# - Outlier Detection: Spots that appear far from class clusters
#
# =============================================================================

radviz_class = features.RadViz  # RadViz is an alias of RadialVisualizer

# Default constructor arguments for RadViz.
radviz_kwargs = dict(
    ax=None,        # matplotlib Axes to draw on (default: None)
    features=None,  # feature names (default: None)
    classes=None,   # class labels shown in the legend (default: None)
    colors=None,    # colors assigned per instance or per class
    colormap=None,  # matplotlib colormap used to space the colors
    alpha=1.0,      # transparency, 0-1 (default: 1.0)
)

# Equivalent one-call quick method:
# radviz(X, y=None, ax=None, features=None, classes=None, colors=None,
#        colormap=None, alpha=1.0, show=True, **kwargs)

# Missing values (np.nan) are left out of the plot, and a DataWarning reports
# the percentage of data that was missing.

# MinMax scaling to the [0, 1] range is applied internally, automatically.

print("RadViz kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in radviz_kwargs.items()))

RadViz kwargs:
  ax: None
  features: None
  classes: None
  colors: None
  colormap: None
  alpha: 1.0


In [7]:
# =============================================================================
# 3. RANK1D VISUALIZER
# =============================================================================
#
# Purpose: Scores individual features using a single metric and displays
# results as a horizontal or vertical bar plot.
#
# Use Case: Quick feature importance ranking, identify features that
# deviate from normal distribution (Shapiro-Wilk test).
#
# Available Algorithms:
# - 'shapiro': Shapiro-Wilk test for normality (default)
#
# Best For:
# - Feature Selection: Rank features by statistical properties
# - Data Quality: Identify non-normal distributions
# - Preprocessing Decisions: Choose normalization strategies
#
# =============================================================================

rank1d_class = features.Rank1D

# Default constructor arguments for Rank1D.
rank1d_kwargs = dict(
    ax=None,                  # matplotlib Axes to draw on (default: None)
    algorithm="shapiro",      # ranking method; 'shapiro' is the default
    features=None,            # feature names (default: None)
    orient="h",               # bar direction: 'h' horizontal, 'v' vertical
    show_feature_names=True,  # label each bar with its feature (default: True)
    color=None,               # bar color (default: None)
)

# Equivalent one-call quick method:
# rank1d(X, y=None, ax=None, algorithm='shapiro', features=None, orient='h',
#        show_feature_names=True, color=None, show=True, **kwargs)

# Once fitted, the scores live in visualizer.ranks_ (ndarray, shape (n_features,))

print("Rank1D kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in rank1d_kwargs.items()))

Rank1D kwargs:
  ax: None
  algorithm: shapiro
  features: None
  orient: h
  show_feature_names: True
  color: None


In [8]:
# =============================================================================
# 4. RANK2D VISUALIZER
# =============================================================================
#
# Purpose: Performs pairwise feature comparisons and visualizes results
# as a lower-left triangle heatmap.
#
# Use Case: Detect feature correlations, identify redundant features,
# understand covariance structure.
#
# Available Algorithms:
# - 'pearson': Pearson correlation coefficient (default) - linear relationship
# - 'covariance': Covariance matrix - scale-dependent
# - 'spearman': Spearman rank correlation - monotonic relationship
# - 'kendalltau': Kendall's tau - ordinal association
#
# Best For:
# - Feature Selection: Remove highly correlated features
# - Multicollinearity Detection: Identify problematic feature pairs
# - Feature Engineering: Find features to combine
#
# =============================================================================

rank2d_class = features.Rank2D

# Default constructor arguments for Rank2D.
rank2d_kwargs = dict(
    ax=None,                  # matplotlib Axes to draw on (default: None)
    algorithm="pearson",      # 'pearson', 'covariance', 'spearman', 'kendalltau'
    features=None,            # feature names (default: None)
    colormap="RdBu_r",        # matplotlib colormap (default: 'RdBu_r')
    show_feature_names=True,  # label both axes with feature names (default: True)
)

# Equivalent one-call quick method:
# rank2d(X, y=None, ax=None, algorithm='pearson', features=None,
#        colormap='RdBu_r', show_feature_names=True, show=True, **kwargs)

# Once fitted, the pairwise scores live in visualizer.ranks_
# (ndarray, shape (n_features, n_features))

print("Rank2D kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in rank2d_kwargs.items()))

Rank2D kwargs:
  ax: None
  algorithm: pearson
  features: None
  colormap: RdBu_r
  show_feature_names: True


In [9]:
# =============================================================================
# 5. PCA PROJECTION VISUALIZER
# =============================================================================
#
# Purpose: Projects high-dimensional data into 2D or 3D space using
# Principal Component Analysis for visualization.
#
# Use Case: Visualize class separability in reduced dimensions,
# understand variance structure, create biplots.
#
# Features:
# - 2D and 3D projections
# - Biplot (proj_features=True) shows feature vectors
# - Heatmap shows feature contributions to components
# - Supports both discrete and continuous targets
#
# Best For:
# - Dimensionality Reduction: Visualize high-dim data
# - Class Separation: See if classes are linearly separable
# - Feature Importance: Biplot shows feature directions
#
# =============================================================================

pca_class = features.PCA

# Default constructor arguments for the PCA projection visualizer.
pca_kwargs = dict(
    ax=None,              # matplotlib Axes to draw on (default: None)
    features=None,        # column names matching the dataset (default: None)
    classes=None,         # class labels for discrete targets (default: None)
    scale=True,           # scale the data before decomposition (default: True)
    projection=2,         # number of plotted dimensions: 2 or 3 (default: 2)
    proj_features=False,  # overlay feature vectors, i.e. a biplot (default: False)
    colors=None,          # instance or class colors (default: None)
    colormap=None,        # color scheme (default: None)
    alpha=0.75,           # point transparency, 0-1 (default: 0.75)
    random_state=None,    # seed for reproducibility (default: None)
    colorbar=True,        # colorbar for continuous targets (default: True)
    heatmap=False,        # feature-contribution heatmap (default: False)
)

# Equivalent one-call quick method:
# pca_decomposition(X, y=None, ax=None, features=None, classes=None, scale=True,
#                   projection=2, proj_features=False, colors=None, colormap=None,
#                   alpha=0.75, random_state=None, colorbar=True, heatmap=False,
#                   show=True, **kwargs)

# Useful properties on the visualizer:
# - visualizer.pca_components_: feature magnitudes in the principal components
# - visualizer.lax: axes holding the heatmap
# - visualizer.uax: axes holding the colorbar

print("PCA kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in pca_kwargs.items()))

PCA kwargs:
  ax: None
  features: None
  classes: None
  scale: True
  projection: 2
  proj_features: False
  colors: None
  colormap: None
  alpha: 0.75
  random_state: None
  colorbar: True
  heatmap: False


In [10]:
# =============================================================================
# 6. MANIFOLD VISUALIZER
# =============================================================================
#
# Purpose: Visualizes high-dimensional data using non-linear manifold learning
# algorithms that capture complex structures PCA/SVD cannot.
#
# Available Manifold Algorithms:
# - 'lle': Locally Linear Embedding
# - 'ltsa': Local Tangent Space Alignment
# - 'hessian': Hessian-based LLE variant
# - 'modified': Regularized LLE
# - 'isomap': Geometric distance preservation
# - 'mds': Multi-Dimensional Scaling (default)
# - 'spectral': Graph-based embedding
# - 'tsne': t-Distributed Stochastic Neighbor Embedding
#
# Best For:
# - Non-linear Structure: When PCA fails to show separation
# - Cluster Visualization: t-SNE excels at showing clusters
# - Complex Patterns: Manifolds capture non-linear relationships
#
# Note: Manifold methods are computationally expensive for large datasets.
# Consider sampling for datasets > 10,000 instances.
#
# =============================================================================

manifold_class = features.Manifold

# Default constructor arguments for the Manifold visualizer.
manifold_kwargs = dict(
    ax=None,             # matplotlib Axes to draw on (default: None)
    manifold="mds",      # one of 'lle', 'ltsa', 'hessian', 'modified',
                         #        'isomap', 'mds', 'spectral', 'tsne'
    n_neighbors=None,    # neighbor count for the nearest-neighbor algorithms
    features=None,       # feature names (default: None)
    classes=None,        # class labels for discrete targets (default: None)
    colors=None,         # custom colors (default: None)
    colormap=None,       # color palette (default: None)
    target_type="auto",  # 'discrete', 'continuous', or 'auto'
    projection=2,        # number of plotted dimensions: 2 or 3 (default: 2)
    alpha=0.75,          # transparency, 0-1 (default: 0.75)
    random_state=None,   # RNG seed for the stochastic methods (default: None)
    colorbar=True,       # colorbar for continuous targets (default: True)
)

# Equivalent one-call quick method:
# manifold_embedding(X, y=None, ax=None, manifold='mds', n_neighbors=None,
#                    features=None, classes=None, colors=None, colormap=None,
#                    target_type='auto', projection=2, alpha=0.75,
#                    random_state=None, colorbar=True, show=True, **kwargs)

# Useful property on the visualizer:
# - visualizer.manifold: the underlying sklearn transformer

print("Manifold kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in manifold_kwargs.items()))

Manifold kwargs:
  ax: None
  manifold: mds
  n_neighbors: None
  features: None
  classes: None
  colors: None
  colormap: None
  target_type: auto
  projection: 2
  alpha: 0.75
  random_state: None
  colorbar: True


In [11]:
# =============================================================================
# 7. JOINTPLOT VISUALIZER
# =============================================================================
#
# Purpose: Visualizes the 2D correlation between two features or a feature
# and the target, with marginal histograms on axes.
#
# Use Case: Understand bivariate relationships, detect non-linear
# correlations, visualize distributions.
#
# Correlation Algorithms:
# - 'pearson': Linear correlation (default)
# - 'covariance': Covariance
# - 'spearman': Rank correlation
# - 'kendalltau': Ordinal association
#
# Best For:
# - Feature-Target Relationship: Understand single feature's predictive power
# - Feature Pairs: Visualize correlation between two features
# - Distribution Analysis: See marginal distributions alongside joint
#
# =============================================================================

jointplot_class = features.JointPlot

# Default constructor arguments for JointPlot.
jointplot_kwargs = dict(
    ax=None,                # matplotlib Axes to draw on (default: None)
    columns=None,           # int / str / list selecting the column(s) to plot
    correlation="pearson",  # 'pearson', 'covariance', 'spearman', 'kendalltau'
    kind="scatter",         # joint plot type: 'scatter' or 'hex'
    hist=True,              # marginal histograms: 'density', 'frequency', True, False
    alpha=0.65,             # transparency, 0-1 (default: 0.65)
    joint_kws=None,         # dict of extra arguments for the joint plot
    hist_kws=None,          # dict of extra arguments for the histograms
)

# Equivalent one-call quick method:
# joint_plot(X, y=None, ax=None, columns=None, correlation='pearson',
#            kind='scatter', hist=True, alpha=0.65, joint_kws=None,
#            hist_kws=None, show=True, **kwargs)

# Useful properties on the visualizer:
# - visualizer.xhax: axes of the top histogram
# - visualizer.yhax: axes of the right histogram

print("JointPlot kwargs:")
print("\n".join(f"  {param}: {default}" for param, default in jointplot_kwargs.items()))

JointPlot kwargs:
  ax: None
  columns: None
  correlation: pearson
  kind: scatter
  hist: True
  alpha: 0.65
  joint_kws: None
  hist_kws: None


In [None]:
# =============================================================================
# 8. FEATURE IMPORTANCES VISUALIZER (Estimator-Based)
# =============================================================================
#
# Purpose: Visualizes the feature importances learned by a model as a
# horizontal bar chart, showing which features the model relies on most.
#
# Requirements: Estimator must expose `feature_importances_` or `coef_`
# Works with: XGBoost, CatBoost, RandomForest, GradientBoosting, and linear models with coef_
#
# Use Case: Understand which features drive model predictions,
# identify features to keep/remove for model simplification.
#
# Best For:
# - Model Interpretation: See what the model learned
# - Feature Selection: Identify low-importance features to remove
# - Debugging: Verify model uses expected features
#
# =============================================================================

feature_importances_class = features.FeatureImportances

# Constructor arguments for FeatureImportances. `estimator` has no usable
# default: the None below is a placeholder that must be replaced by a model.
# The notes follow the Yellowbrick API reference; earlier wording here
# described `relative`, `stack` and `colormap` incorrectly.
feature_importances_kwargs = dict(
    estimator=None,    # REQUIRED: model exposing feature_importances_ or coef_
    ax=None,           # matplotlib Axes to draw on (default: None)
    labels=None,       # feature names (default: None, uses indices)
    relative=True,     # True: importances as a percentage of the strongest
                       # feature (they do NOT sum to 1); False: raw values
    absolute=False,    # take absolute values so negative coefficients can be
                       # compared directly with positive ones
    xlabel=None,       # x-axis label (default: chosen automatically)
    stack=False,       # True: stacked bars per class when the model reports
                       # per-class importances; False: mean across classes
    colors=None,       # bar colors, used when stack=False (default: None)
    colormap=None,     # colormap for the classes, used when stack=True
    topn=None,         # positive N: top N; negative N: bottom N; None/0: all
    is_fitted="auto",  # True / False / 'auto' ('auto' checks before refitting)
)

# Equivalent one-call quick method:
# feature_importances(estimator, X, y, ax=None, labels=None, relative=True,
#                     absolute=False, xlabel=None, stack=False, colors=None,
#                     colormap=None, topn=None, is_fitted='auto', show=True, **kwargs)

# Once fitted, results are available as:
# - visualizer.feature_importances_: the importance values
# - visualizer.features_: the matching feature names/indices

print("FeatureImportances kwargs (REQUIRES ESTIMATOR):")
for key, value in feature_importances_kwargs.items():
    print(f"  {key}: {value}")

In [None]:
# =============================================================================
# 9. RFECV VISUALIZER (Estimator-Based)
# =============================================================================
#
# Purpose: Recursive Feature Elimination with Cross-Validation.
# Progressively removes features and plots CV scores vs number of features.
#
# Requirements: Estimator must have `coef_` or `feature_importances_` attribute
# Works with: Most sklearn classifiers/regressors
#
# Use Case: Find optimal number of features for model performance,
# identify minimal feature set that maintains accuracy.
#
# WARNING: VERY SLOW! Performs CV for each feature subset.
# For N features with 5-fold CV, it runs 5*N model fits.
#
# Best For:
# - Feature Selection: Find optimal feature count
# - Model Simplification: Remove unnecessary features
# - Overfitting Prevention: Simpler models generalize better
#
# =============================================================================

rfecv_class = features.RFECV

# Constructor arguments for RFECV. `estimator` has no usable default: the None
# below is a placeholder that must be replaced by a model.
rfecv_kwargs = dict(
    estimator=None,            # REQUIRED: model with coef_ or feature_importances_
    ax=None,                   # matplotlib Axes to draw on (default: None)
    step=1,                    # features removed per iteration (int, or float 0-1)
    groups=None,               # group labels for GroupKFold
    cv=None,                   # cross-validation strategy (default: 5-fold)
    scoring=None,              # scoring metric (default: the estimator's score)
    # NOTE(review): min_features_to_select is an sklearn RFECV parameter;
    # confirm the installed yellowbrick RFECV wrapper honours it.
    min_features_to_select=1,  # minimum features to keep (default: 1)
)

# Equivalent one-call quick method:
# rfecv(estimator, X, y, ax=None, step=1, groups=None, cv=None, scoring=None,
#       min_features_to_select=1, show=True, **kwargs)

# Once fitted, results are available as:
# - visualizer.n_features_: optimal number of features
# - visualizer.support_: boolean mask of the selected features
# - visualizer.ranking_: feature ranking (1 = selected)
# - visualizer.cv_scores_: CV scores for each feature count
# - visualizer.rfe_estimator_: the underlying RFE estimator

print("RFECV kwargs (REQUIRES ESTIMATOR - VERY SLOW!):")
print("\n".join(f"  {param}: {default}" for param, default in rfecv_kwargs.items()))

In [12]:
# =============================================================================
# RECOMMENDED CONFIGURATION FOR TRANSACTION FRAUD DETECTION (TFD)
# =============================================================================
#
# TFD Characteristics:
# - Binary classification (fraud=1, non-fraud=0)
# - Highly imbalanced (~1-5% fraud rate)
# - Many numerical features (transaction amounts, velocities)
# - Categorical features encoded as numerical
#
# Feature Analysis Goals:
# 1. Identify features that separate fraud from non-fraud
# 2. Detect redundant/correlated features
# 3. Understand feature distributions
# 4. Visualize class separation in reduced dimensions
#
# =============================================================================

# Class labels for the binary TFD target
tfd_classes = [0, 1]  # or ["Non-Fraud", "Fraud"]

# -----------------------------------------------------------------------------
# PRIMARY VISUALIZERS - Most useful for fraud detection
# -----------------------------------------------------------------------------

primary_feature_visualizers = dict(
    # Rank2D exposes correlated features, so redundant ones can be dropped
    # before training.
    Rank2D=dict(
        algorithm="pearson",      # linear correlation for numerical features
        colormap="RdBu_r",        # red-blue diverging colormap
        show_feature_names=True,
    ),

    # PCA gives a quick 2D look at class separation; a good first check for
    # linear separability.
    PCA=dict(
        classes=tfd_classes,
        scale=True,               # always scale for PCA
        projection=2,             # 2D for interpretability
        proj_features=False,      # switch on for a biplot (slower)
        alpha=0.5,                # transparency for overlapping points
        heatmap=False,            # switch on to see feature contributions
    ),

    # ParallelCoordinates shows how fraud differs across features; the data is
    # sampled because the datasets are large.
    ParallelCoordinates=dict(
        classes=tfd_classes,
        sample=0.05,              # keep 5% of the rows
        shuffle=True,             # pick the sample at random
        normalize="minmax",       # put features on comparable scales
        alpha=0.3,                # low alpha because many lines overlap
        fast=True,                # fast mode for large data
    ),
)

print("Primary Feature Visualizers for TFD:")
for viz_name, viz_options in primary_feature_visualizers.items():
    print(f"\n  {viz_name}:")
    print("\n".join(f"    {opt}: {setting}" for opt, setting in viz_options.items()))

Primary Feature Visualizers for TFD:

  Rank2D:
    algorithm: pearson
    colormap: RdBu_r
    show_feature_names: True

  PCA:
    classes: [0, 1]
    scale: True
    projection: 2
    proj_features: False
    alpha: 0.5
    heatmap: False

  ParallelCoordinates:
    classes: [0, 1]
    sample: 0.05
    shuffle: True
    normalize: minmax
    alpha: 0.3
    fast: True


In [13]:
# -----------------------------------------------------------------------------
# SECONDARY VISUALIZERS - Useful for deeper analysis
# -----------------------------------------------------------------------------

secondary_feature_visualizers = dict(
    # Rank1D: quick normality check per feature
    Rank1D=dict(
        algorithm="shapiro",      # Shapiro-Wilk normality test
        orient="h",               # horizontal bars
        show_feature_names=True,
    ),

    # RadViz: circular view of class separation; useful for spotting
    # clustering potential.
    RadViz=dict(
        classes=tfd_classes,
        alpha=0.5,                # transparency for overlapping points
    ),

    # Manifold (t-SNE): non-linear dimensionality reduction, better than PCA
    # for complex, non-linear structure.
    # WARNING: computationally expensive!
    Manifold=dict(
        manifold="tsne",          # t-SNE is usually best for visualization
        classes=tfd_classes,
        projection=2,
        alpha=0.5,
        random_state=42,          # reproducibility
    ),

    # JointPlot: relationship between one feature and the fraud target
    JointPlot=dict(
        correlation="spearman",   # rank correlation (robust to outliers)
        kind="hex",               # hexbin copes with large datasets
        hist="density",           # density histograms on the margins
        alpha=0.7,
    ),
)

print("Secondary Feature Visualizers for TFD:")
for viz_name, viz_options in secondary_feature_visualizers.items():
    print(f"\n  {viz_name}:")
    print("\n".join(f"    {opt}: {setting}" for opt, setting in viz_options.items()))

Secondary Feature Visualizers for TFD:

  Rank1D:
    algorithm: shapiro
    orient: h
    show_feature_names: True

  RadViz:
    classes: [0, 1]
    alpha: 0.5

  Manifold:
    manifold: tsne
    classes: [0, 1]
    projection: 2
    alpha: 0.5
    random_state: 42

  JointPlot:
    correlation: spearman
    kind: hex
    hist: density
    alpha: 0.7


In [None]:
# =============================================================================
# CONSOLIDATED CONFIGURATION FOR NOTEBOOK 010 INTEGRATION
# =============================================================================

def yellowbrick_feature_analysis_kwargs(
    classes,
    features=None,
    estimator=None,
    cv=None,
    verbose=True,
    include_slow=False,
):
    """
    Generate constructor kwargs for the yellowbrick.features visualizers.

    Parameters:
    -----------
    classes : list
        List of class labels [0, 1] for binary classification
    features : sequence, optional
        Feature names (default: None, auto-detected). Any sized sequence is
        accepted, including numpy arrays and pandas Index objects; it is
        passed through to the visualizers unchanged.
    estimator : sklearn estimator, optional
        Required for FeatureImportances/RFECV (default: None, skips these)
    cv : cross-validator, optional
        Cross-validation strategy for RFECV. Only consulted when RFECV is
        configured; None then falls back to a seeded, shuffled 5-fold
        StratifiedKFold.
    verbose : bool
        Print configuration details (default: True)
    include_slow : bool
        Also configure the expensive visualizers: Manifold (t-SNE) always,
        and RFECV when an estimator is given (default: False). Intended for
        small or sampled datasets.

    Returns:
    --------
    dict : Dictionary of visualizer name -> kwargs, in processing order
    """
    seed = 42  # one seed for every stochastic component configured here
    # Identity test on purpose: truth-testing an estimator calls its
    # __len__/__bool__, which some estimator classes define and which can
    # raise or report "empty".
    has_estimator = estimator is not None

    visualizer_kwargs = {
        # -----------------------------------------------------------------
        # PRIMARY VISUALIZERS (Fast, always recommended)
        # -----------------------------------------------------------------
        "Rank2D": {
            "algorithm": "pearson",
            "features": features,
            "colormap": "RdBu_r",
            "show_feature_names": True,
        },
        "PCA": {
            "classes": classes,
            "features": features,
            "scale": True,
            "projection": 2,
            "alpha": 0.5,
        },
        "ParallelCoordinates": {
            "classes": classes,
            "features": features,
            "sample": 0.05,
            "shuffle": True,
            # Seeded so the shuffled 5% sample is the same on every run
            "random_state": seed,
            "normalize": "minmax",
            "alpha": 0.3,
            "fast": True,
        },

        # -----------------------------------------------------------------
        # SECONDARY VISUALIZERS (Optional, for deeper analysis)
        # -----------------------------------------------------------------
        "Rank1D": {
            "algorithm": "shapiro",
            "features": features,
            "orient": "h",
            "show_feature_names": True,
        },
        "RadViz": {
            "classes": classes,
            "features": features,
            "alpha": 0.5,
        },
        "JointPlot": {
            "columns": None,  # Set to column index/name for specific feature
            "correlation": "spearman",
            "kind": "hex",
            "hist": "density",
            "alpha": 0.7,
        },
    }

    # ---------------------------------------------------------------------
    # SLOW VISUALIZERS (opt-in; use only for small/sampled datasets)
    # ---------------------------------------------------------------------
    if include_slow:
        visualizer_kwargs["Manifold"] = {
            "manifold": "tsne",
            "classes": classes,
            "features": features,
            "projection": 2,
            "alpha": 0.5,
            "random_state": seed,
        }

    # ---------------------------------------------------------------------
    # ESTIMATOR-BASED VISUALIZERS (Only if estimator provided)
    # ---------------------------------------------------------------------
    if has_estimator:
        visualizer_kwargs["FeatureImportances"] = {
            "estimator": estimator,
            "labels": features,
            "relative": True,
            "absolute": False,
            "stack": False,
            "topn": None,
        }
        if include_slow:
            if cv is None:
                # Imported lazily: sklearn is only needed for this default
                from sklearn.model_selection import StratifiedKFold

                cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
            visualizer_kwargs["RFECV"] = {
                "estimator": estimator,
                "cv": cv,
                "scoring": "average_precision",
                "step": 1,
            }

    if verbose:
        inner = 62  # characters between the two outer border characters
        heavy_rule = "+" + "=" * inner + "+"
        light_rule = "+" + "-" * inner + "+"

        def boxed(text):
            # Pad to the same total width as the rules so the box lines up
            return f"| {text:<{inner - 2}} |"

        # len() instead of truth-testing: numpy arrays and pandas Index
        # objects raise on bool(). An empty sequence still reads "auto-detect".
        n_features = 0 if features is None else len(features)
        summary_rows = (
            ("Classes:", str(classes)),
            ("Features:", f"{n_features} features" if n_features else "auto-detect"),
            ("Estimator:", type(estimator).__name__ if has_estimator else "None"),
        )

        print()
        print(heavy_rule)
        print("|" + " YELLOWBRICK FEATURE ANALYSIS CONFIGURATION ".center(inner) + "|")
        print(heavy_rule)
        for label, value in summary_rows:
            print(boxed(f"{label:<20} {value}"))
        print(light_rule)
        print(boxed(f"Configured {len(visualizer_kwargs)} visualizers:"))
        print(light_rule)
        for position, name in enumerate(visualizer_kwargs, 1):
            needs_model = name in ("FeatureImportances", "RFECV")
            status = "[ESTIMATOR]" if needs_model else "[DATA-ONLY]"
            print(boxed(f"  {position:>2}. {name:<30} {status}"))
        print(heavy_rule)
        print()

    return visualizer_kwargs


def yellowbrick_feature_analysis_visualizers(
    yb_feature_analysis_kwargs,
    X,
    y,
    YELLOWBRICK_PATH,
    verbose=True
):
    """
    Run all feature analysis visualizers and save to disk.

    Every key of ``yb_feature_analysis_kwargs`` is looked up by name in
    ``yellowbrick.features``, instantiated with its kwargs, fitted on
    ``(X, y)``, shown, and written to
    ``<YELLOWBRICK_PATH>/feature_analysis/<ClassName>.png``.
    An exception raised by one visualizer is recorded in the result list and
    does not stop the remaining visualizers.

    Parameters:
    -----------
    yb_feature_analysis_kwargs : dict
        Output from yellowbrick_feature_analysis_kwargs()
        (visualizer class name -> constructor kwargs)
    X : pd.DataFrame or np.ndarray
        Feature matrix (``X.shape`` is read when ``verbose`` is True)
    y : pd.Series or np.ndarray
        Target vector
    YELLOWBRICK_PATH : str or os.PathLike
        Path to save visualizations; a ``feature_analysis`` sub-directory is
        created inside it if missing
    verbose : bool
        Print progress details (default: True)

    Returns:
    --------
    dict : Summary with keys
        "successful" : int, visualizers that were fitted, shown and saved
        "failed"     : int, visualizers that raised an exception
        "total_time" : float, elapsed seconds for the whole run
        "results"    : list of dicts, one per visualizer, with keys
                       "name", "status" ("OK" / "FAILED"), "time" (seconds),
                       "output" (saved file path, None on failure) and
                       "error" (exception message, None on success)
    """
    import matplotlib.pyplot as plt
    import os
    import time
    from datetime import datetime
    # Imported locally under an alias so the lookup below does not depend on a
    # notebook-global called `features`, a name that is easily rebound to a
    # list of feature names elsewhere in the notebook.
    from yellowbrick import features as yb_features

    # f-string formatting accepts both str and pathlib.Path.
    output_dir = f"{YELLOWBRICK_PATH}/feature_analysis"
    os.makedirs(output_dir, exist_ok=True)

    # Visualizers grouped by their fit method
    fit_transform_visualizers = ["ParallelCoordinates", "PCA", "Manifold"]
    fit_only_visualizers = ["FeatureImportances", "RFECV", "JointPlot"]

    total = len(yb_feature_analysis_kwargs)
    results = []
    start_total = time.perf_counter()

    if verbose:
        print()
        print("+" + "=" * 62 + "+")
        print("|" + " YELLOWBRICK FEATURE ANALYSIS ".center(62) + "|")
        print("+" + "=" * 62 + "+")
        print(f"| {'Started:':<15} {datetime.now().strftime('%Y-%m-%d %H:%M:%S'):<45} |")
        print(f"| {'Data shape:':<15} {str(X.shape[0]) + ' samples x ' + str(X.shape[1]) + ' features':<45} |")
        print(f"| {'Output:':<15} {output_dir + '/':<45} |")
        print(f"| {'Visualizers:':<15} {str(total) + ' to process':<45} |")
        print("+" + "=" * 62 + "+")
        print()

    for idx, (visualizer_name, kwargs) in enumerate(yb_feature_analysis_kwargs.items(), 1):
        start_time = time.perf_counter()
        status = "OK"
        error_msg = None
        output_path = None

        if verbose:
            progress = f"[{idx}/{total}]"
            bar_width = 20
            filled = int(bar_width * idx / total)
            bar = "█" * filled + "░" * (bar_width - filled)
            print(f"{progress} |{bar}| {visualizer_name}...", end=" ", flush=True)

        try:
            visualizer = getattr(yb_features, visualizer_name)(**kwargs)

            if visualizer_name in fit_transform_visualizers:
                visualizer.fit_transform(X, y)
            elif visualizer_name in fit_only_visualizers:
                visualizer.fit(X, y)
            else:
                visualizer.fit(X, y)
                visualizer.transform(X)

            # Bind the figure BEFORE show(): with the Jupyter inline backend
            # show() can close the current figure, after which a lazily
            # resolved `visualizer.fig` may point at a fresh, empty figure and
            # the saved PNG would be blank. The save itself still happens
            # after show() so that the finalized plot is what gets written.
            fig = visualizer.fig
            visualizer.show()
            saved_path = f"{output_dir}/{visualizer.__class__.__name__}.png"
            fig.savefig(saved_path, dpi=150, bbox_inches="tight")
            # Only report a path once the file has actually been written.
            output_path = saved_path

        except Exception as e:
            # Best-effort run: record the failure and continue with the next
            # visualizer.
            status = "FAILED"
            error_msg = str(e)

        finally:
            # Release all figures on both paths so they do not accumulate
            # across iterations.
            plt.close('all')

        elapsed = time.perf_counter() - start_time
        results.append({
            "name": visualizer_name,
            "status": status,
            "time": elapsed,
            "output": output_path,
            "error": error_msg
        })

        if verbose:
            if status == "OK":
                print(f"✓ ({elapsed:.2f}s)")
            else:
                print(f"✗ FAILED ({elapsed:.2f}s)")
                # Keep the progress log compact: truncate long messages.
                shown_error = error_msg if len(error_msg) <= 50 else error_msg[:50] + "..."
                print(f"         └─ Error: {shown_error}")

    total_time = time.perf_counter() - start_total
    successful = sum(1 for r in results if r["status"] == "OK")
    failed = sum(1 for r in results if r["status"] == "FAILED")

    if verbose:
        print()
        print("+" + "=" * 62 + "+")
        print("|" + " SUMMARY ".center(62) + "|")
        print("+" + "=" * 62 + "+")
        print(f"| {'Completed:':<15} {datetime.now().strftime('%Y-%m-%d %H:%M:%S'):<45} |")
        print(f"| {'Total time:':<15} {f'{total_time:.2f} seconds':<45} |")
        print(f"| {'Successful:':<15} {f'{successful}/{total} visualizers':<45} |")
        if failed > 0:
            print(f"| {'Failed:':<15} {f'{failed}/{total} visualizers':<45} |")
        print("+" + "-" * 62 + "+")
        print("|" + " TIMING BREAKDOWN ".center(62) + "|")
        print("+" + "-" * 62 + "+")

        # Sort by time descending; bars are scaled relative to the slowest
        # visualizer (computed once, not per row).
        time_bar_width = 15
        max_time = max((r["time"] for r in results), default=0)
        for r in sorted(results, key=lambda x: x["time"], reverse=True):
            status_icon = "✓" if r["status"] == "OK" else "✗"
            filled = int(time_bar_width * r["time"] / max_time) if max_time > 0 else 0
            time_bar = "▓" * filled + "░" * (time_bar_width - filled)
            print(f"| {status_icon} {r['name']:<25} |{time_bar}| {r['time']:>6.2f}s |")

        print("+" + "=" * 62 + "+")

        if successful > 0:
            print()
            print("Saved visualizations:")
            for r in results:
                if r["status"] == "OK" and r["output"]:
                    print(f"  → {r['output']}")
        print()

    return {
        "successful": successful,
        "failed": failed,
        "total_time": total_time,
        "results": results
    }


# Cell footer: list the two helper functions defined above and the
# visualizers they can drive. One print call, one line per argument.
print(
    "Functions defined for notebook 010 integration:",
    "  - yellowbrick_feature_analysis_kwargs(classes, features, estimator, cv, verbose)",
    "  - yellowbrick_feature_analysis_visualizers(kwargs, X, y, path, verbose)",
    "",
    "ALL 9 VISUALIZERS SUPPORTED:",
    "  Data-only: Rank1D, Rank2D, PCA, ParallelCoordinates, RadViz, JointPlot, Manifold",
    "  Estimator-based: FeatureImportances, RFECV (pass estimator to kwargs function)",
    sep="\n",
)

In [None]:
# =============================================================================
# KEY INSIGHTS FOR TFD FEATURE ANALYSIS
# =============================================================================
#
# 1. VISUALIZER SELECTION STRATEGY:
#    - Start with Rank2D to identify correlated features
#    - Use PCA for quick linear separability check
#    - Use ParallelCoordinates to see feature patterns by class
#    - Use FeatureImportances after training to see model's view
#    - Use Manifold (t-SNE) only for small samples or deep analysis
#
# 2. PERFORMANCE CONSIDERATIONS:
#    | Visualizer          | Speed     | Requires Estimator |
#    |---------------------|-----------|--------------------|
#    | Rank1D              | Fast      | No                 |
#    | Rank2D              | Fast      | No                 |
#    | PCA                 | Fast      | No                 |
#    | JointPlot           | Fast      | No                 |
#    | ParallelCoordinates | Moderate  | No                 |
#    | RadViz              | Moderate  | No                 |
#    | FeatureImportances  | Fast      | YES                |
#    | Manifold            | SLOW      | No                 |
#    | RFECV               | VERY SLOW | YES                |
#
# 3. FRAUD DETECTION SPECIFIC:
#    - Class imbalance: Use alpha transparency to see minority class
#    - Many features: Sample or select top features first
#    - Look for "braids" in ParallelCoordinates where fraud differs
#    - Check if fraud forms distinct clusters in PCA/Manifold
#    - Use FeatureImportances to identify key fraud indicators
#
# 4. INTEGRATION WITH SKLEARN WORKFLOW:
#    - Run data-only visualizers BEFORE training (Rank2D, PCA, etc.)
#    - Run estimator-based visualizers AFTER training (FeatureImportances)
#    - Use Rank2D to inform feature selection
#    - Use PCA insights for choosing model complexity
#    - Save visualizations for experiment tracking (MLflow)
#
# 5. USAGE EXAMPLES:
#
#    # Data-only analysis (no model needed):
#    kwargs = yellowbrick_feature_analysis_kwargs(classes=[0, 1])
#    yellowbrick_feature_analysis_visualizers(kwargs, X, y, "output/")
#
#    # With estimator for FeatureImportances:
#    model = XGBClassifier().fit(X_train, y_train)
#    kwargs = yellowbrick_feature_analysis_kwargs(
#        classes=[0, 1],
#        estimator=model
#    )
#    yellowbrick_feature_analysis_visualizers(kwargs, X, y, "output/")
#
# =============================================================================
# Closing summary for the cell: the visualizer inventory, grouped by whether
# an estimator is required. One print call, one line per argument.
print(
    "Key insights documented above.",
    "",
    "YELLOWBRICK FEATURES SUMMARY:",
    "  - Total Visualizers: 9",
    "",
    "  DATA-ONLY (7):",
    "    Primary: Rank2D, PCA, ParallelCoordinates",
    "    Secondary: Rank1D, RadViz, JointPlot",
    "    Slow: Manifold (t-SNE)",
    "",
    "  ESTIMATOR-BASED (2):",
    "    FeatureImportances (tree-based models)",
    "    RFECV (recursive feature elimination)",
    sep="\n",
)