In [19]:
# Standard libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, clear_output
import seaborn as sns
import networkx as nx

# Scikit-learn imports
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.decomposition import KernelPCA
from sklearn.metrics import r2_score

from abc import ABC, abstractmethod
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.decomposition import PCA, KernelPCA
import pandas as pd
from tabulate import tabulate


In [2]:
# Function to generate nonlinear regression data
def generate_nonlinear_data(n_samples=100, noise=0.3, random_state=None):
    """
    Generate a 1-D nonlinear regression dataset.

    Inputs are drawn uniformly from [-3, 3] and the target is
    ``sin(x) + x**2 / 6`` plus Gaussian noise scaled by ``noise``.

    Parameters:
    -----------
    n_samples : int, default=100
        Number of samples to draw.
    noise : float, default=0.3
        Standard deviation of the additive Gaussian noise.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) used for sampling. With ``None`` the global
        NumPy random stream is used, exactly as before.

    Returns:
    --------
    X : ndarray of shape (n_samples, 1)
        Input values.
    y : ndarray of shape (n_samples,)
        Noisy targets.
    """
    # Use a private generator when a seed is supplied so that calling this
    # helper never reseeds the process-wide NumPy stream. A RandomState seeded
    # with `s` yields the same draws as `np.random.seed(s)` followed by the
    # module-level functions, so results for a given seed are unchanged.
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    X = rng.uniform(-3, 3, n_samples).reshape(-1, 1)
    y = np.sin(X.ravel()) + X.ravel()**2 / 6 + noise * rng.randn(n_samples)
    return X, y

# Function to generate binary classification datasets
def generate_binary_datasets(n_samples=200, noise=0.2, random_state=42):
    """
    Build a small collection of 2-D binary classification problems.

    Returns:
    --------
    datasets : dict
        Maps "moons", "circles" and "linearly_separable" to the ``(X, y)``
        tuple returned by the corresponding scikit-learn generator.
    """
    # Arguments common to the two noisy shape generators.
    shape_kwargs = dict(n_samples=n_samples, noise=noise, random_state=random_state)

    datasets = {}
    datasets["moons"] = make_moons(**shape_kwargs)
    datasets["circles"] = make_circles(factor=0.5, **shape_kwargs)
    datasets["linearly_separable"] = make_classification(
        n_samples=n_samples,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=random_state,
        n_clusters_per_class=1,
    )
    return datasets

# Function to generate multiclass data
def generate_multiclass_data(n_samples=300, n_classes=3, n_features=2, random_state=42):
    """
    Create a multiclass classification dataset with ``make_classification``.

    Every feature is informative (no redundant ones) and each class is drawn
    from a single cluster.

    Returns:
    --------
    X, y : tuple
        The ``(X, y)`` pair returned by ``make_classification``.
    """
    settings = {
        "n_samples": n_samples,
        "n_features": n_features,
        "n_informative": n_features,
        "n_redundant": 0,
        "n_classes": n_classes,
        "n_clusters_per_class": 1,
        "random_state": random_state,
    }
    features, labels = make_classification(**settings)
    return features, labels

# Function to preprocess data (standardize and split)
def preprocess_data(X, y, test_size=0.3, random_state=42):
    """
    Split the data into train/test parts and standardize the features.

    The scaler is fitted on the training split only and then applied to the
    test split.

    Returns:
    --------
    X_train_scaled, X_test_scaled, y_train, y_test, scaler : tuple
        Scaled feature splits, the untouched target splits, and the fitted
        ``StandardScaler``.
    """
    split = train_test_split(X, y, test_size=test_size, random_state=random_state)
    train_features, test_features, train_targets, test_targets = split

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_features)
    test_scaled = scaler.transform(test_features)

    return train_scaled, test_scaled, train_targets, test_targets, scaler

In [3]:
import numpy as np
from abc import ABC, abstractmethod

class Kernel(ABC):
    """
    Common interface for kernel functions.

    A kernel k(x, y) scores the similarity of two inputs as an inner product
    k(x, y) = <φ(x), φ(y)> in some feature space, without ever computing the
    feature map φ explicitly (the "kernel trick"). Subclasses only have to
    implement ``__call__``; the helpers below are built on top of it.
    """

    @abstractmethod
    def __call__(self, X1, X2):
        """
        Evaluate the kernel between every row of X1 and every row of X2.

        Parameters:
        -----------
        X1 : ndarray of shape (n_samples_1, n_features)
            First set of samples
        X2 : ndarray of shape (n_samples_2, n_features)
            Second set of samples

        Returns:
        --------
        K : ndarray of shape (n_samples_1, n_samples_2)
            Kernel matrix
        """
        return None

    def compute_gram_matrix(self, X):
        """
        Evaluate the kernel on all pairs of rows of a single dataset.

        Parameters:
        -----------
        X : ndarray of shape (n_samples, n_features)
            Input data

        Returns:
        --------
        K : ndarray of shape (n_samples, n_samples)
            Gram matrix, i.e. the kernel applied to (X, X)
        """
        gram = self(X, X)
        return gram

    def is_psd(self, X, tol=1e-8):
        """
        Test whether the Gram matrix of X is positive semi-definite.

        The matrix is accepted when every eigenvalue is greater than ``-tol``.
        If the Gram matrix is not symmetric, a warning is printed and the
        matrix is symmetrized before the eigenvalues are computed.

        Parameters:
        -----------
        X : ndarray of shape (n_samples, n_features)
            Input data
        tol : float
            Absolute slack allowed below zero for the eigenvalues

        Returns:
        --------
        is_psd : bool
            True if the kernel matrix is PSD, False otherwise
        eigenvalues : ndarray
            Eigenvalues of the (possibly symmetrized) kernel matrix
        """
        gram = self.compute_gram_matrix(X)

        symmetric = np.allclose(gram, gram.T)
        if not symmetric:
            print("Warning: Kernel matrix is not symmetric.")
            gram = (gram + gram.T) / 2.0

        # eigvalsh assumes a symmetric matrix, hence the symmetrization above.
        spectrum = np.linalg.eigvalsh(gram)
        verdict = (spectrum > -tol).all()

        return verdict, spectrum

    def get_feature_map_example(self, x):
        """
        Illustrate the feature map φ for one data point (educational helper).

        The base class has no generic answer; subclasses override this.

        Parameters:
        -----------
        x : ndarray of shape (n_features,)
            Input data point

        Returns:
        --------
        phi_x : ndarray or str
            Feature map of x or a description of the feature map

        Raises:
        -------
        NotImplementedError
            Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Feature map example not implemented for this kernel")


In [4]:
class LinearKernel(Kernel):
    """
    Linear kernel: k(x, y) = <x, y>

    The plain dot product in input space. No lifting to a higher-dimensional
    feature space takes place.
    """

    def __call__(self, X1, X2):
        """
        Evaluate the dot product between every row of X1 and every row of X2.

        Parameters:
        -----------
        X1 : ndarray of shape (n_samples_1, n_features)
            First set of samples
        X2 : ndarray of shape (n_samples_2, n_features)
            Second set of samples

        Returns:
        --------
        K : ndarray of shape (n_samples_1, n_samples_2)
            Kernel matrix
        """
        inner_products = np.dot(X1, X2.T)
        return inner_products

    def get_feature_map_example(self, x):
        """
        Return the feature map of x, which for this kernel is x itself.

        Parameters:
        -----------
        x : ndarray of shape (n_features,)
            Input data point

        Returns:
        --------
        phi_x : ndarray
            The very same object that was passed in (identity map)
        """
        phi_x = x
        return phi_x

In [5]:
class PolynomialKernel(Kernel):
    """
    Polynomial kernel: k(x, y) = (gamma * <x, y> + coef0)^degree

    The polynomial kernel maps the data into a higher-dimensional space where
    the new features correspond to all possible polynomial combinations of the
    original features up to the specified degree.
    """

    def __init__(self, degree=3, gamma=1.0, coef0=1.0):
        """
        Initialize the polynomial kernel.

        Parameters:
        -----------
        degree : int, default=3
            Degree of the polynomial
        gamma : float, default=1.0
            Scale parameter
        coef0 : float, default=1.0
            Independent term
        """
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0

    def __call__(self, X1, X2):
        """
        Compute the polynomial kernel matrix between X1 and X2.

        Parameters:
        -----------
        X1 : ndarray of shape (n_samples_1, n_features)
            First set of samples
        X2 : ndarray of shape (n_samples_2, n_features)
            Second set of samples

        Returns:
        --------
        K : ndarray of shape (n_samples_1, n_samples_2)
            Kernel matrix
        """
        return (self.gamma * np.dot(X1, X2.T) + self.coef0) ** self.degree

    def get_feature_map_example(self, x):
        """
        Return the explicit feature map for a 2-D point and degree 2.

        For x = [x1, x2] the map is chosen so that <φ(x), φ(y)> reproduces
        k(x, y) = (gamma * <x, y> + coef0)^2 exactly:

            φ(x) = [coef0,
                    sqrt(2*gamma*coef0) * x1,
                    sqrt(2*gamma*coef0) * x2,
                    gamma * x1^2,
                    sqrt(2) * gamma * x1 * x2,
                    gamma * x2^2]

        A real-valued map requires gamma * coef0 >= 0; otherwise the square
        root yields NaN. For any other input dimension or degree only a
        textual description is returned.

        Parameters:
        -----------
        x : ndarray of shape (2,)
            Input data point (must be 2D)

        Returns:
        --------
        phi_x : ndarray or str
            Feature map of x or a description of the feature map
        """
        if len(x) != 2 or self.degree != 2:
            return (f"For a polynomial kernel of degree {self.degree}, the feature map maps to a "
                   f"space of dimension C(n+d,d) where n is the input dimension and d is the degree.")

        x1, x2 = x

        # Expanding (gamma*<x,y> + c)^2 gives
        #   c^2 + 2*gamma*c*<x,y> + gamma^2*(x1*y1 + x2*y2)^2,
        # so the linear terms carry sqrt(2*gamma*c) and the cross term carries
        # sqrt(2)*gamma (NOT sqrt(2*gamma)*gamma, which is only right for gamma=1).
        linear_scale = np.sqrt(2 * self.gamma * self.coef0)
        cross_scale = np.sqrt(2) * self.gamma

        return np.array([
            self.coef0,
            linear_scale * x1,
            linear_scale * x2,
            self.gamma * x1**2,
            cross_scale * x1 * x2,
            self.gamma * x2**2
        ])


In [6]:
class RBFKernel(Kernel):
    """
    Radial Basis Function (RBF) kernel: k(x, y) = exp(-gamma * ||x - y||^2)

    The RBF kernel, also known as the Gaussian kernel, maps the data into an
    infinite-dimensional space. It is one of the most widely used kernels due to
    its flexibility and theoretical properties. In RKHS theory, the RBF kernel
    induces a space of smooth functions.
    """

    def __init__(self, gamma=1.0):
        """
        Initialize the RBF kernel.

        Parameters:
        -----------
        gamma : float, default=1.0
            Scale parameter. In terms of the Gaussian bandwidth sigma,
            gamma = 1 / (2 * sigma^2); larger values give a narrower kernel.
        """
        self.gamma = gamma

    def __call__(self, X1, X2):
        """
        Compute the RBF kernel matrix between X1 and X2.

        Parameters:
        -----------
        X1 : ndarray of shape (n_samples_1, n_features)
            First set of samples
        X2 : ndarray of shape (n_samples_2, n_features)
            Second set of samples

        Returns:
        --------
        K : ndarray of shape (n_samples_1, n_samples_2)
            Kernel matrix
        """
        # Squared Euclidean distances via ||a||^2 + ||b||^2 - 2<a, b>,
        # which avoids materializing all pairwise differences.
        X1_norm = np.sum(X1 ** 2, axis=1).reshape(-1, 1)
        X2_norm = np.sum(X2 ** 2, axis=1).reshape(1, -1)
        distances = X1_norm + X2_norm - 2 * np.dot(X1, X2.T)

        # The expanded formula suffers from cancellation: for (nearly)
        # identical points it can come out slightly negative, which would
        # produce kernel values above 1. Squared distances are never
        # negative, so clip at zero.
        distances = np.maximum(distances, 0.0)

        # Apply the RBF function
        return np.exp(-self.gamma * distances)

    def get_feature_map_example(self, x):
        """
        Describe the feature map of the RBF kernel.

        The map is infinite-dimensional, so no explicit vector is returned;
        the input point is not used.

        Parameters:
        -----------
        x : ndarray of shape (n_features,)
            Input data point

        Returns:
        --------
        description : str
            Description of the feature map
        """
        return ("The RBF kernel maps to an infinite-dimensional space. "
                "Its feature map can be represented as an infinite series using "
                "the Taylor expansion of the exponential function.")


In [7]:
# Registry of available kernels: short name -> kernel class.
KERNEL_DICT = dict(
    linear=LinearKernel,
    polynomial=PolynomialKernel,
    rbf=RBFKernel,
)

def get_kernel(kernel_name, **kernel_params):
    """
    Get a kernel instance by name.

    Parameters:
    -----------
    kernel_name : str
        Name of the kernel. Must be a key of KERNEL_DICT
        ('linear', 'polynomial' or 'rbf').
    **kernel_params : dict
        Additional parameters to be passed to the kernel constructor.

    Returns:
    --------
    kernel : Kernel
        Kernel instance

    Raises:
    -------
    ValueError
        If ``kernel_name`` is not registered in KERNEL_DICT.
    """
    if kernel_name not in KERNEL_DICT:
        raise ValueError(f"Unknown kernel: {kernel_name}. Available kernels: {list(KERNEL_DICT.keys())}")

    kernel_cls = KERNEL_DICT[kernel_name]
    return kernel_cls(**kernel_params)

In [8]:
def _visualize_kernel(dataset_type='regression', kernel_type='rbf', gamma=1.0, degree=2, coef0=1.0, elev=30, azim=30):
    """
    Draw a three-panel figure: the dataset, the kernel profile k(0, x) around
    the origin, and an illustrative 3D "feature space" view of the data.

    The third panel is a simplified illustration (two plotted coordinates plus
    one kernel-inspired coordinate), not the exact feature map of the kernel.

    Parameters:
    -----------
    dataset_type : str
        Type of dataset to use ('regression', 'binary', 'multiclass')
    kernel_type : str
        Type of kernel to visualize ('linear', 'polynomial', 'rbf')
    gamma : float
        Gamma parameter for kernels
    degree : int
        Degree for polynomial kernel
    coef0 : float
        Coefficient for polynomial kernel
    elev : int
        Elevation angle for 3D plot
    azim : int
        Azimuth angle for 3D plot

    Returns:
    --------
    fig : matplotlib.figure.Figure
        The figure that was drawn (it is also shown via ``plt.show()``).

    Raises:
    -------
    ValueError
        If ``dataset_type`` or ``kernel_type`` is not one of the supported values.
    """
    valid_datasets = ('regression', 'binary', 'multiclass')
    valid_kernels = ('linear', 'polynomial', 'rbf')
    # Fail early with a clear message instead of a NameError further down.
    if dataset_type not in valid_datasets:
        raise ValueError(f"Unknown dataset_type: {dataset_type!r}. Expected one of {valid_datasets}")
    if kernel_type not in valid_kernels:
        raise ValueError(f"Unknown kernel_type: {kernel_type!r}. Expected one of {valid_kernels}")

    # Seed the global RNG so the regression noise is identical on every redraw
    np.random.seed(42)

    is_regression = dataset_type == 'regression'
    # Marker styling shared by all data scatter plots
    marker_style = dict(cmap='viridis', s=40, alpha=0.8, edgecolors='w', linewidth=0.5)

    # ------------------------------------------------------------------
    # Dataset
    # ------------------------------------------------------------------
    if is_regression:
        # Fixed input grid; only the noise term is random
        X = np.linspace(-3, 3, 100).reshape(-1, 1)
        y = np.sin(X.ravel()) + X.ravel()**2 / 6 + 0.3 * np.random.randn(100)
        title_prefix = 'Regression'
    elif dataset_type == 'binary':
        datasets = generate_binary_datasets(n_samples=200, noise=0.2, random_state=42)
        X, y = datasets['moons']
        title_prefix = 'Binary Classification (Moons)'
    else:
        X, y = generate_multiclass_data(n_samples=300, n_classes=3, n_features=2, random_state=42)
        title_prefix = 'Multiclass Classification'

    fig = plt.figure(figsize=(15, 5))

    # ------------------------------------------------------------------
    # Panel 1: original data
    # ------------------------------------------------------------------
    ax1 = fig.add_subplot(131)
    ax1.set_title(f'{title_prefix} Dataset', fontsize=14, fontweight='bold')
    if is_regression:
        scatter = ax1.scatter(X.ravel(), y, c=X.ravel(), **marker_style)
        ax1.set_xlabel('$x$', fontsize=12)
        ax1.set_ylabel('$y$', fontsize=12)
        fig.colorbar(scatter, ax=ax1, label='$x$ value')
    else:
        scatter = ax1.scatter(X[:, 0], X[:, 1], c=y, **marker_style)
        ax1.set_xlabel('$x_1$', fontsize=12)
        ax1.set_ylabel('$x_2$', fontsize=12)
        legend1 = ax1.legend(*scatter.legend_elements(), title="Classes", frameon=True)
        legend1.get_frame().set_facecolor('white')
        legend1.get_frame().set_alpha(0.9)
        ax1.add_artist(legend1)
    ax1.grid(True, linestyle='--', alpha=0.7)

    # ------------------------------------------------------------------
    # Kernel selection
    # ------------------------------------------------------------------
    if kernel_type == 'linear':
        kernel = LinearKernel()
        kernel_title = 'Linear Kernel'
        kernel_formula = r'$k(x, y) = x^T y$'
    elif kernel_type == 'polynomial':
        kernel = PolynomialKernel(degree=degree, gamma=gamma, coef0=coef0)
        kernel_title = f'Polynomial Kernel (degree={degree})'
        kernel_formula = r'$k(x, y) = (\gamma x^T y + c_0)^{' + str(degree) + r'}$'
    else:
        kernel = RBFKernel(gamma=gamma)
        kernel_title = 'RBF Kernel'
        kernel_formula = r'$k(x, y) = \exp(-\gamma ||x - y||^2)$'

    # ------------------------------------------------------------------
    # Panel 2: kernel values relative to a reference point at the origin
    # ------------------------------------------------------------------
    ax2 = fig.add_subplot(132)
    ax2.set_title(f'{kernel_title} Values\n{kernel_formula}', fontsize=14, fontweight='bold')
    if is_regression:
        # 1-D inputs: plot k(0, x) directly against the sample locations
        fixed_point = np.array([0.0]).reshape(1, -1)
        K_values = kernel(fixed_point, X)
        ax2.scatter(X.ravel(), K_values.ravel(), c=X.ravel(), **marker_style)
        ax2.set_xlabel('$x$', fontsize=12)
        ax2.set_ylabel('$k(0, x)$', fontsize=12)
        ax2.grid(True, linestyle='--', alpha=0.7)
    else:
        # 2-D inputs: heatmap of k(0, x) on a grid covering the data range
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 50), np.linspace(y_min, y_max, 50))
        grid_points = np.column_stack([xx.ravel(), yy.ravel()])

        fixed_point = np.array([0.0, 0.0]).reshape(1, -1)
        K_grid = kernel(fixed_point, grid_points).reshape(xx.shape)

        im = ax2.contourf(xx, yy, K_grid, levels=50, cmap='viridis', alpha=0.8)
        ax2.scatter(fixed_point[0, 0], fixed_point[0, 1], c='red', s=100, marker='*',
                   edgecolors='w', linewidth=1.5, label='Reference Point')
        ax2.set_xlabel('$x_1$', fontsize=12)
        ax2.set_ylabel('$x_2$', fontsize=12)
        ax2.grid(True, linestyle='--', alpha=0.7)
        ax2.legend(frameon=True)
        fig.colorbar(im, ax=ax2, label='$k(0, x)$')

    # ------------------------------------------------------------------
    # Panel 3: illustrative 3D view. Only the third coordinate depends on
    # the kernel; summing over axis 1 covers the 1-D and the 2-D inputs.
    # ------------------------------------------------------------------
    ax3 = fig.add_subplot(133, projection='3d')

    if kernel_type == 'linear':
        # No lifting for the linear kernel: use a flat, constant height
        third_coord = np.full(X.shape[0], 0.1)
        feature_map_title = 'Linear Kernel Feature Map\n$\\phi(x) = x$'
    elif kernel_type == 'polynomial':
        # Simplified: only the pure degree-d powers, no mixed terms
        third_coord = np.sqrt(gamma) * np.sum(X**degree, axis=1)
        if is_regression:
            feature_map_title = f'Polynomial Kernel Feature Map\n$\\phi(x) = [x, x^{degree}]$'
        else:
            feature_map_title = f'Polynomial Kernel Feature Map\n$\\phi(x) = [x_1, x_2, x_1^{degree}+x_2^{degree}]$'
    else:
        # Kernel value between each point and the origin
        third_coord = np.exp(-gamma * np.sum(X**2, axis=1))
        feature_map_title = 'RBF Kernel Feature Map\n$\\phi(x) = [\\exp(-\\gamma ||x||^2)]$'

    if is_regression:
        ax3.scatter(X.ravel(), y, third_coord, c=X.ravel(), **marker_style)
        ax3.set_xlabel('$x$', fontsize=12)
        ax3.set_ylabel('$y$', fontsize=12)
    else:
        ax3.scatter(X[:, 0], X[:, 1], third_coord, c=y, **marker_style)
        ax3.set_xlabel('$x_1$', fontsize=12)
        ax3.set_ylabel('$x_2$', fontsize=12)

    ax3.set_title(feature_map_title, fontsize=14, fontweight='bold')
    ax3.set_zlabel('$\\phi(x)_3$', fontsize=12)
    ax3.view_init(elev=elev, azim=azim)
    ax3.grid(True, linestyle='--', alpha=0.7)

    # ------------------------------------------------------------------
    # Footer with the active kernel parameters
    # ------------------------------------------------------------------
    if kernel_type == 'polynomial':
        param_text = f'Parameters: $\\gamma={gamma:.2f}$, $c_0={coef0:.2f}$, $d={degree}$'
    elif kernel_type == 'rbf':
        param_text = f'Parameter: $\\gamma={gamma:.2f}$'
    else:
        param_text = ''

    if param_text:
        fig.text(0.5, 0.01, param_text, ha='center', fontsize=12, bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'))

    plt.tight_layout()
    # Leave room at the bottom for the parameter footer
    plt.subplots_adjust(bottom=0.15)
    plt.show()

    return fig

# Create interactive widget with academic styling
def interactive_kernel():
    """
    Build the dropdowns and sliders for the kernel explorer and hook them up
    to ``_visualize_kernel`` through ``ipywidgets.interact``.
    """
    # Appearance settings applied to every control
    shared = dict(
        style={'description_width': 'initial'},
        layout={'width': 'auto'},
    )

    def _dropdown(description, options, value):
        return widgets.Dropdown(options=options, value=value, description=description, **shared)

    def _int_slider(description, value, lo, hi, step):
        return widgets.IntSlider(value=value, min=lo, max=hi, step=step, description=description, **shared)

    controls = {
        'dataset_type': _dropdown(
            'Dataset Type:',
            [
                ('Regression', 'regression'),
                ('Binary Classification', 'binary'),
                ('Multiclass Classification', 'multiclass'),
            ],
            'regression',
        ),
        'kernel_type': _dropdown(
            'Kernel Function:',
            [
                ('Linear Kernel', 'linear'),
                ('Polynomial Kernel', 'polynomial'),
                ('RBF Kernel', 'rbf'),
            ],
            'rbf',
        ),
        # Logarithmic slider spanning 10^-1 .. 10^1
        'gamma': widgets.FloatLogSlider(
            value=1.0,
            base=10,
            min=-1,
            max=1,
            step=0.1,
            description='γ (Scale Parameter):',
            **shared
        ),
        'degree': _int_slider('Degree (Polynomial):', 2, 2, 5, 1),
        'coef0': widgets.FloatSlider(
            value=1.0,
            min=0.0,
            max=2.0,
            step=0.1,
            description='c₀ (Polynomial):',
            **shared
        ),
        'elev': _int_slider('Elevation (3D View):', 30, 0, 90, 5),
        'azim': _int_slider('Azimuth (3D View):', 30, -180, 180, 5),
    }

    interact(_visualize_kernel, **controls)

# Launch the interactive kernel explorer
interactive_kernel()


interactive(children=(Dropdown(description='Dataset Type:', layout=Layout(width='auto'), options=(('Regression…

In [9]:

class KernelRidgeRegression(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression.

    This class implements Kernel Ridge Regression, which combines Ridge Regression
    with the kernel trick to learn non-linear functions. Training solves the dual
    system (K + lambda_reg * I) alpha = y; predictions are k(X, X_train) @ alpha.

    Parameters:
    -----------
    kernel : str or callable, default='linear'
        Kernel name or kernel object. If a string, it is resolved through
        ``get_kernel`` (i.e. it must be a key of KERNEL_DICT). Otherwise it must
        be callable as ``kernel(X1, X2)``; a ``compute_gram_matrix(X)`` method is
        used for the training Gram matrix when available.
    lambda_reg : float, default=1.0
        Regularization parameter.
    kernel_params : dict, default=None
        Additional parameters for the kernel constructor (only used when
        ``kernel`` is a string). ``None`` means no extra parameters.
    """

    def __init__(self, kernel='linear', lambda_reg=1.0, kernel_params=None):
        self.kernel = kernel
        self.lambda_reg = lambda_reg
        # Store the argument untouched: scikit-learn's clone() requires that
        # __init__ neither copies nor replaces its parameters. The previous
        # `kernel_params or {}` made clone() fail for None / empty dicts.
        self.kernel_params = kernel_params
        # Fitted state, populated by fit()
        self.dual_coef_ = None
        self.X_train_ = None
        self._kernel_fn = None

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        -----------
        X : numpy.ndarray, shape (n_samples, n_features)
            Training data.
        y : numpy.ndarray, shape (n_samples,)
            Target values.

        Returns:
        --------
        self : object
            Returns self.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Resolve the kernel: names go through the registry, objects are used as-is
        if isinstance(self.kernel, str):
            params = self.kernel_params or {}
            kernel_fn = get_kernel(self.kernel, **params)
        else:
            kernel_fn = self.kernel

        # Prefer the kernel's own Gram-matrix helper, but accept plain
        # callables that only implement kernel(X1, X2).
        gram_fn = getattr(kernel_fn, 'compute_gram_matrix', None)
        K = gram_fn(X) if gram_fn is not None else kernel_fn(X, X)

        # Add regularization to diagonal
        K_reg = K + self.lambda_reg * np.eye(K.shape[0])

        # Solve the dual problem
        dual_coef = np.linalg.solve(K_reg, y)

        # Commit the fitted state only once everything succeeded
        self.X_train_ = X
        self._kernel_fn = kernel_fn
        self.dual_coef_ = dual_coef

        return self

    def predict(self, X):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        -----------
        X : numpy.ndarray, shape (n_samples, n_features)
            Samples.

        Returns:
        --------
        y_pred : numpy.ndarray, shape (n_samples,)
            Predicted values.

        Raises:
        -------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self._kernel_fn is None or self.dual_coef_ is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This KernelRidgeRegression instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        # Compute kernel between test points and training points
        K_test = self._kernel_fn(np.asarray(X), self.X_train_)

        # Compute predictions
        return np.dot(K_test, self.dual_coef_)



In [20]:
def evaluate_kernel_regression_tabular(kernel_type='rbf', gamma=1.0, degree=3, random_state=None):
    """
    Evaluate Kernel Ridge Regression with the specified kernel type and parameters.
    Displays results in a clean tabular format using tabulate and visualizes feature space.

    A synthetic 1-D dataset is generated, split 80/20, standardized, and a model
    is fitted for each regularization value in a fixed list. Note that the
    "best" model is the one with the highest R² on the test split, so the
    reported best test score is an optimistic estimate.

    Parameters:
    -----------
    kernel_type : str
        Type of kernel ('linear', 'polynomial', or 'rbf')
    gamma : float
        Gamma parameter for RBF and polynomial kernels
    degree : int
        Degree parameter for polynomial kernel
    random_state : int or None, default=None
        Seed for the noise added to the synthetic targets. With ``None`` the
        global NumPy random stream is used, so results vary between runs.

    Returns:
    --------
    best_model : KernelRidgeRegression
        The best model found

    Raises:
    -------
    ValueError
        If ``kernel_type`` is not one of the supported kernels.
    RuntimeError
        If no candidate produced a comparable (non-NaN) test R².
    """
    # Resolve kernel parameters first so an unsupported kernel fails immediately
    if kernel_type == 'linear':
        kernel_params = {}
        banner = "Evaluating Linear Kernel with different regularization values..."
    elif kernel_type == 'polynomial':
        kernel_params = {'degree': degree, 'gamma': gamma, 'coef0': 1.0}
        banner = f"Evaluating Polynomial Kernel (degree={degree}, gamma={gamma}) with different regularization values..."
    elif kernel_type == 'rbf':
        kernel_params = {'gamma': gamma}
        banner = f"Evaluating RBF Kernel (gamma={gamma}) with different regularization values..."
    else:
        raise ValueError(
            f"Unknown kernel_type: {kernel_type!r}. Expected 'linear', 'polynomial' or 'rbf'."
        )

    # Generate nonlinear regression data
    print("Generating regression dataset...")
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    X = np.linspace(-3, 3, 200).reshape(-1, 1)
    y = np.sin(X.ravel()) + X.ravel()**2 / 6 + 0.3 * rng.randn(200)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize features (scaler fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Define regularization parameters to evaluate
    lambda_regs = [0.001, 0.01, 0.1, 1.0, 10.0]

    results = []
    best_model = None
    best_lambda = None
    best_score = float('-inf')  # For R², higher is better

    print(banner)

    # Evaluate models with different regularization parameters
    for lambda_reg in lambda_regs:
        model = KernelRidgeRegression(kernel=kernel_type, lambda_reg=lambda_reg, kernel_params=kernel_params)
        model.fit(X_train_scaled, y_train)

        # Calculate R² scores on both splits
        train_r2 = r2_score(y_train, model.predict(X_train_scaled))
        test_r2 = r2_score(y_test, model.predict(X_test_scaled))

        results.append({
            'λ': lambda_reg,
            'Train R²': train_r2,
            'Test R²': test_r2
        })

        # Track the best model together with its λ so the two can never disagree
        if test_r2 > best_score:
            best_score = test_r2
            best_model = model
            best_lambda = lambda_reg

    if best_model is None:
        raise RuntimeError("No model produced a comparable test R² score.")

    # Convert results to DataFrame
    results_df = pd.DataFrame(results)

    # Display results in a nicely formatted table using tabulate
    print("\nPerformance Metrics (R² Score):")
    print(tabulate(results_df, headers='keys', tablefmt='fancy_grid',
                  floatfmt='.6f', showindex=False, numalign='center'))

    # Highlight the best model
    print(f"\nBest Model: {kernel_type.capitalize()} Kernel with λ={best_lambda}")
    print(f"Best Test R²: {best_score:.6f}")

    # Create a figure with two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))

    # Visualize the model predictions
    visualize_model(X, y, best_model, scaler, kernel_type, best_lambda, gamma, degree, ax=ax1)

    # Visualize feature space
    visualize_feature_space(X_train_scaled, y_train, kernel_type, kernel_params, ax=ax2)

    plt.tight_layout()
    plt.show()

    return best_model

def visualize_model(X, y, model, scaler, kernel_type, lambda_reg, gamma=None, degree=None, ax=None):
    """
    Plot the data together with the prediction curve of a fitted model.

    Parameters:
    -----------
    X : numpy.ndarray
        Input features in original (unscaled) units; its min and max span
        the prediction grid, which is passed through ``scaler`` before
        being handed to ``model``.
    y : numpy.ndarray
        Target values
    model : KernelRidgeRegression
        Trained model (only its ``predict`` method is used)
    scaler : StandardScaler
        Scaler used to standardize features (only ``transform`` is used)
    kernel_type : str
        Type of kernel: 'linear', 'polynomial' or 'rbf'. Any other value
        draws the data points only, without a prediction curve.
    lambda_reg : float
        Regularization parameter (shown in the legend)
    gamma : float, optional
        Gamma parameter (for RBF and polynomial kernels). When omitted the
        legend shows "n/a" instead of failing to format ``None``.
    degree : int, optional
        Degree parameter (for polynomial kernel)
    ax : matplotlib.axes.Axes, optional
        Axes to plot on; a new 10x6 figure is created when omitted.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Plot original data
    ax.scatter(X, y, c='black', s=30, alpha=0.5, label='Data')

    # Dense grid over the observed range so the fitted curve looks smooth
    X_plot = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)
    y_pred = model.predict(scaler.transform(X_plot))

    # gamma is optional in the signature, so it must not be formatted blindly
    gamma_text = "n/a" if gamma is None else f"{gamma:.3f}"

    # kernel type -> (line colour, legend label)
    curve_styles = {
        'linear': ('blue', f"Linear Kernel (λ={lambda_reg:.3f})"),
        'polynomial': ('red', f"Polynomial Kernel (d={degree}, γ={gamma_text}, λ={lambda_reg:.3f})"),
        'rbf': ('green', f"RBF Kernel (γ={gamma_text}, λ={lambda_reg:.3f})"),
    }
    if kernel_type in curve_styles:
        color, label = curve_styles[kernel_type]
        ax.plot(X_plot, y_pred, color=color, linewidth=3, label=label)

    ax.set_title("Kernel Ridge Regression", fontsize=16)
    ax.set_xlabel("x", fontsize=14)
    ax.set_ylabel("y", fontsize=14)
    ax.legend(fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.7)

def visualize_feature_space(X, y, kernel_type, kernel_params, ax=None):
    """
    Visualize a 2-D embedding of the feature space induced by the kernel.

    Up to 100 evenly spaced samples are embedded and coloured by target
    value. The title also reports whether the kernel matrix on those
    samples is PSD, together with its largest and smallest eigenvalue.

    Parameters:
    -----------
    X : numpy.ndarray
        Input features (scaled), shape (n_samples, n_features)
    y : numpy.ndarray
        Target values
    kernel_type : str
        Type of kernel: 'linear', 'polynomial' or 'rbf'
    kernel_params : dict
        Kernel parameters, forwarded to ``get_kernel``. 'degree' is required
        for the polynomial kernel and 'gamma' for the RBF kernel because
        both are shown in the title.
    ax : matplotlib.axes.Axes, optional
        Axes to plot on; a new 10x6 figure is created when omitted.

    Raises:
    -------
    ValueError
        If ``kernel_type`` is not one of the three kernels this function
        knows how to embed.
    """
    from sklearn.decomposition import PCA, KernelPCA

    # Get kernel instance
    kernel = get_kernel(kernel_type, **kernel_params)

    # Reject kernels we cannot embed *before* any figure is created; previously
    # such a value only failed later on an unbound `title` / `kpca`.
    if kernel_type not in ('linear', 'polynomial', 'rbf'):
        raise ValueError(f"Unknown kernel: {kernel_type}")

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Title depends only on the kernel, not on the dimensionality of X
    if kernel_type == 'linear':
        title = "Linear Feature Space (Original Space)"
    elif kernel_type == 'polynomial':
        title = f"Polynomial Kernel Feature Space (d={kernel_params['degree']})"
    else:
        title = f"RBF Kernel Feature Space (γ={kernel_params['gamma']:.3f})"

    # Gram matrix on the full data (only the 1-D non-linear branch consumes it)
    K = kernel.compute_gram_matrix(X)

    # Use a subset of points for visualization (max 100)
    n_samples = min(100, X.shape[0])
    indices = np.linspace(0, X.shape[0]-1, n_samples).astype(int)
    K_subset = K[indices][:, indices]
    X_subset = X[indices]
    y_subset = y[indices]

    # Check if kernel matrix is PSD (returns a flag and the eigenvalues)
    is_psd, eigenvalues = kernel.is_psd(X_subset)

    if X.shape[1] == 1:
        # 1-D inputs cannot go through 2-component (kernel) PCA directly
        if kernel_type == 'linear':
            # Original coordinate on the first axis, zeros on the second
            X_pca = np.column_stack([X_subset.flatten(), np.zeros(len(indices))])
        else:
            # Embed the samples with MDS on a dissimilarity derived from K
            from sklearn.manifold import MDS
            mds = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
            # Convert kernel matrix to distance matrix (higher similarity = lower distance)
            # NOTE(review): 1 - K/max(K) is a heuristic, not the kernel-induced
            # distance sqrt(K_ii + K_jj - 2 K_ij) - confirm this is intended.
            D = 1 - K_subset / np.max(K_subset)
            X_pca = mds.fit_transform(D)
    else:
        # Normal case for data with at least 2 dimensions
        if kernel_type == 'linear':
            # For linear kernel, we can just use PCA on the original data
            reducer = PCA(n_components=2)
        elif kernel_type == 'polynomial':
            reducer = KernelPCA(n_components=2, kernel='poly',
                                gamma=kernel_params.get('gamma', 1.0),
                                degree=kernel_params.get('degree', 3),
                                coef0=kernel_params.get('coef0', 1.0))
        else:
            reducer = KernelPCA(n_components=2, kernel='rbf',
                                gamma=kernel_params.get('gamma', 1.0))
        X_pca = reducer.fit_transform(X_subset)

    # Plot the points in the feature space
    scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y_subset, cmap='viridis',
                         s=50, alpha=0.8, edgecolors='k')

    # Add a colorbar
    plt.colorbar(scatter, ax=ax, label='Target Value')

    # Add eigenvalue information
    psd_status = "PSD ✓" if is_psd else "Not PSD ✗"

    # Add title and labels
    ax.set_title(f"{title}\n({psd_status}, λ_max={eigenvalues.max():.2f}, λ_min={eigenvalues.min():.2f})",
                 fontsize=16)
    ax.set_xlabel("Principal Component 1", fontsize=14)
    ax.set_ylabel("Principal Component 2", fontsize=14)
    ax.grid(True, linestyle='--', alpha=0.7)

def compare_kernels_interactive():
    """
    Build and show the widget panel for comparing kernel regression settings.

    Three controls (kernel type, γ, polynomial degree) are wired to
    ``evaluate_kernel_regression_tabular``; every change clears the output
    area and re-runs the evaluation with the current values. The evaluation
    is also triggered once explicitly after the widgets are displayed.
    """
    from IPython.display import display, clear_output

    # Everything the evaluation prints or plots lands in this widget
    results_area = widgets.Output()

    def update(kernel_type, gamma, degree):
        # Replace the previous rendering rather than appending to it
        with results_area:
            clear_output(wait=True)
            evaluate_kernel_regression_tabular(kernel_type, gamma, degree)

    # Appearance shared by every control
    shared_look = dict(
        style={'description_width': 'initial'},
        layout={'width': 'auto'},
    )

    controls = {
        'kernel_type': widgets.Dropdown(
            options=[
                ('Linear Kernel', 'linear'),
                ('Polynomial Kernel', 'polynomial'),
                ('RBF Kernel', 'rbf'),
            ],
            value='rbf',
            description='Kernel Type:',
            **shared_look
        ),
        # Log-scaled slider: exponents -2..1, i.e. 10^-2 .. 10^1
        'gamma': widgets.FloatLogSlider(
            value=1.0,
            base=10,
            min=-2,
            max=1,
            step=0.1,
            description='γ (Scale Parameter):',
            **shared_look
        ),
        'degree': widgets.IntSlider(
            value=3,
            min=2,
            max=5,
            step=1,
            description='Degree (Polynomial):',
            **shared_look
        ),
    }

    # Bind the controls to the callback by parameter name
    panel = widgets.interactive(update, **controls)

    # Controls first, results underneath
    display(panel)
    display(results_area)

    # Render once with the initial control values
    update(**{name: control.value for name, control in controls.items()})
    
    
# Print the section banner, then launch the widget-driven comparison.
print(
    "Kernel Ridge Regression Comparison",
    "=================================",
    "Select a kernel type and parameters to see performance metrics and visualization.",
    sep="\n",
)
compare_kernels_interactive()

Kernel Ridge Regression Comparison
Select a kernel type and parameters to see performance metrics and visualization.


interactive(children=(Dropdown(description='Kernel Type:', index=2, layout=Layout(width='auto'), options=(('Li…

Output()

In [21]:

class KernelSVM:
    """
    Binary kernel SVM trained on the dual formulation with a simplified
    SMO (sequential minimal optimization) solver.

    Parameters:
    -----------
    kernel : str, default='rbf'
        Kernel type. Can be 'linear', 'polynomial', or 'rbf'.
    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C.
    gamma : float, default=1.0
        Kernel coefficient for 'rbf' and 'polynomial' kernels.
    degree : int, default=3
        Degree of the polynomial kernel.
    coef0 : float, default=1.0
        Independent term in the polynomial kernel.
    tol : float, default=1e-3
        Tolerance on the KKT conditions; a multiplier is only optimized
        when it violates them by more than this amount.
    max_iter : int, default=1000
        Maximum number of full passes over the training set. Training
        stops earlier after the first pass that changes no multiplier.
    random_state : int, numpy.random.RandomState or None, default=None
        Source of randomness for choosing the second multiplier of each
        SMO step. None uses NumPy's global random state.
    """

    def __init__(self, kernel='rbf', C=1.0, gamma=1.0, degree=3, coef0=1.0, tol=1e-3, max_iter=1000,
                 random_state=None):
        self.kernel = kernel
        self.C = C
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        # Fitted state, populated by fit()
        self.support_vectors_ = None          # training rows with alpha > 1e-5
        self.support_vector_indices_ = None   # their row indices in the training set
        self.dual_coef_ = None                # alpha_i * y_i for each support vector
        self.intercept_ = None                # bias term b
        self.classes_ = None                  # the two class labels, sorted
        self.X_train_ = None                  # training inputs as passed to fit()

    def _kernel_function(self, X1, X2):
        """Compute the kernel matrix between the rows of X1 and the rows of X2."""
        X1 = np.asarray(X1, dtype=float)
        X2 = np.asarray(X2, dtype=float)

        if self.kernel == 'linear':
            return np.dot(X1, X2.T)
        elif self.kernel == 'polynomial':
            return (self.gamma * np.dot(X1, X2.T) + self.coef0) ** self.degree
        elif self.kernel == 'rbf':
            # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for all pairs at once
            X1_norm = np.sum(X1 ** 2, axis=1).reshape(-1, 1)
            X2_norm = np.sum(X2 ** 2, axis=1).reshape(1, -1)
            distances = X1_norm + X2_norm - 2 * np.dot(X1, X2.T)
            # Round-off can push the expansion slightly below zero, which
            # would give kernel values above 1; clamp it.
            np.maximum(distances, 0.0, out=distances)
            return np.exp(-self.gamma * distances)
        else:
            raise ValueError(f"Unknown kernel: {self.kernel}")

    def _make_rng(self):
        """Return the random source used for partner selection during fit."""
        if self.random_state is None:
            # The module exposes the same sampling functions as a RandomState
            return np.random
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        return np.random.RandomState(self.random_state)

    def fit(self, X, y):
        """
        Fit the SVM model according to the given training data.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training vectors.
        y : array-like of shape (n_samples,)
            Target values. Should contain only two classes.

        Returns:
        --------
        self : object
            Fitted estimator.

        Raises:
        -------
        ValueError
            If y does not contain exactly two distinct classes, or the
            configured kernel name is unknown.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # Store training data
        self.X_train_ = X

        # Map the two labels to {-1, +1}; the smaller label becomes -1
        self.classes_ = np.unique(y)
        if len(self.classes_) != 2:
            raise ValueError("KernelSVM only supports binary classification")

        y_binary = np.where(y == self.classes_[0], -1, 1)
        n_samples = X.shape[0]

        # Precompute the full kernel matrix once
        K = self._kernel_function(X, X)

        rng = self._make_rng()

        # Dual variables and bias
        alphas = np.zeros(n_samples)
        b = 0

        for _ in range(self.max_iter):
            alpha_changed = 0

            for i in range(n_samples):
                # Prediction error on sample i: E_i = f(x_i) - y_i
                E_i = np.sum(alphas * y_binary * K[i]) + b - y_binary[i]

                # Only work on multipliers that violate the KKT conditions
                violates_kkt = (y_binary[i] * E_i < -self.tol and alphas[i] < self.C) or \
                               (y_binary[i] * E_i > self.tol and alphas[i] > 0)
                if not violates_kkt:
                    continue

                # Pick j != i uniformly: draw from n-1 slots and skip over i
                j = rng.randint(n_samples - 1)
                if j >= i:
                    j += 1

                E_j = np.sum(alphas * y_binary * K[j]) + b - y_binary[j]

                alpha_i_old = alphas[i]
                alpha_j_old = alphas[j]

                # Box bounds for alpha_j that keep both multipliers in [0, C]
                if y_binary[i] != y_binary[j]:
                    L = max(0, alpha_j_old - alpha_i_old)
                    H = min(self.C, self.C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0, alpha_i_old + alpha_j_old - self.C)
                    H = min(self.C, alpha_i_old + alpha_j_old)

                if L == H:
                    continue

                # Second derivative of the objective along the constraint line
                eta = 2 * K[i, j] - K[i, i] - K[j, j]
                if eta >= 0:
                    continue

                # Unconstrained optimum for alpha_j, clipped to [L, H]
                alpha_j_new = alpha_j_old - y_binary[j] * (E_i - E_j) / eta
                alpha_j_new = min(H, max(L, alpha_j_new))

                # Reject negligible steps WITHOUT touching alphas, so the
                # equality constraint sum(alpha_i * y_i) = 0 stays intact.
                if abs(alpha_j_new - alpha_j_old) < 1e-5:
                    continue

                # Move alpha_i by the opposite amount to preserve the constraint
                alpha_i_new = alpha_i_old + y_binary[i] * y_binary[j] * (alpha_j_old - alpha_j_new)
                alphas[i] = alpha_i_new
                alphas[j] = alpha_j_new

                # Bias candidates that make the KKT conditions hold for i and j
                delta_i = y_binary[i] * (alpha_i_new - alpha_i_old)
                delta_j = y_binary[j] * (alpha_j_new - alpha_j_old)
                b1 = b - E_i - delta_i * K[i, i] - delta_j * K[i, j]
                b2 = b - E_j - delta_i * K[i, j] - delta_j * K[j, j]

                if 0 < alphas[i] < self.C:
                    b = b1
                elif 0 < alphas[j] < self.C:
                    b = b2
                else:
                    b = (b1 + b2) / 2

                alpha_changed += 1

            # A full pass without any update ends training
            if alpha_changed == 0:
                break

        # Keep only samples with a non-negligible multiplier
        sv_indices = alphas > 1e-5
        self.support_vector_indices_ = np.where(sv_indices)[0]
        self.support_vectors_ = X[sv_indices]
        self.dual_coef_ = alphas[sv_indices] * y_binary[sv_indices]
        self.intercept_ = b

        return self

    def decision_function(self, X):
        """
        Compute the decision function of X.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns:
        --------
        decision : array-like of shape (n_samples,)
            Decision function values.

        Raises:
        -------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.support_vectors_ is None:
            raise RuntimeError("KernelSVM is not fitted yet; call fit() first")

        K = self._kernel_function(X, self.support_vectors_)
        return np.dot(K, self.dual_coef_) + self.intercept_

    def predict(self, X):
        """
        Perform classification on samples in X.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns:
        --------
        y_pred : array-like of shape (n_samples,)
            Class labels for samples in X. Negative decision values map to
            ``classes_[0]``, all others to ``classes_[1]``.
        """
        decision = self.decision_function(X)
        return np.where(decision < 0, self.classes_[0], self.classes_[1])




In [22]:
def generate_datasets(dataset_type='moons', n_samples=200, noise=0.2, random_state=42):
    """
    Build one of the synthetic two-feature classification datasets.

    Parameters:
    -----------
    dataset_type : str, default='moons'
        Which dataset to build: 'moons', 'circles', or 'linearly_separable'.
    n_samples : int, default=200
        Number of samples to generate.
    noise : float, default=0.2
        Noise level passed to the moons and circles generators; it is not
        used for the 'linearly_separable' dataset.
    random_state : int, default=42
        Random seed for reproducibility.

    Returns:
    --------
    X : array-like of shape (n_samples, 2)
        Generated features.
    y : array-like of shape (n_samples,)
        Generated target values.

    Raises:
    -------
    ValueError
        If ``dataset_type`` is not one of the supported names.
    """
    # Guard clause: reject unsupported names before touching any generator
    if dataset_type not in ('moons', 'circles', 'linearly_separable'):
        raise ValueError(f"Unknown dataset type: {dataset_type}")

    # Lazy builders, so only the requested generator actually runs
    builders = {
        'moons': lambda: make_moons(
            n_samples=n_samples, noise=noise, random_state=random_state
        ),
        'circles': lambda: make_circles(
            n_samples=n_samples, noise=noise, factor=0.5, random_state=random_state
        ),
        'linearly_separable': lambda: make_classification(
            n_samples=n_samples, n_features=2, n_redundant=0,
            n_informative=2, random_state=random_state,
            n_clusters_per_class=1
        ),
    }

    features, labels = builders[dataset_type]()
    return features, labels

# Function to visualize decision boundaries and feature space
def visualize_kernel_svm(dataset_type='moons', kernel='rbf', C=1.0, gamma=1.0, 
                         degree=3, coef0=1.0, n_samples=200, noise=0.2, 
                         random_state=42, elev=30, azim=30):
    """
    Train a Kernel SVM on a synthetic dataset and plot its decision
    boundary next to a 3-D kernel-PCA projection of the data.

    Parameters:
    -----------
    dataset_type : str, default='moons'
        Type of dataset. Can be 'moons', 'circles', or 'linearly_separable'.
    kernel : str, default='rbf'
        Kernel type. Can be 'linear', 'polynomial', or 'rbf'.
    C : float, default=1.0
        Regularization parameter.
    gamma : float, default=1.0
        Kernel coefficient for 'rbf' and 'polynomial' kernels.
    degree : int, default=3
        Degree of the polynomial kernel.
    coef0 : float, default=1.0
        Independent term in the polynomial kernel.
    n_samples : int, default=200
        Number of samples to generate.
    noise : float, default=0.2
        Standard deviation of Gaussian noise added to the data.
    random_state : int, default=42
        Random seed for reproducibility. It drives the dataset, the
        train/test split and (when it is an integer) the SVM training.
    elev : float, default=30
        Elevation angle for 3D plot.
    azim : float, default=30
        Azimuth angle for 3D plot.

    Returns:
    --------
    svm : KernelSVM
        The fitted classifier.
    train_accuracy : float
        Accuracy on the training split.
    test_accuracy : float
        Accuracy on the held-out 20% test split.
    """
    # Generate dataset
    X, y = generate_datasets(dataset_type, n_samples, noise, random_state)

    # Split BEFORE scaling so the scaler never sees the test samples;
    # fitting it on all data would leak test statistics into training.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Standardize with statistics from the training split only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # All samples in the same scaled space, for the mesh extent and the projection
    X_scaled = scaler.transform(X)

    # Train Kernel SVM
    svm = KernelSVM(kernel=kernel, C=C, gamma=gamma, degree=degree, coef0=coef0)
    if isinstance(random_state, (int, np.integer)):
        # The solver draws from NumPy's global RNG; seed it for the duration
        # of the fit so identical arguments give an identical boundary, then
        # put the caller's RNG state back.
        saved_rng_state = np.random.get_state()
        np.random.seed(random_state)
        try:
            svm.fit(X_train, y_train)
        finally:
            np.random.set_state(saved_rng_state)
    else:
        svm.fit(X_train, y_train)

    # Make predictions
    y_train_pred = svm.predict(X_train)
    y_test_pred = svm.predict(X_test)

    # Calculate accuracy
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Create figure with 2 subplots
    fig = plt.figure(figsize=(18, 8))

    # --- Left panel: decision boundary -------------------------------------
    ax1 = fig.add_subplot(121)

    # Mesh grid covering the scaled data plus a 0.5 margin on every side
    x_min, x_max = X_scaled[:, 0].min() - 0.5, X_scaled[:, 0].max() + 0.5
    y_min, y_max = X_scaled[:, 1].min() - 0.5, X_scaled[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))

    # Predict on the mesh grid
    Z = svm.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot decision regions
    ax1.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.RdBu)

    # Training points as circles, test points as triangles
    scatter_train = ax1.scatter(X_train[:, 0], X_train[:, 1], c=y_train, 
                               edgecolors='k', cmap=plt.cm.RdBu, marker='o', s=80, alpha=0.8)
    scatter_test = ax1.scatter(X_test[:, 0], X_test[:, 1], c=y_test, 
                              edgecolors='k', cmap=plt.cm.RdBu, marker='^', s=80, alpha=0.8)

    # Ring the support vectors
    ax1.scatter(svm.support_vectors_[:, 0], svm.support_vectors_[:, 1], 
               s=120, facecolors='none', edgecolors='k', linewidths=1.5)

    # Set labels and title
    ax1.set_xlabel('Feature 1', fontsize=14)
    ax1.set_ylabel('Feature 2', fontsize=14)
    ax1.set_title(f'Decision Boundary - {dataset_type.capitalize()} Dataset\n'
                 f'{kernel.capitalize()} Kernel (C={C}, γ={gamma})\n'
                 f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}', 
                 fontsize=16)
    ax1.legend([scatter_train, scatter_test], ['Training Data', 'Testing Data'], 
              loc='upper right', fontsize=12)
    ax1.grid(True, linestyle='--', alpha=0.7)

    # --- Right panel: feature space in 3D ----------------------------------
    ax2 = fig.add_subplot(122, projection='3d')

    # Use Kernel PCA with the matching kernel to visualize the feature space
    if kernel == 'linear':
        kpca = KernelPCA(n_components=3, kernel='linear')
    elif kernel == 'polynomial':
        kpca = KernelPCA(n_components=3, kernel='poly', 
                         gamma=gamma, degree=degree, coef0=coef0)
    elif kernel == 'rbf':
        kpca = KernelPCA(n_components=3, kernel='rbf', gamma=gamma)

    # Transform data to feature space
    X_kpca = kpca.fit_transform(X_scaled)

    # Plot data in feature space
    scatter_feature = ax2.scatter(X_kpca[:, 0], X_kpca[:, 1], X_kpca[:, 2], 
                                 c=y, edgecolors='k', cmap=plt.cm.RdBu, s=80, alpha=0.8)

    # Set labels and title
    ax2.set_xlabel('Component 1', fontsize=14)
    ax2.set_ylabel('Component 2', fontsize=14)
    ax2.set_zlabel('Component 3', fontsize=14)
    ax2.set_title(f'Feature Space Projection - {dataset_type.capitalize()} Dataset\n'
                 f'{kernel.capitalize()} Kernel (γ={gamma}' + 
                 (f', degree={degree}' if kernel == 'polynomial' else '') + ')', 
                 fontsize=16)

    # Set 3D view angle
    ax2.view_init(elev=elev, azim=azim)

    plt.tight_layout()
    plt.show()

    return svm, train_accuracy, test_accuracy

# Interactive visualization function
def interactive_kernel_svm():
    """
    Build and show the widget panel that drives ``visualize_kernel_svm``.

    Ten controls (dataset, kernel, C, γ, degree, coef0, sample count, noise
    and the two 3D view angles) are bound to the visualization; each change
    clears the output area and redraws with the current values. The
    visualization is also triggered once explicitly after the widgets are
    displayed.
    """
    # Appearance shared by every control
    shared_look = dict(
        style={'description_width': 'initial'},
        layout={'width': 'auto'},
    )

    # Everything the visualization prints or plots lands in this widget
    output = widgets.Output()

    def update(dataset_type, kernel, C, gamma, degree, coef0, n_samples, noise, elev, azim):
        # Replace the previous rendering rather than appending to it
        with output:
            clear_output(wait=True)
            visualize_kernel_svm(
                dataset_type=dataset_type,
                kernel=kernel,
                C=C,
                gamma=gamma,
                degree=degree,
                coef0=coef0,
                n_samples=n_samples,
                noise=noise,
                elev=elev,
                azim=azim
            )

    # Small factories so each control below is a single readable line
    def dropdown(label, options, value):
        return widgets.Dropdown(options=options, value=value,
                                description=label, **shared_look)

    def log_slider(label, value, min_exp, max_exp):
        # Base-10 log slider; min_exp/max_exp are exponents
        return widgets.FloatLogSlider(value=value, base=10, min=min_exp, max=max_exp,
                                      step=0.1, description=label, **shared_look)

    def float_slider(label, value, low, high, step):
        return widgets.FloatSlider(value=value, min=low, max=high, step=step,
                                   description=label, **shared_look)

    def int_slider(label, value, low, high, step):
        return widgets.IntSlider(value=value, min=low, max=high, step=step,
                                 description=label, **shared_look)

    # Keys match the parameter names of `update`
    controls = {
        'dataset_type': dropdown(
            'Dataset:',
            [
                ('Moons', 'moons'),
                ('Circles', 'circles'),
                ('Linearly Separable', 'linearly_separable'),
            ],
            'moons',
        ),
        'kernel': dropdown(
            'Kernel:',
            [
                ('Linear Kernel', 'linear'),
                ('Polynomial Kernel', 'polynomial'),
                ('RBF Kernel', 'rbf'),
            ],
            'rbf',
        ),
        'C': log_slider('C (Regularization):', 1.0, -2, 2),        # 10^-2 .. 10^2
        'gamma': log_slider('γ (Scale Parameter):', 1.0, -2, 1),   # 10^-2 .. 10^1
        'degree': int_slider('Degree (Polynomial):', 3, 1, 5, 1),
        'coef0': float_slider('Coef0 (Polynomial):', 1.0, 0.0, 5.0, 0.1),
        'n_samples': int_slider('Number of Samples:', 200, 50, 500, 50),
        'noise': float_slider('Noise Level:', 0.2, 0.0, 0.5, 0.05),
        'elev': int_slider('Elevation (3D):', 30, 0, 90, 5),
        'azim': int_slider('Azimuth (3D):', 30, 0, 360, 5),
    }

    # Bind the controls to the callback by parameter name
    interactive_widget = widgets.interactive(update, **controls)

    # Controls first, results underneath
    display(interactive_widget)
    display(output)

    # Render once with the initial control values
    update(**{name: control.value for name, control in controls.items()})

# Run the interactive visualization
print(
    "Kernel SVM Interactive Visualization",
    "====================================",
    "Select parameters to visualize Kernel SVM with different datasets and kernels.",
    sep="\n",
)
interactive_kernel_svm()

Kernel SVM Interactive Visualization
Select parameters to visualize Kernel SVM with different datasets and kernels.


interactive(children=(Dropdown(description='Dataset:', layout=Layout(width='auto'), options=(('Moons', 'moons'…

Output()