# Part I

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

# Two-class toy data set: noisy points drawn from two concentric circles.
n_samples = 300
X, y = make_circles(
    n_samples=n_samples,
    noise=0.07,
    factor=0.5,
    random_state=42,
)

# X: (n_samples, 2) coordinates; y: (n_samples,) labels taking the values 0 and 1

def phi(X):
    """Feature map (x1, x2) -> (x1, x2, x1**2 + x2**2).

    Parameters
    ----------
    X : ndarray
        Array whose first two columns are read as x1 and x2.

    Returns
    -------
    ndarray
        The two input columns followed by a third column holding their
        sum of squares (the squared distance from the origin).
    """
    first, second = X[:, 0], X[:, 1]
    squared_radius = first**2 + second**2
    return np.column_stack((first, second, squared_radius))
X_phi = phi(X)  # lifted data, one row per sample: (x1, x2, x1^2 + x2^2)


from mpl_toolkits.mplot3d import Axes3D  # needed for 3D

fig = plt.figure(figsize=(12, 5))
radius_sq = X_phi[:, 2]

# Left panel: the squared radius on its own, laid out along a line
ax1 = fig.add_subplot(1, 2, 1)
ax1.scatter(radius_sq, np.zeros_like(radius_sq), c=y, cmap='viridis', s=40)
ax1.set(
    xlabel=r'$x_1^2 + x_2^2$ (radius$^2$)',
    title=r'Feature space: $x_1^2 + x_2^2$ distinguishes rings',
    yticks=[],
)

# Right panel: the full three-dimensional embedding
ax2 = fig.add_subplot(1, 2, 2, projection='3d')
ax2.scatter(*X_phi.T, c=y, cmap='viridis', s=40)
ax2.set(
    xlabel=r'$x_1$',
    ylabel=r'$x_2$',
    zlabel=r'$x_1^2 + x_2^2$',
    title=r'Mapped feature space: $(x_1, x_2, x_1^2 + x_2^2)$',
)

plt.tight_layout()
plt.show()


# Part II

## Data Generation

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import svd
from scipy.linalg import eigh

# 1. Generate data: x = sin(t) + noise, y = cos(t) + noise, with t uniform on [0, 2*pi)
np.random.seed(0)
N = 100          # number of samples
sigma2 = 0.012   # variance of the additive Gaussian noise
noise_std = np.sqrt(sigma2)

# Draw order (t, then n1, then n2) is kept so the seeded stream is unchanged.
t = np.random.uniform(low=0, high=2*np.pi, size=N)
n1 = np.random.normal(loc=0, scale=noise_std, size=N)
n2 = np.random.normal(loc=0, scale=noise_std, size=N)
x, y = np.sin(t) + n1, np.cos(t) + n2

# None makes rbf_kernel fall back to its variance-based default
gamma = None

# 2. Gaussian kernel
def rbf_kernel(z, gamma=None):
    """Gaussian (RBF) Gram matrix of a set of scalar samples.

    Parameters
    ----------
    z : ndarray
        Samples; flattened into a single column before use.
    gamma : float or None
        Coefficient in exp(-gamma * (z_i - z_j)**2). When None it is set
        to 1 / (2 * var(z)).

    Returns
    -------
    ndarray
        (n, n) matrix with entries exp(-gamma * (z_i - z_j)**2), where n is
        the number of elements in ``z``.
    """
    column = z.reshape(-1, 1)
    coef = 1.0 / (2*np.var(column)) if gamma is None else gamma
    pairwise_diff = column - column.T   # broadcasts to (n, n)
    return np.exp(-coef * pairwise_diff**2)

# 3. Centering function
def centerkernel(K):
    """Double-centre a square Gram matrix: return H @ K @ H with H = I - (1/n) 11^T."""
    size = len(K)
    centering = np.eye(size) - np.full((size, size), 1.0 / size)
    return (centering @ K) @ centering

# Gram matrices of the x and y samples, then their double-centred versions
K, L = (rbf_kernel(samples, gamma=gamma) for samples in (x, y))
HKH, HLH = centerkernel(K), centerkernel(L)
n_exact = len(HKH)   # number of samples behind the exact (full-kernel) estimators


## Incomplete Cholesky Implementation

In [3]:
# 4. Incomplete Cholesky
def IncompleteCholesky(K, tol, max_rank):
    """
    Pivoted incomplete Cholesky factorisation of a symmetric PSD Gram matrix.

    Greedily selects the sample with the largest residual diagonal entry as the
    next pivot and builds a low-rank factor G such that G @ G.T approximates K.
    The approximation is exact on the rows/columns of the selected pivots.

    Parameters
    ----------
    K : ndarray
        (n, n) kernel Gram matrix
    tol : float
        threshold for residual (stopping criterion): iteration stops once the
        mean residual diagonal drops to ``tol`` or below, or the next pivot's
        square root falls below ``tol``
    max_rank : int or None
        maximum rank of decomposition (None means up to n)

    Returns
    -------
    G : ndarray
        (n, m) matrix, where m is the effective rank found; rows are in the
        ORIGINAL sample order
    pivots : ndarray
        indices of selected pivot points (subspace generators), in selection order
    """
    K = np.asarray(K, dtype=float)
    # Size everything from the argument, not from a notebook-level global.
    n = K.shape[0]
    rank_cap = n if max_rank is None else min(max_rank, n)

    diag = np.diagonal(K).copy()          # residual diagonal, kept in pivoted order
    pvec = np.arange(n, dtype=int)        # pvec[k] = original index stored at pivoted row k
    G = np.zeros((n, rank_cap), dtype=float)  # factor, rows kept in pivoted order
    i = 0

    while i < rank_cap and np.sum(diag[i:]) > tol * (n - i):
        # Find new pivot with maximal unexplained variance
        jast = np.argmax(diag[i:]) + i
        # Bring it to position i: permutation, factor rows and residuals move together
        if i != jast:
            pvec[[i, jast]] = pvec[[jast, i]]
            G[[i, jast], :] = G[[jast, i], :]
            diag[[i, jast]] = diag[[jast, i]]

        pivot = np.sqrt(diag[i])
        if pivot < tol:
            break
        G[i, i] = pivot
        if i + 1 < n:
            # K is addressed through pvec (original order), but G is already in
            # pivoted order, so its rows must be addressed positionally.
            numer = K[pvec[i+1:], pvec[i]] - G[i+1:, :i] @ G[i, :i]
            G[i+1:, i] = numer / pivot
            # Update the residual (projected) diagonal
            diag[i+1:] -= G[i+1:, i]**2
        i += 1

    pivots = pvec[:i]
    # Keep only the columns actually computed and undo the row permutation
    G_out = np.zeros((n, i))
    G_out[pvec, :] = G[:, :i]

    return G_out, pivots

# Incomplete Cholesky factors of the two centred Gram matrices
r = 20        # cap on the number of Cholesky columns
tol = 1e-12   # residual threshold handed to IncompleteCholesky
ZX, icK_pivots = IncompleteCholesky(HKH, tol=tol, max_rank=r)
ZY, icL_pivots = IncompleteCholesky(HLH, tol=tol, max_rank=r)

# No separate centring of the factors: they are computed from the already-centred kernels.

# Trim both factors to a common number of columns
r_x, r_y = ZX.shape[1], ZY.shape[1]
r_ic = min(r_x, r_y)
ZX, ZY = ZX[:, :r_ic], ZY[:, :r_ic]


## 2.1 COCO

### IC COCO Implementation

In [4]:
# 5. Compute approximated COCO
def iccoco(ZX, ZY, tol):
    """
    COCO computed in incomplete-Cholesky (IC) coordinates.

    With HKH ~= ZX @ ZX.T and HLH ~= ZY @ ZY.T, substituting a = ZX.T @ alpha and
    b = ZY.T @ beta into the exact problem

        (1/n) HKH HLH beta = gamma HKH alpha,   (1/n) HLH HKH alpha = gamma HLH beta

    turns the RKHS-norm constraint alpha^T HKH alpha into a^T a, i.e. the
    right-hand metric becomes the identity. COCO is therefore the largest
    eigenvalue of [[0, M], [M^T, 0]] with M = (1/n) ZX^T ZY, which equals the
    largest singular value of M.

    Parameters
    ----------
    ZX, ZY : ndarray
        centred IC features, (n, r_x) and (n, r_y); both are truncated to
        their first min(r_x, r_y) columns
    tol : float
        retained for backward compatibility; unused, because the identity
        metric needs no ridge regularisation

    Returns
    -------
    ic_coco : float
        IC estimate of COCO (non-negative)
    f_iccoco, g_iccoco : ndarray
        witness functions evaluated at the n samples (ZX @ a and ZY @ b)
    corr_fg_iccoco : float
        Pearson correlation between f_iccoco and g_iccoco
    """
    n = ZX.shape[0]
    r_ic = min(ZX.shape[1], ZY.shape[1])   # ensure same rank
    ZX = ZX[:, :r_ic]
    ZY = ZY[:, :r_ic]

    # Cross-covariance in IC coordinates
    M = (ZX.T @ ZY) / n
    Z = np.zeros((r_ic, r_ic))
    left = np.block([[Z, M],
                     [M.T, Z]])

    # Standard symmetric eigenproblem; the spectrum is mirrored (+/- sigma_k),
    # so take the algebraically largest eigenvalue rather than the largest |.|,
    # which could land on the negative twin and flip the sign of the result.
    eigvals, eigvecs = eigh(left)
    top = int(np.argmax(eigvals))
    ic_coco = eigvals[top]

    alpha_iccoco = eigvecs[:r_ic, top]   # IC-X coefficients
    beta_iccoco = eigvecs[r_ic:, top]    # IC-Y coefficients

    # Witness functions in sample space
    f_iccoco = ZX @ alpha_iccoco
    g_iccoco = ZY @ beta_iccoco
    corr_fg_iccoco = np.corrcoef(f_iccoco, g_iccoco)[0, 1]

    return ic_coco, f_iccoco, g_iccoco, corr_fg_iccoco

# Run the IC-based COCO estimate on the truncated Cholesky factors ZX, ZY;
# the unpacked names are reused by the plotting cells below.
ic_coco, f_iccoco, g_iccoco, corr_fg_iccoco = iccoco(ZX, ZY, tol)

### Exact COCO Implementation

In [5]:
# 6. Exact COCO
def exactcoco(HKH, HLH, tol, max_rank):
    """
    Compute the COCO independence statistic from two centred kernel matrices,
    without incomplete Cholesky decomposition.

    Solves the generalised eigenproblem

        | 0          (1/n) K L | [alpha] = gamma | K + eps I   0         | [alpha]
        | (1/n) L K  0         | [beta ]         | 0           L + eps I | [beta ]

    with K = HKH, L = HLH, and returns the largest eigenvalue.

    Parameters
    ----------
    HKH, HLH : ndarray
        (n, n) centred Gram matrices of the two samples
    tol : float
        ridge added to the right-hand blocks; floored at 1e-6
    max_rank : int or None
        unused; kept so existing callers keep working

    Returns
    -------
    exact_coco : float
        largest generalised eigenvalue
    f_exactcoco, g_exactcoco : ndarray
        witness functions at the samples (HKH @ alpha, HLH @ beta)
    corr_fg_exactcoco : float
        Pearson correlation between the two witness vectors

    Raises
    ------
    ValueError
        if the two kernel matrices do not have the same shape
    """
    # Take the size from the argument rather than a notebook-level global.
    n = HKH.shape[0]
    if HLH.shape != HKH.shape:
        raise ValueError("HKH and HLH must have the same shape")

    # Use FULL kernels but with the COCO objective:
    Cxy = (HKH @ HLH) / n

    Z = np.zeros((n, n))
    left = np.block([[Z, Cxy],
                     [Cxy.T, Z]])

    # Regularize HKH, HLH slightly to make right-hand matrix positive definite
    eps = max(tol, 1e-6)   # make sure ridge is not too tiny
    ridge = eps * np.eye(n)
    right = np.block([[HKH + ridge, Z],
                      [Z, HLH + ridge]])

    # Eigenvalues come in +/- pairs; pick the algebraically largest one so the
    # statistic is never the mirrored negative value.
    eigvals, eigvecs = eigh(left, right)
    top = int(np.argmax(eigvals))
    exact_coco = eigvals[top]   # gamma_max

    alpha = eigvecs[:n, top]
    beta = eigvecs[n:, top]

    # witness functions:
    f_exactcoco = HKH @ alpha
    g_exactcoco = HLH @ beta
    corr_fg_exactcoco = np.corrcoef(f_exactcoco, g_exactcoco)[0, 1]

    return exact_coco, f_exactcoco, g_exactcoco, corr_fg_exactcoco

# Exact (full-kernel) COCO on the centred Gram matrices; the results feed the comparison plots.
exact_coco, f_exactcoco, g_exactcoco, corr_fg_exactcoco = exactcoco(HKH, HLH, tol, r)

### COCO Results

In [6]:
# 7. Plot COCO results
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# COCO: f comparison (f is a function of x, so x is the abscissa)
axs[0].plot(x, f_iccoco, 'm.', label='Approx COCO $f$')
axs[0].plot(x, f_exactcoco, 'c.', label='Exact COCO $f$')
axs[0].set_title('COCO $f$: approx vs exact')
axs[0].set_xlabel('x')
axs[0].legend()

# COCO: g comparison (g is built from the y-kernel, so it is plotted against y, not x)
axs[1].plot(y, g_iccoco, 'm.', label='Approx COCO $g$')
axs[1].plot(y, g_exactcoco, 'c.', label='Exact COCO $g$')
axs[1].set_title('COCO $g$: approx vs exact')
axs[1].set_xlabel('y')
axs[1].legend()

# COCO: mapped data (f(x_i), g(y_i)) for both estimators
axs[2].scatter(f_iccoco, g_iccoco, alpha=0.4, label='IC', color='magenta')
axs[2].scatter(f_exactcoco, g_exactcoco, alpha=0.4, label='Exact', color='cyan')
axs[2].set_title(f'COCO: IC = {ic_coco:.2f} VS Exact = {exact_coco:.2f}; Corr. = {corr_fg_iccoco:.2f}')
axs[2].set_xlabel('$f$')
axs[2].set_ylabel('$g$')
axs[2].legend()

plt.tight_layout()
plt.show()

## 2.2 Kernel CCA

### IC KCCA Implementation

In [7]:
# 8. Implementation of CCA using Incomplete Cholesky

def ickcca(ZX, ZY, epsilon):
    """
    Approximated kernel CCA (KCCA) with regularization, in incomplete-Cholesky
    coordinates.

    Parameters:
        ZX, ZY: incomplete Cholesky factors, (n x r_x) and (n x r_y);
                the two ranks may differ
        epsilon: regularization parameter added to the diagonal of Cxx and Cyy
    Returns (in this order):
        gamma_ickcca: largest generalised eigenvalue
        alpha_ickcca, beta_ickcca: coefficient vectors for ZX and ZY
        f_ickcca, g_ickcca: witness functions (ZX @ alpha, ZY @ beta)
        ic_kcca: Pearson correlation between f_ickcca and g_ickcca

    Form the generalised eigenproblem
    | 0  Cxy | [a] = gamma | Cxx  0 | [a]
    |Cxy^T 0 | [b]         | 0  Cyy | [b]
    """
    r_x, r_y = ZX.shape[1], ZY.shape[1]

    # Regularised covariance matrices in IC space
    Cxx = ZX.T @ ZX + epsilon * np.eye(r_x)
    Cyy = ZY.T @ ZY + epsilon * np.eye(r_y)
    Cxy = ZX.T @ ZY

    # Zero blocks are shaped individually so that r_x != r_y is supported.
    left = np.block([[np.zeros((r_x, r_x)), Cxy],
                     [Cxy.T, np.zeros((r_y, r_y))]])
    right = np.block([[Cxx, np.zeros((r_x, r_y))],
                      [np.zeros((r_y, r_x)), Cyy]])

    # Find the largest positive eigenvalue. The spectrum is mirrored (+/- pairs),
    # so ranking by absolute value could select the negative twin instead.
    eigvals, eigvecs = eigh(left, right)
    top = int(np.argmax(eigvals))
    gamma_ickcca = eigvals[top]

    # Extract CCA vectors
    alpha_ickcca = eigvecs[:r_x, top]
    beta_ickcca = eigvecs[r_x:, top]

    # Witness functions as projections of training points
    f_ickcca = ZX @ alpha_ickcca
    g_ickcca = ZY @ beta_ickcca

    # Approximated correlation of mapped variables
    ic_kcca = np.corrcoef(f_ickcca, g_ickcca)[0, 1]

    return gamma_ickcca, alpha_ickcca, beta_ickcca, f_ickcca, g_ickcca, ic_kcca

# Run IC-based KCCA on the Cholesky factors.
# NOTE(review): `tol` (set to 1e-12 in the IC cell) is passed as the regularisation
# epsilon, so the problem is close to unregularised — confirm this is intended.
gamma_ickcca, alpha_ickcca, beta_ickcca, f_ickcca, g_ickcca, ic_kcca = ickcca(ZX, ZY, tol)

### Exact KCCA Implementation

In [8]:
# 9. Implementation of exact CCA
def exactkcca(K, L, epsilon):
    """
    Empirical kernel CCA (KCCA) with regularization.

    Solves the generalised eigenproblem
    | 0    K L | [alpha] = gamma | K^2 + eps I   0           | [alpha]
    | L K  0   | [beta ]         | 0             L^2 + eps I | [beta ]

    Parameters:
        K, L: centered Gram matrices (n x n)
        epsilon: regularization parameter
    Returns (in this order):
        gamma_exactkcca: largest generalised eigenvalue
        alpha_exactkcca, beta_exactkcca: coefficient vectors for K and L
        f_exactkcca, g_exactkcca: projected features (K @ alpha, L @ beta)
        exact_kcca: Pearson correlation between f_exactkcca and g_exactkcca
    """
    n = K.shape[0]
    # covariance operators in RKHS
    Cxx = K @ K + epsilon * np.eye(n)   # ≈ K^2 + κI
    Cyy = L @ L + epsilon * np.eye(n)   # ≈ L^2 + κI
    Cxy = K @ L                         # ≈ K L

    Z = np.zeros_like(Cxy)
    left = np.block([[Z, Cxy],
                     [Cxy.T, Z]])
    right = np.block([[Cxx, Z],
                      [Z, Cyy]])

    # Eigenvalues come in +/- pairs; take the algebraically largest so the
    # canonical correlation is not reported with a flipped sign.
    eigvals, eigvecs = eigh(left, right)
    top = int(np.argmax(eigvals))
    gamma_exactkcca = eigvals[top]

    alpha_exactkcca = eigvecs[:n, top]
    beta_exactkcca = eigvecs[n:, top]

    f_exactkcca = K @ alpha_exactkcca
    g_exactkcca = L @ beta_exactkcca
    exact_kcca = np.corrcoef(f_exactkcca, g_exactkcca)[0, 1]

    return gamma_exactkcca, alpha_exactkcca, beta_exactkcca, f_exactkcca, g_exactkcca, exact_kcca

# Exact KCCA on the centred Gram matrices.
# NOTE(review): `tol` (set to 1e-12 in the IC cell) is passed as the regularisation
# epsilon, so the problem is close to unregularised — confirm this is intended.
gamma_exactkcca, alpha_exactkcca, beta_exactkcca, f_exactkcca, g_exactkcca, exact_kcca = exactkcca(HKH, HLH, tol)

### KCCA Results

In [9]:
# 10. Plot KCCA results
fig, axs = plt.subplots(1, 3, figsize=(15,5))

# KCCA: f comparison (f is a function of x, so x is the abscissa)
axs[0].plot(x, f_ickcca, 'g.', label='Approx KCCA $f$')
axs[0].plot(x, f_exactkcca, 'b.', label='Exact KCCA $f$')
axs[0].set_title('KCCA $f$: approx vs exact')
axs[0].set_xlabel('x')
axs[0].legend()

# KCCA: g comparison (g is built from the y-kernel, so it is plotted against y, not x)
axs[1].plot(y, g_ickcca, 'g.', label='Approx KCCA $g$')
axs[1].plot(y, g_exactkcca, 'b.', label='Exact KCCA $g$')
axs[1].set_title('KCCA $g$: approx vs exact')
axs[1].set_xlabel('y')
axs[1].legend()

# KCCA: mapped data (f(x_i), g(y_i)) for both estimators
axs[2].scatter(f_ickcca, g_ickcca, alpha=0.4, label='Approx KCCA', color='green')
axs[2].scatter(f_exactkcca, g_exactkcca, alpha=0.4, label='Exact KCCA', color='blue')
axs[2].set_title(f'KCCA: IC = {ic_kcca:.2f} VS Exact = {exact_kcca:.2f}')
axs[2].set_xlabel('$f$')
axs[2].set_ylabel('$g$')
axs[2].legend()

plt.tight_layout()
plt.show()

## Compare COCO VS KCCA

In [10]:
# 11. Plots COCO VS KCCA
fig, axs = plt.subplots(1, 4, figsize=(20,5))
ax_raw, ax_exact, ax_ic_coco, ax_ic_kcca = axs

# Panel 1: the raw sample
ax_raw.plot(x, y, 'k.')
ax_raw.set(xlabel='X', ylabel='Y', title='Original data')

# Panel 2: both exact estimators overlaid in witness-function coordinates
ax_exact.scatter(f_exactcoco, g_exactcoco, c='cyan', label='COCO', alpha=0.6)
ax_exact.scatter(f_exactkcca, g_exactkcca, c='blue', label='KCCA', alpha=0.6)
ax_exact.set(
    xlabel='f',
    ylabel='g',
    title=f'Mapped data by COCO (={exact_coco:.2f}) and KCCA (={exact_kcca:.2f}) [Exact]',
)
ax_exact.legend()

# Panel 3: COCO from the incomplete Cholesky factors
ax_ic_coco.scatter(f_iccoco, g_iccoco, c='magenta', alpha=0.6)
ax_ic_coco.set(
    xlabel='f',
    ylabel='g',
    title=f'Approximated COCO mapped data (={ic_coco:.2f})',
)

# Panel 4: KCCA from the incomplete Cholesky factors
ax_ic_kcca.scatter(f_ickcca, g_ickcca, c='green', alpha=0.6)
ax_ic_kcca.set(
    xlabel='f',
    ylabel='g',
    title=f' Approximated KCCA mapped data (={ic_kcca:.2f})',
)

plt.tight_layout()
plt.show()

In [17]:
# Compare witness functions
# f panels use x as the abscissa; g panels use y, because g is built from the y-kernel.
fig, axs = plt.subplots(1, 5, figsize=(25,5))

# IC f COCO
axs[0].plot(x, f_iccoco, 'm.', label='Approx COCO $f$')
axs[0].set_title('IC $f$ COCO')
axs[0].set_xlabel('x')

# IC f KCCA
axs[1].plot(x, f_ickcca, 'g.', label='Approx KCCA $f$')
axs[1].set_title('IC $f$ KCCA')
axs[1].set_xlabel('x')

# Exact f comparison
axs[2].plot(x, f_exactkcca, 'b.', label='Exact KCCA $f$')
axs[2].plot(x, f_exactcoco, 'c.', label='Exact COCO $f$')
axs[2].set_title('Exact $f$ Functions')
axs[2].set_xlabel('x')
axs[2].legend()

# Exact g comparison (plotted against y, not x)
axs[3].plot(y, g_exactkcca, 'b.', label='Exact KCCA $g$')
axs[3].plot(y, g_exactcoco, 'c.', label='Exact COCO $g$')
axs[3].set_title('Exact $g$ Functions')
axs[3].set_xlabel('y')
axs[3].legend()

# IC g comparison (plotted against y, not x)
axs[4].plot(y, g_iccoco,'m.', label='Approx COCO $g$')
axs[4].plot(y, g_ickcca, 'g.', label='Approx KCCA $g$')
axs[4].set_title('IC $g$ Functions')
axs[4].set_xlabel('y')
axs[4].legend()

plt.tight_layout()
plt.show()

In [11]:
# COCO versus the RBF kernel parameter γ, where k(a, b) = exp(-γ (a - b)^2)
gammas = np.logspace(-2, 1, 20)   # e.g. 0.01 … 10
cocos  = []

# Loop-local names (HKH_g, HLH_g) are used so the sweep does not overwrite the
# K, L, HKH, HLH that the earlier cells computed and plotted.
for gam in gammas:
    HKH_g = centerkernel(rbf_kernel(x, gamma=gam))
    HLH_g = centerkernel(rbf_kernel(y, gamma=gam))
    coco_g, _, _, _ = exactcoco(HKH_g, HLH_g, tol, r)
    cocos.append(coco_g)

plt.figure(figsize=(5,4))
plt.semilogx(gammas, cocos, '-o')
plt.xlabel(r'$\gamma$ (RBF bandwidth)')
plt.ylabel('COCO')
plt.title('COCO as a function of kernel bandwidth')
plt.tight_layout()
plt.show()

# mapped data for a few γ
gammas_to_show = [0.1, 0.5, 1.0]
fig, axs = plt.subplots(1, len(gammas_to_show), figsize=(5*len(gammas_to_show), 4))

# np.atleast_1d keeps the loop working when only one γ is requested
# (plt.subplots then returns a single Axes rather than an array).
for ax, gam in zip(np.atleast_1d(axs), gammas_to_show):
    HKH_g = centerkernel(rbf_kernel(x, gamma=gam))
    HLH_g = centerkernel(rbf_kernel(y, gamma=gam))
    coco_val, f, g_vals, corr_fg = exactcoco(HKH_g, HLH_g, tol, r)

    ax.scatter(f, g_vals, s=10)
    ax.set_xlabel('f')
    ax.set_ylabel('g')
    # Report the COCO statistic itself; the f/g correlation is shown separately
    # instead of being labelled as COCO.
    ax.set_title(f'γ={gam:.2g}, COCO={coco_val:.2f}, Corr.={corr_fg:.2f}')

plt.tight_layout()
plt.show()


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=cb182644-878e-48cb-992b-68a78a5afe3d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>