In [2]:
#|default_exp core_keops
## Standard libraries
import os
import math
import numpy as np
import time
from fastcore.all import *
from nbdev.showdoc import *

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.auto import tqdm

## project specifics
import diffusion_curvature
import pygsp

# Core (PyKeops)
> Minimal diffusion curvature implementation with PyKeops

A great computational challenge for this otherwise relatively simple algorithm is dealing with large matrices that have to be raised to fairly high powers. Within the usual framework of dense or sparse matrices, these two aims conflict. Sparse matrices can be stored efficiently but cannot be powered efficiently, because GPUs are designed for block-based computations. Indeed, it is estimated that a matrix must be over 99% empty before a sparse representation gives any speed-up when powered on a GPU. On the other hand, we expect diffusion curvature to work with matrices too large to hold in RAM, let alone in GPU memory. We need to be able to accommodate adjacency and distance matrices formed from tens of thousands of cells, or from point clouds containing millions of points.

An elegant solution to this problem is described by the PyKeOps project. Its authors note that large matrices, like distance or adjacency matrices, may not be sparse, especially once raised to several powers of diffusion, but they are *symbolic*: they are the result of some function applied to two different arrays. The library introduces the `LazyTensor`, which allows these symbolic arrays to remain unevaluated until they really need to be collapsed. They promise a ten- to hundred-fold speedup over vanilla PyTorch, as well as support for matrices too large for PyTorch to handle.

This notebook provides a minimal reimplementation of the core logic of diffusion curvature using PyKeops.

Our implementation differs from past implementations in a few key ways:


# Implementation

In [3]:
# testing data
# Sample a toy torus point cloud used to exercise the functions defined below.
# NOTE(review): `ks` is presumably the per-point ground-truth curvature and 1000 the
# number of sampled points -- confirm against `diffusion_curvature.datasets.torus`.
from diffusion_curvature.datasets import torus
X, ks = torus(1000)

## Basic Graph Operations, with PyKeops

In [8]:
#|export
import torch
from pykeops.torch import LazyTensor
def lazy_distance_matrix(
    X,
    use_cuda = torch.cuda.is_available(),
    ):
    """Build the symbolic matrix of pairwise Euclidean distances between the rows of `X`.

    Parameters
    ----------
    X : array-like of shape (N, D)
        Point cloud, one point per row. Anything `torch.as_tensor` accepts
        (NumPy array, nested list, or an existing tensor).
    use_cuda : bool
        Put the points on the GPU when True, on the CPU otherwise. The default is
        evaluated once, when the function is defined.

    Returns
    -------
    LazyTensor
        Symbolic (N, N) matrix whose (i, j) entry is ||X[i] - X[j]||.

    Raises
    ------
    ValueError
        If `X` is not two-dimensional.
    """
    device = torch.device('cuda' if use_cuda else 'cpu')
    # `as_tensor` accepts an existing tensor without the copy-construct UserWarning
    # that `torch.tensor(tensor)` emits, and avoids a copy when dtype/device already match.
    points = torch.as_tensor(X, dtype=torch.float32, device=device)
    if points.dim() != 2:
        raise ValueError(
            f"X must be a 2-D array of shape (N, D); got shape {tuple(points.shape)}"
        )
    # Slices of a strided view (e.g. a transposed array) are not contiguous in memory.
    points = points.contiguous()
    x_i = LazyTensor(points[:, None, :])  # (N, 1, D) LazyTensor
    y_j = LazyTensor(points[None, :, :])  # (1, N, D) LazyTensor
    # Squared differences summed over the coordinate axis, then square-rooted.
    return ((x_i - y_j) ** 2).sum(-1).sqrt()

In [9]:
D = lazy_distance_matrix(X)

In [14]:
I = torch.zeros(len(X), dtype=torch.float32)
I[0] = 1
I = LazyTensor(I)

In [24]:
D.shape

(1000, 1000)

In [23]:
D2 = D.keops_tensordot(D, (4, 7), (7,), (1,), (0,)).sum_reduction(dim=1)

AssertionError: 

In [22]:
D.flatten()

AttributeError: 'LazyTensor' object has no attribute 'flatten'

In [204]:
#|export
def lazy_gaussian_kernel(
        X,
        kernel_type = "fixed",
        sigma:float = 1, # kernel bandwidth used by the fixed kernel; must be positive
        k:float = 10, # reserved for the adaptive kernel (bandwidth from the distance to the kth nearest neighbor); currently unused
        anisotropic_density_normalization:float = 0.5, # if nonzero, performs anisotropic density normalization with this exponent
        use_cuda = torch.cuda.is_available(),
):
    """Symbolic Gaussian affinity matrix between the rows of `X`.

    With `kernel_type="fixed"` the (i, j) entry is
    exp(-d_ij^2 / (2 sigma^2)) / (sigma * sqrt(2 pi)), where d_ij comes from
    `lazy_distance_matrix`.

    When `anisotropic_density_normalization` (call it alpha) is nonzero, the kernel is
    then rescaled to W_ij / (q_i^alpha * q_j^alpha), where q_i = sum_j W_ij is the
    row sum of the un-normalised kernel.

    Returns the kernel as a symbolic (N, N) LazyTensor.

    Raises
    ------
    AssertionError
        If `kernel_type` is not one of 'fixed' or 'adaptive'.
    NotImplementedError
        If `kernel_type == "adaptive"`; that branch has not been implemented with KeOps yet.
    ValueError
        If `sigma` is not strictly positive.
    """
    supported_kernel_types = {'fixed', 'adaptive'}
    assert kernel_type in supported_kernel_types, (
        f"kernel_type must be one of {sorted(supported_kernel_types)}, got {kernel_type!r}"
    )
    if kernel_type == "adaptive":
        # NOTE(review): a per-point bandwidth could probably be expressed by wrapping the
        # kth-neighbour distances as (N, 1, 1) and (1, N, 1) LazyTensor variables -- untested.
        raise NotImplementedError("Adaptive kernel runs up against limitations of pykeops...try fixed kernel instead")
    if sigma <= 0:
        raise ValueError(f"sigma must be positive, got {sigma}")

    D = lazy_distance_matrix(X, use_cuda=use_cuda)
    W = (1/(sigma*(2*torch.pi)**(0.5)))*(-(D**2)/(2*sigma**2)).exp()

    if anisotropic_density_normalization:
        # Reducing over j materialises the row sums as a dense (N, 1) tensor.
        density = W.sum(dim=1)
        scale = density ** anisotropic_density_normalization
        # Re-wrap the scale as an "i" variable (N, 1, 1) and a "j" variable (1, N, 1)
        # so that the division stays symbolic and yields an (N, N) matrix.
        W = W / LazyTensor(scale.view(-1, 1, 1)) / LazyTensor(scale.view(1, -1, 1))
    return W

In [197]:
LazyTensor(tensor(np.ones(len(X))[:,None]),axis=1).shape

(1, 1000)

In [194]:
LazyTensor(tensor(np.ones(len(X)))).T.shape

(1, 1, 1000)

In [206]:
dk = D.Kmin(10, dim=1)[:,-1]

In [215]:
LazyTensor(dk[:,None],axis=0).shape

(1000, 1, 1)

In [217]:
tensor(np.ones(len(X))[:,None]) @ LazyTensor(dk[:,None],axis=0)

TypeError: unsupported operand type(s) for @: 'Tensor' and 'LazyTensor'

In [None]:
LazyTensor(tensor(np.ones(len(X))[:,None]),axis=1).shape

(1, 1000)

In [203]:
LazyTensor(dk[:,None],axis=0).shape

(1000, 1)

In [184]:
(A * dk)

(1000, 1000, 1000)

In [168]:
#|export
def lazy_diffusion_matrix(A):
    """Row-normalise the affinity matrix `A` into a diffusion matrix.

    Every entry of `A` is divided by the sum of its row, where the row sums are
    obtained by reducing `A` over axis 1.
    """
    row_sums = A.sum(axis=1)
    # Insert an axis after the row axis so each row sum lines up with its own row.
    return A / row_sums[:, None]

In [None]:
def test_lazy_graph(A, X):
    """Unfinished smoke test for `lazy_diffusion_matrix`.

    Builds the diffusion matrix of `A` and indexes its first entry. Nothing is
    plotted, asserted or returned yet, and `X` is not used.
    """
    # compute diffusion matrix, then use plot_3d to plot the diffusion on the first point of the graph
    diffusion = lazy_diffusion_matrix(A)
    first_row = diffusion[0]

In [180]:
A = lazy_gaussian_kernel(X, kernel_type = "fixed", anisotropic_density_normalization=False)

In [181]:
A.shape

(1000, 1000)

In [156]:
A = lazy_gaussian_kernel(X, kernel_type = "fixed", anisotropic_density_normalization=True)

In [158]:
A = lazy_gaussian_kernel(X, kernel_type = "adaptive", k=10, anisotropic_density_normalization=False)

In [205]:
A = lazy_gaussian_kernel(X, kernel_type = "adaptive", k=10, anisotropic_density_normalization=True)

AttributeError: 'LazyTensor' object has no attribute 'view'

In [171]:
A.shape

(1000, 1000, 1000)

In [169]:
P = lazy_diffusion_matrix(A)



In [170]:
P.shape

(1000, 1000, 1000)

In [164]:
P @ torch.eye(1000)[0]
# get the first row of P with pykeops
# convert to torch tensor

ValueError: The 'K @ v' syntax is only supported for LazyTensors 'K' whose trailing dimension is equal to 1. Here, K.shape = (1000, 1000, 1000).

In [93]:
D.argKmin(5, dim=1)[:,-1]

[KeOps] Generating code for ArgKMin_Reduction reduction (with parameters 0) of formula Sqrt(Sum((a-b)**2)) with a=Var(0,3,0), b=Var(1,3,1) ... OK


tensor([415, 878, 218, 549, 799, 889, 921, 815, 645, 716,  90, 529, 450, 875,
        317, 554, 850, 855, 643, 301, 622, 118, 978, 593,  73, 409, 334, 294,
        991, 230,  21, 459, 331, 390, 226, 665, 652, 762, 610, 572, 968, 994,
        451, 139, 481, 264, 743, 897, 742, 221, 916, 646, 999, 265, 314, 828,
        628, 985,  18, 188, 567, 987, 308, 328,  58,  97, 939, 221,  93, 323,
        707, 196, 819, 900, 441, 288, 200, 416, 745,  46, 159, 872, 190, 474,
        763, 113, 881, 819, 291, 808, 461, 400, 414, 994, 549, 983, 250, 645,
        674, 293, 476, 386, 832, 864, 883, 989, 518, 845, 859, 409, 560, 683,
        664, 821,  91,  13, 737, 791,  30, 529, 808, 304, 366, 254, 677, 451,
        848, 427, 588,  11, 595, 375, 617, 614, 977, 516, 311, 818, 780, 401,
        387,  29, 358, 543, 490, 853, 436, 108, 556, 381, 716, 250, 351, 443,
        285, 788, 172, 338, 549,  80, 988, 336, 853, 849, 854, 376, 788, 233,
        706, 314, 546, 786, 545, 263,  24, 531, 569, 726, 494,  

In [None]:
#|export

def phate_distances(G):
    """Attach PHATE-style potential distances to the graph object `G`.

    Reads the diffusion matrix `G.Pt`, takes the negative log of each entry (offset by
    1e-6 so that zeros stay finite), and stores the pairwise Euclidean distances
    between the resulting rows in `G.D`.

    Parameters
    ----------
    G : object with a `Pt` attribute
        `Pt` must be a NumPy array or a SciPy sparse matrix. Sparse input is densified.

    Returns
    -------
    The same `G`, with `G.D` set to an (N, N) NumPy array.

    Raises
    ------
    AssertionError
        If `G.Pt` is None.
    TypeError
        If `G.Pt` is neither a NumPy array nor a SciPy sparse matrix.
    """
    # Imported locally so the exported function does not depend on notebook-level imports.
    import numpy as np
    import scipy.sparse
    from scipy.spatial.distance import cdist

    assert G.Pt is not None, "G.Pt must be computed before calling phate_distances"
    Pt = G.Pt
    if scipy.sparse.issparse(Pt):
        # TODO: There's likely a more efficient way of doing this than densifying.
        Pt = Pt.toarray()
    elif not isinstance(Pt, np.ndarray):
        raise TypeError(
            f"G.Pt must be a numpy array or a scipy sparse matrix, got {type(Pt).__name__}"
        )
    log_Pts = -np.log(Pt + 1e-6)
    G.D = cdist(log_Pts, log_Pts)  # Euclidean by default
    return G

In [None]:
def wasserstein_spread_of_diffusion(
                D, # manifold geodesic distances
                Pt, # powered diffusion matrix/t-step heat diffusions
                ):
        """
        Returns how "spread out" each diffusion is, with wasserstein distance
        Presumes that the manifold distances have been separately calculated

        Computes the elementwise product of `D` and `Pt` and sums it over the last
        axis, giving one value per row. Uses the array's own `sum` method, so the
        result has whatever array type `D * Pt` produces.
        """
        # `jnp` is never imported in this notebook; the array method avoids that dependency.
        return (D * Pt).sum(axis=-1)

# Tests

In [None]:
# sync changes to the library
# Asks the browser to save a notebook checkpoint, waits two seconds, then runs the
# project's `nbsync` task through pixi.
# NOTE(review): `IPython.notebook.save_checkpoint()` looks like the classic Notebook
# front-end API; it may be unavailable in JupyterLab / VS Code -- confirm.
from IPython.display import display, Javascript
import time
display(Javascript('IPython.notebook.save_checkpoint();'))
time.sleep(2)
!pixi run nbsync