In [1]:
#|default_exp 3c1_graph_construction_experiments
#|hide
## Standard libraries
import os
import math
import numpy as np
import time
# Configure environment
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE']='false' # Tells Jax not to hog all of the memory to this process.

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.notebook import tqdm, trange

## project specifics
import diffusion_curvature
from diffusion_curvature.datasets import *
from diffusion_curvature.graphs import *
from diffusion_curvature.core import *
import jax
import jax.numpy as jnp
jax.devices()

%load_ext autoreload
%autoreload 2

# 3d1 Effects of Graph Construction on Negative Curvature

In 3d, we noticed that the kNN value has a suspiciously large effect on the sign of the measured curvature. With $k=5$, diffusion curvature (DC) has trouble picking up any negative curvature; with $k=15$, it struggles to identify anything as positive.

Here we'll test the method by probing its ability to separate saddles and spheres in various dimensions.

In [3]:
import graphtools
from diffusion_curvature.core import DiffusionCurvature
from diffusion_curvature.datasets import rejection_sample_from_saddle, sphere

def _diffusion_curvature_at_first_point(X, dim, knn, t):
    """Build a kNN graph on point cloud `X` and return `DC.curvature(..., idx=0)`.

    The graph and estimator settings are shared by the saddle and sphere
    measurements so that the two are directly comparable.
    """
    G = graphtools.Graph(X, anisotropy=1, knn=knn, decay=None).to_pygsp()
    # A fresh estimator is built for every graph (as the original code did);
    # NOTE(review): it is not visible from here whether DiffusionCurvature keeps
    # per-graph state, so the instance is deliberately not reused.
    DC = DiffusionCurvature(
        laziness_method="Entropic",
        flattening_method="Mean Fixed",
        comparison_method="Subtraction",
        points_per_cluster=None, # construct separate comparison spaces around each point
        comparison_space_size_factor=1
    )
    # idx=0 — presumably selects the first sampled point; confirm against
    # DiffusionCurvature.curvature.
    return DC.curvature(G, t=t, dim=dim, knn=knn, idx=0)


def get_dc_of_saddles_and_spheres(
    dim = 3,
    num_samplings = 100,
    knn = 15,
    n_points = 2000,
    t = 25,
):
    """Compare diffusion curvature on repeatedly sampled saddles and spheres.

    For each of `num_samplings` rounds, a saddle and a sphere are freshly
    sampled, a kNN graph is built on each, and the diffusion curvature at
    index 0 is recorded. The two resulting distributions are then drawn as
    overlaid histograms.

    Parameters
    ----------
    dim : int
        Dimension passed to the samplers and to the curvature estimator.
    num_samplings : int
        Number of independent saddle/sphere samplings to measure.
    knn : int
        Number of nearest neighbours used for graph construction (and passed
        on to the curvature estimator).
    n_points : int
        Number of points drawn per sampled shape.
    t : int
        Value forwarded as `t` to `DiffusionCurvature.curvature`.

    Returns
    -------
    None
        The function only produces a matplotlib figure.
    """
    ks_dc_saddles = []
    ks_dc_spheres = []

    for _ in tqdm(range(num_samplings)):
        # Sample in the same order as before (saddle, then sphere) so any
        # seeded random stream is consumed identically.
        X_saddle, _ = rejection_sample_from_saddle(n_points, dim)
        X_sphere, _ = sphere(n_points, d=dim)
        # Compute Diffusion Curvature on Sphere
        ks_dc_spheres.append(
            _diffusion_curvature_at_first_point(X_sphere, dim=dim, knn=knn, t=t)
        )
        # Compute Diffusion Curvature on Saddle
        ks_dc_saddles.append(
            _diffusion_curvature_at_first_point(X_saddle, dim=dim, knn=knn, t=t)
        )

    # plot a histogram of the diffusion curvatures; distinct colours and
    # partial transparency keep the two overlaid distributions readable.
    fig, ax = plt.subplots()
    ax.hist(ks_dc_saddles, bins=20, color='blue', alpha=0.6, label='Saddles')
    ax.hist(ks_dc_spheres, bins=20, color='orange', alpha=0.6, label='Spheres')
    ax.set_xlabel('Diffusion curvature')
    ax.set_ylabel('Frequency')
    ax.legend()

In [None]:
# Run the saddle-vs-sphere comparison at knn=15 and plot the resulting
# diffusion-curvature histograms.
get_dc_of_saddles_and_spheres(knn=15)

  0%|          | 0/100 [00:00<?, ?it/s]