In [1]:
import os
from pathlib import Path
import random
from tqdm import tqdm

import puppets_data

import numpy as np
import scipy
import networkx as nx

import ConnectionNetworkX as cnx

import matplotlib.pyplot as plt

from pyLDLE2 import datasets
from pyLDLE2 import buml_
from pyLDLE2 import util_


matplotlib.get_backend() =  module://matplotlib_inline.backend_inline


Initializing and pre-processing the puppets data.

In [2]:
p = "C:\\Users\\Sawyer\\Documents\\IdeaProjects\\project-data\\PuppetsData"
save_dir_root = "C:\\Users\\Sawyer\\Documents\\IdeaProjects\\connection-graph-wasserstein\\data"

# Select how many images you want to use
N_IMAGES = 1000

In [3]:
Y, labelsMat, _ = puppets_data.puppets_data(p)

X.shape =  (8100, 100)


In [4]:
X = np.array(Y.copy())
totalImages = X.shape[0]
sampleIndices = random.sample(range(totalImages), N_IMAGES)
X = [ X[i] for i in sampleIndices ]

X = np.array(X)

Initialize a model and use Local PCA at each vertex neighborhood.

In [5]:
N_NEAREST_NEIGHBORS = 30

buml_obj = buml_.BUML(local_opts={'algo':'LPCA', 'k': N_NEAREST_NEIGHBORS},
                      vis_opts={'c': labelsMat[:,0], 'save_dir': save_dir_root},
                      verbose=True, debug=True, exit_at='local_views')

buml_obj.fit(X=X)


local_opts['k_nn0'] = 750 is created.
Options provided:
local_opts:
{
    "Atilde_method": "LDLE_1",
    "N": 100,
    "U_method": "k_nn",
    "algo": "LPCA",
    "alpha": 1,
    "debug": true,
    "delta": 0.9,
    "gl_type": "unnorm",
    "k": 30,
    "k_nn": 49,
    "k_nn0": 750,
    "k_tune": 7,
    "lambda1_decay": 0.75,
    "lambda1_init": 8,
    "lambda1_min": 0.001,
    "max_iter": 300,
    "max_sparsity": 0.9,
    "metric": "euclidean",
    "n_proc": 12,
    "p": 0.99,
    "power": 5,
    "pp_n_thresh": 32,
    "radius": 0.5,
    "reg": 0.0,
    "scale_by": "gamma",
    "tau": 50,
    "to_postprocess": true,
    "verbose": true
}
intermed_opts:
{
    "algo": "best",
    "debug": true,
    "eta_max": 25,
    "eta_min": 5,
    "len_S_thresh": 256,
    "local_algo": "LPCA",
    "metric": "euclidean",
    "n_proc": 12,
    "n_times": 4,
    "verbose": true
}
global_opts:
{
    "add_dim": false,
    "align_transform": "rigid",
    "align_w_parent_only": true,
    "alpha": 0.3,
    

Read in the intrinsic dimension of the CNX and then initialize it by symmetrizing the kNN graph.

In [6]:
INTRINSIC_DIMENSION = 2

# U is a directed boolean adjacency matrix, so we symmetrize by adding U.T.
# This weights directed and double edges equally.
puppetGraphAdjacency = scipy.sparse.lil_matrix(buml_obj.LocalViews.U + buml_obj.LocalViews.U.T - scipy.sparse.identity(N_IMAGES, format='lil'))

puppetGraph = cnx.ConnectionNetworkX(puppetGraphAdjacency, INTRINSIC_DIMENSION)


Interpolate the local PCA views by registering the overlap of adjacent nodes' neighborhoods, and then using orthogonal procrustes to align the views. This becomes the connection at the corresponding edge.

In [7]:
nRemoteEdges = 0

# A remote edge occurs when |N_i \cap N_j| < dim; i.e., not enough overlapping
# neighbors to register the points and do procrustes. Currently handling this
# by removing said edges. With n=1000 images I observed 4.3% remote edges.

for i in tqdm(range(N_IMAGES)):
    n_i = nx.neighbors(puppetGraph, i)
    for j in [j for j in n_i if j > i]:

        n_ij = buml_obj.LocalViews.U[i,:].multiply(buml_obj.LocalViews.U[j,:]).nonzero()[1]

        if len(n_ij) >= INTRINSIC_DIMENSION:
            X_Uij_i = buml_obj.LocalViews.local_param_post.eval_({'view_index': i, 'data_mask': n_ij})
            X_Uij_j = buml_obj.LocalViews.local_param_post.eval_({'view_index': j, 'data_mask': n_ij})

            Tij, _ = scipy.linalg.orthogonal_procrustes(X_Uij_i, X_Uij_j)

            puppetGraph.updateEdgeSignature((i,j), Tij)

        else:
            puppetGraph.removeEdge((i,j))
            nRemoteEdges += 1

print('Proportion of remote edges : ', nRemoteEdges / puppetGraph.nNodes)

100%|██████████| 1000/1000 [00:59<00:00, 16.80it/s]

Proportion of remote edges :  0.039





In [28]:
puppetGraph.printConnectionLaplacianEigenvalues()

[9.54247183 9.31503712 8.97135205 8.69117917 8.31852708 8.05038314
 7.1924543  7.0809662  6.53217139 6.16554422]
MOST LIKELY INCONSISTENT: |lambda_min| >= 1e-8. 
