In [1]:
# Enable IPython's autoreload extension in mode 2, so edited modules are re-imported
# before each cell executes (handy while the imported project packages are being
# developed alongside this notebook).
%load_ext autoreload
%autoreload 2

In [2]:
# Torch device management
import torch

# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    # Make GPU 0 the current CUDA device before the generic "cuda" handle is created.
    torch.cuda.set_device(0)
device = torch.device("cuda" if use_cuda else "cpu")

print(f"Using device: {device}")

Using device: cuda


In [3]:
import embedders

In [4]:
# Load Polblogs

# With labels=True the loader's result is unpacked into two values: the distances
# and the labels used by the classifiers further down.
polblogs_data = embedders.dataloaders.load("polblogs", labels=True)
polblogs_dists, polblogs_labels = polblogs_data

Top CC has 1222 nodes; original graph has 1490 nodes.


In [5]:
# Specify signature - useful to re-initialize the manifold here

# Seed torch before the manifold is built (presumably its initial coordinates come from
# torch's RNG - TODO confirm). Not all seeds are stable.
# NOTE(review): this comment previously said seed 0 "trains for 3000 iterations at lr=1e-2
# (burn-in 1e-3)", but the call below uses 100 burn-in + 900 training iterations at
# lr=1e-1 (burn-in 1e-2) - confirm seed 0 is still stable with these settings.
torch.manual_seed(0)

# One (curvature, dimension) pair per component; (-1, 6) prints as H_1.0^6.
signature = [(-1, 6)]
pm = embedders.manifolds.ProductManifold(signature=signature)
print(pm.name)

# Rescale distances so that the largest entry becomes 1
max_dist = polblogs_dists.max()
dists_rescaled = polblogs_dists / max_dist

# Get embedding
# 100 burn-in + 900 training iterations = the 1000 steps shown by the progress bar.
h6_train_kwargs = dict(
    device=device,
    burn_in_iterations=100,
    training_iterations=900,
    learning_rate=1e-1,
    burn_in_learning_rate=1e-2,
    scale_factor_learning_rate=1e-1,
)
embedders.coordinate_learning.train_coords(pm, dists_rescaled, **h6_train_kwargs)

# The return value of train_coords is not used; the learned coordinates are read back
# from the manifold object and moved to a CPU numpy array for the tree classifiers.
h6_polblogs = pm.x_embed.detach().cpu().numpy()

H_1.0^6


  0%|          | 0/1000 [00:00<?, ?it/s]

 does not have profile information (Triggered internally at ../third_party/nvfuser/csrc/graph_fuser.cpp:104.)
  return _inner(u, v, keepdim=keepdim, dim=dim)


In [6]:
# Compare productDT and sklearn on this dataset
from sklearn.model_selection import cross_val_score

from hyperdt.tree import HyperbolicDecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier


def cv_eval(model, name, X, y):
    """Cross-validate ``model`` on ``(X, y)`` and print a one-line summary.

    Args:
        model: Estimator handed to ``cross_val_score``.
        name: Label printed at the start of the line.
        X: Feature matrix.
        y: Targets.

    Returns:
        None. Prints ``name``, a tab, then the mean and standard deviation of the
        ``cv=5`` fold scores, both scaled to percentages with two decimals.
    """
    fold_scores = cross_val_score(model, X, y, cv=5)
    mean_pct = fold_scores.mean() * 100
    std_pct = fold_scores.std() * 100
    print(f"{name}\t{mean_pct:.2f} +/- {std_pct:.2f}")


# Depth-3 trees on the H^6 coordinates: the hyperbolic tree versus sklearn's tree.
hdt = HyperbolicDecisionTreeClassifier(max_depth=3, skip_hyperboloid_check=True)
dt = DecisionTreeClassifier(max_depth=3)

# Evaluate in the same order as the printed results: HyperDT first, then DT.
for _model, _label in ((hdt, "HyperDT"), (dt, "DT")):
    cv_eval(_model, _label, h6_polblogs, polblogs_labels)

HyperDT	90.83 +/- 1.22
DT	90.59 +/- 0.93


In [7]:
# Same thing, but now we do H2 x E2 x S2
torch.manual_seed(0)

# One (curvature, dimension) pair per component: hyperbolic, Euclidean, spherical.
# NOTE: this rebinds the notebook-global `signature`, which later cells read.
signature = [(-1, 2), (0, 2), (1, 2)]
pm2 = embedders.manifolds.ProductManifold(signature=signature)
print(pm2.name)

# Get embedding
# Same schedule as the H^6 run: 100 burn-in + 900 training iterations (1000 total).
product_train_kwargs = dict(
    device=device,
    burn_in_iterations=100,
    training_iterations=900,
    learning_rate=1e-1,
    burn_in_learning_rate=1e-2,
    scale_factor_learning_rate=1e-1,
)
embedders.coordinate_learning.train_coords(pm2, dists_rescaled, **product_train_kwargs)

# Pull the learned coordinates off the manifold as a CPU numpy array.
h2_e2_s2_polblogs = pm2.x_embed.detach().cpu().numpy()

H_1.0^2 x E_0.0^2 x S_1.0^2


  0%|          | 0/1000 [00:00<?, ?it/s]

In [8]:
# We assume a dummy dimension for our Euclidean embeddings when we run ProductDT
import numpy as np


def fix_X(X, pos=3):
    """Return a copy of ``X`` with a column of ones inserted at column index ``pos``.

    Args:
        X: 2-D array of shape ``(n_samples, n_features)``.
        pos: Column index at which the all-ones column is placed; columns from
            ``pos`` onward shift right by one.

    Returns:
        A new array of shape ``(n_samples, n_features + 1)``. The ones column is
        created with numpy's default float dtype, so narrower inputs are upcast.
    """
    left = X[:, :pos]
    right = X[:, pos:]
    dummy = np.ones((X.shape[0], 1))
    return np.hstack([left, dummy, right])

In [9]:
# Compare productDT and sklearn on this dataset
from hyperdt.product_space_DT import ProductSpaceDT
from sklearn.tree import DecisionTreeClassifier

# `signature` holds (curvature, dim) pairs; ProductSpaceDT is given them swapped, as
# (dim, curvature). NOTE(review): `signature` is a notebook global that is also assigned
# when the H^6 manifold is built - re-running that cell before this one would change
# what is passed here.
pdt_signature = [(dim, curvature) for curvature, dim in signature]
pdt = ProductSpaceDT(max_depth=3, signature=pdt_signature)

# ProductDT is fed the embedding with the extra dummy column added by fix_X;
# sklearn's tree sees the raw coordinates.
X_product = fix_X(h2_e2_s2_polblogs)
cv_eval(pdt, "ProductDT", X_product, polblogs_labels)

dt = DecisionTreeClassifier(max_depth=3)
cv_eval(dt, "DT", h2_e2_s2_polblogs, polblogs_labels)

# TODO: Quentin: product perceptron eval
# TODO: Quentin: product SVM eval

INFO: Using numpy backend


ProductDT	90.34 +/- 0.22
DT	92.23 +/- 1.16


In [10]:
# Does it hold up componentwise?

# Column blocks of the joint embedding for each factor, looked up through pm2.man2dim
# (index 0 = hyperbolic, 1 = Euclidean, 2 = spherical, following the signature order).
X_H = h2_e2_s2_polblogs[:, pm2.man2dim[0]]
X_E = h2_e2_s2_polblogs[:, pm2.man2dim[1]]
X_S = h2_e2_s2_polblogs[:, pm2.man2dim[2]]

# Hyperbolic
pdt_H = ProductSpaceDT(max_depth=3, signature=[(2, -1.0)])
hdt_H = HyperbolicDecisionTreeClassifier(max_depth=3, skip_hyperboloid_check=True)
dt_H = DecisionTreeClassifier(max_depth=3)
for _model, _label in ((pdt_H, "ProductDT (H)"), (hdt_H, "HyperDT (H)"), (dt_H, "DT (H)")):
    cv_eval(_model, _label, X_H, polblogs_labels)

# TODO: Quentin: product perceptron eval
# TODO: Quentin: product SVM eval

print()

# Euclidean
# For the euclidean ones, we won't use man2dim because fix_X() breaks it - we'll hardcode 3:6 instead
# (fix_X inserts the dummy column at index 3, so 3:6 is the dummy plus the two Euclidean columns).
X_E_padded = fix_X(h2_e2_s2_polblogs)[:, 3:6]
pdt_E = ProductSpaceDT(max_depth=3, signature=[(2, 0.0)])
dt_E = DecisionTreeClassifier(max_depth=3)
for _model, _label, _features in ((pdt_E, "ProductDT (E)", X_E_padded), (dt_E, "DT (E)", X_E)):
    cv_eval(_model, _label, _features, polblogs_labels)

# TODO: Quentin: product perceptron eval
# TODO: Quentin: product SVM eval

print()

# Sphere
pdt_S = ProductSpaceDT(max_depth=3, signature=[(2, 1.0)])
# The hyperbolic tree is reused on the spherical block, with the "bisect" midpoint method.
hdt_S = HyperbolicDecisionTreeClassifier(max_depth=3, skip_hyperboloid_check=True, angle_midpoint_method="bisect")
dt_S = DecisionTreeClassifier(max_depth=3)
for _model, _label in ((pdt_S, "ProductDT (S)"), (hdt_S, "HyperDT (S)"), (dt_S, "DT (S)")):
    cv_eval(_model, _label, X_S, polblogs_labels)

# TODO: Quentin: product perceptron eval
# TODO: Quentin: product SVM eval

ProductDT (H)	70.13 +/- 2.41
HyperDT (H)	70.13 +/- 2.41
DT (H)	68.98 +/- 3.30

ProductDT (E)	81.34 +/- 4.06
DT (E)	81.34 +/- 4.06

ProductDT (S)	90.26 +/- 1.02
HyperDT (S)	90.26 +/- 1.02
DT (S)	90.67 +/- 1.59


In [54]:
# One value per component manifold of pm2, as returned by dist_component_by_manifold.
contributions = embedders.metrics.dist_component_by_manifold(pm2, pm2.x_embed)

# Sanity check: the per-manifold shares should add to 100%. The recorded run prints a sum
# of 1.0000000596, i.e. 1 up to floating-point rounding (presumably single-precision
# arithmetic upstream - TODO confirm), so the earlier question "Why don't these add to
# 100%...?" appears to be resolved.
print(contributions)
total_contribution = sum(contributions)
print(total_contribution)

[0.25659456849098206, 0.3267468214035034, 0.4166586697101593]
1.0000000596046448
