In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Torch device management
import torch

if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda


In [3]:
import embedders

In [4]:
# Load Polblogs

polblogs_dists, polblogs_labels = embedders.dataloaders.load("polblogs", labels=True)

Top CC has 1222 nodes; original graph has 1490 nodes.


In [5]:
# Specify signature - useful to re-initialize the manifold here
signature = [(-1, 6)]
pm = embedders.manifolds.ProductManifold(signature=signature)
print(pm.name)

# Rescale distances
dists_rescaled = polblogs_dists / polblogs_dists.max()

# Get embedding
embedders.coordinate_learning.train_coords(
    pm,
    dists_rescaled,
    device=device,
    burn_in_iterations=100,
    training_iterations=100 * 9,
    learning_rate=1e-1,
    burn_in_learning_rate=1e-2,
    scale_factor_learning_rate=1e-1,
)

h6_polblogs = pm.x_embed.detach().cpu().numpy()

H_1.0^6


  0%|          | 0/1000 [00:00<?, ?it/s]

 does not have profile information (Triggered internally at ../third_party/nvfuser/csrc/graph_fuser.cpp:104.)
  return _inner(u, v, keepdim=keepdim, dim=dim)


In [6]:
# Compare productDT and sklearn on this dataset
from sklearn.model_selection import train_test_split

from hyperdt.tree import HyperbolicDecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(h6_polblogs, polblogs_labels, test_size=0.2)

hdt = HyperbolicDecisionTreeClassifier(max_depth=3, skip_hyperboloid_check=True)
hdt.fit(X_train, y_train)
print(f"HyperDT\t{hdt.score(X_test, y_test)*100:.2f}")

dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train, y_train)
print(f"DT\t{dt.score(X_test, y_test)*100:.2f}")

HyperDT	95.10
DT	93.88


In [7]:
# Same thing, but now we do H2 x E2 x S2
signature = [(-1, 2), (0, 2), (1, 2)]
pm2 = embedders.manifolds.ProductManifold(signature=signature)
print(pm2.name)

# Get embedding
embedders.coordinate_learning.train_coords(
    pm2,
    dists_rescaled,
    device=device,
    burn_in_iterations=100,
    training_iterations=100 * 9,
    learning_rate=1e-1,
    burn_in_learning_rate=1e-2,
    scale_factor_learning_rate=1e-1,
)

h2_e2_s2_polblogs = pm2.x_embed.detach().cpu().numpy()

H_1.0^2 x E_0.0^2 x S_1.0^2


  0%|          | 0/1000 [00:00<?, ?it/s]

In [32]:
# We assume a dummy dimension for our Euclidean embeddings when we run ProductDT
import numpy as np


def fix_X(X, pos=3):
    return np.concatenate([X[:, :pos], np.ones((len(X), 1)), X[:, pos:]], axis=1)

In [37]:
# Compare productDT and sklearn on this dataset
from hyperdt.product_space_DT import ProductSpaceDT
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(h2_e2_s2_polblogs, polblogs_labels, test_size=0.2)

pdt = ProductSpaceDT(max_depth=3, signature=[(s[1], s[0]) for s in signature])
pdt.fit(fix_X(X_train), y_train)
print(f"ProductDT\t{pdt.score(fix_X(X_test), y_test)*100:.2f}")

dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train, y_train)
print(f"DT\t\t{dt.score(X_test, y_test)*100:.2f}")

ProductDT	93.88
DT		91.02


In [45]:
# Does it hold up componentwise?

product_dt_hyper = ProductSpaceDT(max_depth=3, signature=[(2, -1.0)])
product_dt_hyper.fit(X_train[:, :3], y_train)
print(f"ProductDT (H)\t{product_dt_hyper.score(X_test[:, :3], y_test)*100:.2f}")

hyperdt_hyper = HyperbolicDecisionTreeClassifier(max_depth=3, skip_hyperboloid_check=True)
hyperdt_hyper.fit(X_train[:, :3], y_train)
print(f"HyperDT\t\t{hyperdt_hyper.score(X_test[:, :3], y_test)*100:.2f}")

dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train[:, :3], y_train)
print(f"DT\t\t{dt.score(X_test[:, :3], y_test)*100:.2f}")

print()

product_dt_euclidean = ProductSpaceDT(max_depth=3, signature=[(2, 0.0)])
product_dt_euclidean.fit(fix_X(X_train)[:, 3:6], y_train)
print(f"ProductDT (E)\t{product_dt_euclidean.score(fix_X(X_test)[:, 3:6], y_test)*100:.2f}")

dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train[:, 3:5], y_train)
print(f"DT\t\t{dt.score(X_test[:, 3:5], y_test)*100:.2f}")

print()

product_dt_sphere = ProductSpaceDT(max_depth=3, signature=[(2, 1.0)])
product_dt_sphere.fit(X_train[:, 5:], y_train)
print(f"ProductDT (S)\t{product_dt_sphere.score(X_test[:, 5:], y_test)*100:.2f}")

dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train[:, 5:], y_train)
print(f"DT\t\t{dt.score(X_test[:, 5:], y_test)*100:.2f}")

ProductDT (H)	74.69
HyperDT		74.69
DT		75.10

ProductDT (E)	89.39
DT		89.39

ProductDT (S)	84.49
DT		84.08
