In [1]:
import os
import sys
sys.path.append("/home/romainlhardy/code/hyperbolic-cancer/PoincareMaps")

import torch

from PoincareMaps.data import prepare_data, compute_rfa
from PoincareMaps.model import PoincareEmbedding, PoincareDistance, poincare_root, poincare_translation
from PoincareMaps.rsgd import RiemannianSGD
from PoincareMaps.train import train
from PoincareMaps.visualize import plotPoincareDisc, plot_poincare_disc
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Name of the dataset to embed, and the `root` value handed to `poincare_root`
# in the visualization cell (presumably the label of the root population — confirm).
dset, root = "krumsiek11_blobs", "root"

# Load the feature matrix and the per-sample labels for the chosen dataset.
features, labels = prepare_data(
    f"/home/romainlhardy/code/hyperbolic-cancer/PoincareMaps/datasets/{dset}",
    n_pca=20,
    normalize=False,
    with_labels=True,
)

# Quick shape check: one line per array.
print(features.shape, labels.shape, sep="\n")

In [None]:
# Pairwise distances in the original data space
rfa = compute_rfa(
    features,
    mode="features",
    distlocal="minkowski",
    distfn="MFIsym",
    k_neighbours=30,
    connected=True,
    sigma=1.0,
)

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# One integer id per row of `rfa`, created directly on the target device.
indices = torch.arange(len(rfa), device=device)
rfa = rfa.to(device)

# Each dataset item pairs a row index with the matching row of `rfa`.
dataset = TensorDataset(indices, rfa)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

print(f"Dataset size: {len(dataset)}")

In [5]:
# Attributes exposed by the embedding model:
#   predictor.size   -> dataset size
#   predictor.lt     -> embedding matrix (inputs are indices)
#   predictor.dist   -> distance function
#   predictor.lossfn -> loss function
#   predictor.Qdist  -> distribution of the Poincaré ball
#   predictor.gamma  -> gamma (temperature)
predictor = PoincareEmbedding(
    len(dataset),  # one embedding per sample
    2,             # embedding dimension
    dist=PoincareDistance,
    Qdist="laplace",
    lossfn="klSym",
    gamma=2.0,
    max_norm=1,
    cuda=0,
)
predictor = predictor.to(device)

In [None]:
# Sanity check on a single mini-batch drawn from the shuffled loader.
batch = next(iter(dataloader))
inputs, targets = batch

# Forward pass: one output row per input index.
outputs = predictor(inputs.to(device))  # [batch_size, len(dataset)]

# Every output row must sum to one.
assert torch.allclose(outputs.sum(dim=-1), torch.ones(len(inputs)).to(device))

# Try to match the distance distributions in the data space and the embedding space
predictor.lossfn(outputs, targets)

In [7]:
# RiemannianSGD optimizer over the embedding parameters
optimizer = RiemannianSGD(
    predictor.parameters(),
    lr=0.1,
)

In [None]:
class PoincareOptions:
    """Plain attribute container for the options object passed to `train`.

    Each constructor argument is stored unchanged on the instance under the
    same name. The meaning of each option is defined by the code that reads
    it (presumably `train` — confirm there), not by this class.
    """

    def __init__(self, debugplot=False, epochs=500, batchsize=-1, lr=0.1, burnin=500, lrm=1.0, earlystop=0.0001, cuda=0):
        # Attributes are set in signature order so `vars(opt)` lists them
        # in the same order as the constructor arguments.
        self.debugplot = debugplot
        self.epochs, self.batchsize = epochs, batchsize
        self.lr, self.burnin, self.lrm = lr, burnin, lrm
        self.earlystop, self.cuda = earlystop, cuda

# Default options, except for a mini-batch size of 16.
opt = PoincareOptions(batchsize=16)

# Run training; `train` returns the embeddings, the loss and an epoch value.
# NOTE(review): `opt.earlystop` keeps its default (0.0001) while `earlystop=1e-6`
# is passed explicitly below — confirm which of the two `train` actually uses.
embeddings, loss, epoch = train(
    predictor,
    dataset,
    optimizer,
    opt,
    earlystop=1e-6,
    labels=labels,
    color_dict=None,
    fout=f"/home/romainlhardy/code/hyperbolic-cancer/data/outputs/{dset}",
)

In [10]:
# `plotPoincareDisc` and `plot_poincare_disc` are imported in the notebook's
# top import cell together with the other PoincareMaps imports.

# Locate the root sample, then translate the embeddings by the negated root
# embedding (presumably this moves the root to the disc's origin — confirm
# against `poincare_translation`).
root_hat = poincare_root(root, labels, features)
embeddings_rotated = poincare_translation(-embeddings[root_hat, :], embeddings)

# Shared prefix for both figures written by this cell.
output_prefix = f"/home/romainlhardy/code/hyperbolic-cancer/data/outputs/{dset}"

# Raw embedding plot; its return value is reused as the colour mapping below
# so both figures share the same label colours.
color_dict = plotPoincareDisc(embeddings.T, labels, file_name=f"{output_prefix}_raw")

# Plot of the translated embeddings.
plot_poincare_disc(
    embeddings_rotated,
    labels=labels,
    file_name=f"{output_prefix}_rot",
    coldict=color_dict,
    d1=9.5,
    d2=9.0
)