# HoroRF
> Comparing two hyperbolic RF methods

In [6]:
%load_ext autoreload
%autoreload 2

In [6]:
%%bash

# Run HoroRF:at 1000 
cd HoroRF
/home/phil/mambaforge/envs/hdt/bin/python train_hyp_rf.py -h
mv ./logs/output ./logs/output_$(date +%Y%m%d_%H%M%S)_hororf

[32;1m2023-09-18 09:03:44,089 [hororf.rf_trainer][0m 1 GPUs available
[32;1m2023-09-18 09:03:44,089 [hororf.rf_trainer][0m Using seed 17 on class 2
[32;1m2023-09-18 09:03:44,094 [hororf.utils][0m 977 datapoints in dataset 'datasets.polblogs_geomstats'
[32;1m2023-09-18 09:03:44,094 [hororf.utils][0m 977 datapoints in test dataset 'datasets.polblogs_geomstats'
[32;1m2023-09-18 09:03:44,095 [hororf.rf_trainer][0m 781 train and 196 test samples for fold 0
[32;1m2023-09-18 09:06:06,657 [hororf.rf_trainer][0m Hyperbolic tree f1 micro: 0.8878, f1 macro: 0.8870, AUPR: 0.0000. Mean depth of 6.00
[32;1m2023-09-18 09:06:06,658 [hororf.rf_trainer][0m 781 train and 196 test samples for fold 1
[32;1m2023-09-18 09:08:48,420 [hororf.rf_trainer][0m Hyperbolic tree f1 micro: 0.9337, f1 macro: 0.9332, AUPR: 0.0000. Mean depth of 6.00
[32;1m2023-09-18 09:08:48,421 [hororf.rf_trainer][0m 782 train and 195 test samples for fold 2
[32;1m2023-09-18 09:11:20,500 [hororf.rf_trainer][0m Hyper

In [48]:
# For using hororf outputs
# vals = [
#     0.8878, 0.9337, 0.9385, 0.8974, 0.9385
# ]

import numpy as np

dim = 16
dataname = "gaussian"
# dataname = "neuroseed"

for suffix in ["hrf", "results_micro", "rf"]:
    vals = np.loadtxt(f"./HoroRF/logs/big_bench/hororf_{dataname}_{dim}/{suffix}.txt", delimiter="\t")
    print(suffix, f"{np.mean(vals) * 100:.2f}", f"{np.std(vals)*100:.2f}")

hrf 99.79 0.25
results_micro 98.66 0.90
rf 99.79 0.25


In [13]:
# For 16-dimensional embeddings, HoroRF had a micro-F1 score of 0.675. Let's try ours:

import numpy as np
import yaml

from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from hyperdt.forest import HyperbolicRandomForestClassifier
from hyperdt.conversions import convert

# Read params from yml file


def evaluate_hdt():
    params = yaml.safe_load(open("HoroRF/params.yml", "r"))

    # Dataset
    print(f"Using loader from file: {params['dataset_file']}")
    print()  # For tqdm compatibility
    if params["dataset_file"] == "datasets.gaussian":
        from HoroRF.datasets.gaussian import get_training_data, get_testing_data
    elif params["dataset_file"] == "datasets.neuroseed":
        from HoroRF.datasets.neuroseed import get_training_data, get_testing_data
    elif params["dataset_file"] == "datasets.polblogs_geomstats":
        from HoroRF.datasets.polblogs_geomstats import get_training_data, get_testing_data

    # Get data
    X_train, y_train = get_training_data(class_label=params["class_label"], seed=params["seed"])
    X_train = convert(X_train.numpy(), "poincare", "hyperboloid")
    X_test, y_test = get_testing_data(class_label=params["class_label"], seed=params["seed"])
    X_test = convert(X_test.numpy(), "poincare", "hyperboloid")

    # Hyperparams
    args = {
        "n_estimators": params["num_trees"],
        "max_depth": params["max_depth"],
        "min_samples_leaf": params["min_samples_leaf"],
    }

    # 5-fold cross-validation
    kf = KFold(n_splits=5, shuffle=True, random_state=params["seed"])
    f1_scores_hrf = []
    f1_scores_rf = []
    for train_index, test_index in kf.split(X_train):
        # Hyperbolic
        hrf = HyperbolicRandomForestClassifier(**args)
        hrf.fit(X_train[train_index], y_train[train_index], use_tqdm=True, seed=params["seed"])
        y_pred = hrf.predict(X_train[test_index])
        f1_scores_hrf.append(f1_score(y_train[test_index], y_pred, average="micro"))

        # Euclidean
        rf = RandomForestClassifier(**args, random_state=params["seed"])
        rf.fit(X_train[train_index], y_train[train_index])
        y_pred = rf.predict(X_train[test_index])
        f1_scores_rf.append(f1_score(y_train[test_index], y_pred, average="micro"))

    return f1_scores_hrf, f1_scores_rf


f1_scores_hrf, f1_scores_rf = evaluate_hdt()
print(f"Hyperbolic: {np.mean(f1_scores_hrf):.3f} +/- {np.std(f1_scores_hrf):.3f}")
print(f"Euclidean: {np.mean(f1_scores_rf):.3f} +/- {np.std(f1_scores_rf):.3f}")

Using loader from file: datasets.neuroseed



100%|██████████| 24/24 [00:00<00:00, 28.18it/s]
100%|██████████| 24/24 [00:00<00:00, 28.01it/s]
100%|██████████| 24/24 [00:00<00:00, 30.22it/s]
100%|██████████| 24/24 [00:00<00:00, 28.65it/s]
100%|██████████| 24/24 [00:00<00:00, 29.08it/s]


Hyperbolic: 0.867 +/- 0.012
Euclidean: 0.885 +/- 0.016


In [12]:
from HoroRF.datasets.gaussian import get_training_data, get_testing_data

get_training_data(class_label=2, seed=0)[0].shape

torch.Size([1000, 2])

In [42]:
from HoroRF.datasets.neuroseed import get_training_data, get_testing_data

get_training_data(class_label=2, seed=0)[1]

tensor([44, 44,  9, 22, 44, 44,  9, 44, 44,  9,  9, 22, 22, 44, 22, 44, 22, 22,
        44, 22,  3, 22, 22, 44, 44, 44,  2,  3, 22, 22, 22,  2, 22,  2, 44, 44,
        22,  9, 44, 44, 22, 44, 22, 22, 22,  9,  9, 44, 44, 22, 44,  3, 44, 22,
         9, 43, 44, 22, 22, 44,  9,  3,  9,  2, 44, 43, 44, 22,  9,  3, 44,  9,
        44, 44, 22,  9,  3,  3, 44,  2, 22,  2,  3, 22, 44,  3,  3,  3, 44, 44,
        44,  2,  2, 22, 22, 22,  3,  2,  9, 22])

In [39]:
labels = adata.var["taxonomy_1"]
labels_counts = labels.value_counts()
keep = labels_counts[labels_counts > 1000].index

labels_filtered = labels[labels.isin(keep)]
labels_filtered.index

Index(['776992', '1050608', '190299', '358030', '239283', '4030157', '35786',
       '174924', '370251', '191389',
       ...
       '268328', '228988', '155616', '158709', '299059', '515774', '311952',
       '568082', '1112813', '562583'],
      dtype='object', length=32863)

In [41]:
import numpy as np

indices = np.random.choice(labels_filtered.index, 125, replace=False)
indices

array(['145236', '4442899', '112801', '269532', '95741', '301910',
       '74869', '1967053', '768535', '691952', '294040', '470879',
       '358439', '95522', '268755', '4448558', '593016', '318205',
       '4298060', '4475224', '1096766', '1108726', '3219862', '193763',
       '2545365', '252198', '516020', '271500', '354401', '241499',
       '4437436', '971971', '344456', '322087', '4371949', '554911',
       '202816', '4444213', '4416974', '548878', '164915', '370295',
       '4445508', '4321043', '4416763', '1087825', '997439', '4256699',
       '3862524', '47181', '174004', '407459', '683241', '4364083',
       '115049', '206331', '343699', '964799', '1667530', '4459355',
       '583472', '4377731', '1105919', '814570', '709691', '145786',
       '332210', '228043', '810672', '199344', '904468', '668257',
       '4322804', '4320437', '4367317', '807112', '280233', '147940',
       '1066654', '4469223', '563671', '2838675', '4468097', '4349553',
       '1074801', '1117187', '9985

In [31]:
# Figure out error with my method:

seed = 15
dim = 4
from HoroRF.datasets.neuroseed import get_training_data, get_testing_data
from src.hyperdt.conversions import convert

X, y = get_training_data(class_label=dim, seed=seed, num_samples=1000)
X_h = convert(X.numpy(), "poincare", "hyperboloid")

In [34]:
from src.hyperdt.forest import HyperbolicRandomForestClassifier

hrf = HyperbolicRandomForestClassifier(n_estimators=24, max_depth=6)

hrf.fit(X_h, y.numpy(), use_tqdm=True, seed=seed)

100%|██████████| 24/24 [00:02<00:00,  9.98it/s]


In [35]:
f1_score(y.numpy(), hrf.predict(X_h), average="micro")

0.045

In [8]:
# Troubleshooting poor neuroseed performance

from HoroRF.datasets.neuroseed import get_training_data, get_testing_data
from src.hyperdt.conversions import convert
from src.hyperdt.tree import HyperbolicDecisionTreeClassifier

X, y = get_training_data(class_label=2, seed=0, num_samples=400)
X_h = convert(X.numpy(), "poincare", "hyperboloid")

In [9]:
# So here's somewhere we fail:
# n = 400, seed = 1, dim = 8

import numpy as np

from src.hyperdt.forest import HyperbolicRandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

n = 400
seed = 1
dim = 8

X, y = get_training_data(class_label=dim, seed=seed, num_samples=n)
X_h = convert(X.numpy(), "poincare", "hyperboloid")
y = y.numpy()
# Relabel
_, y = np.unique(y, return_inverse=True)

cv = KFold(n_splits=5, shuffle=True, random_state=seed)

hrf = HyperbolicRandomForestClassifier(n_estimators=1, max_depth=6)

for train_index, test_index in cv.split(X):
    hrf.fit(X[train_index], y[train_index], use_tqdm=True, seed=seed)
    y_pred = hrf.predict(X[test_index])
    print(f1_score(y[test_index], y_pred, average="micro"))

100%|██████████| 1/1 [00:00<00:00, 46.29it/s]


TypeError: sum() received an invalid combination of arguments - got (out=NoneType, axis=int, ), but expected one of:
 * (*, torch.dtype dtype)
      didn't match because some of the keywords were incorrect: out, axis
 * (tuple of ints dim, bool keepdim, *, torch.dtype dtype)
 * (tuple of names dim, bool keepdim, *, torch.dtype dtype)


In [10]:
hrf = HyperbolicRandomForestClassifier(n_estimators=1, max_depth=6)

In [12]:
hrf.fit(X_h, y)

f1_score(y, hrf.predict(X_h), average="micro")

0.925

In [25]:
hrf.classes_

array([ 2,  3,  9, 22, 43, 44])

In [34]:
import numpy as np

reference = hrf.trees[0].classes_

assert np.all(reference == hrf.classes_)
assert np.all(reference == [tree.classes_ for tree in hrf.trees])

In [42]:
# Let's step through each method, I guess

from src.hyperdt.tree import HyperbolicDecisionTreeClassifier

X, y = get_training_data(class_label=4, seed=15, num_samples=1000)
X_h = convert(X.numpy(), "poincare", "hyperboloid")
hdt = HyperbolicRandomForestClassifier(max_depth=6)
hdt.fit(X_h, y)

In [44]:
# First up, we see that f1 score is messed up:

from sklearn.metrics import f1_score

y_pred = hdt.predict(X_h)
f1_score(y, hdt.classes_[y_pred], average="micro")

IndexError: index 9 is out of bounds for axis 0 with size 6

In [52]:
hdt.trees[0].classes_

array([ 2,  3,  9, 22, 43, 44])

In [55]:
hdt.classes_

array([ 2,  3,  9, 22, 43, 44])

In [54]:
np.unique(y)

array([ 2,  3,  9, 22, 43, 44])

In [61]:
hdt2 = HyperbolicDecisionTreeClassifier()
hdt2.fit(X_h, y)
f1_score(hdt2.predict(X_h), y, average="micro")

0.86875

In [75]:
hrf2 = HyperbolicRandomForestClassifier()
hrf2.fit(X_h, y)
f1_score(hrf2.predict(X_h), y, average="micro")

0.02375

In [64]:
# So retraining the tree gives us the same issue:
hrf2.trees[0].fit(X_h, y)
f1_score(hrf2.trees[0].predict(X_h), y, average="micro")

0.02375

In [69]:
hrf2.trees[0]._get_probs(y)

array([0.04875, 0.06875, 0.16625, 0.3575 , 0.02625, 0.3325 ])

In [76]:
type(hrf2.trees[0])

src.hyperdt.tree.DecisionTreeClassifier

In [12]:
import numpy as np

seed = 0
dim = 8

from HoroRF.datasets.gaussian import get_training_data, get_testing_data

X, y = get_training_data(class_label=dim, seed=seed, num_samples=1000, convert=False)
X = X.numpy()
y = y.numpy()
# X_h = convert(X.numpy(), "poincare", "hyperboloid")

from src.hyperdt.forest import HyperbolicRandomForestClassifier

hrf = HyperbolicRandomForestClassifier(n_estimators=1, max_depth=6)
hrf.fit(X, y)

ValueError: Points must lie on a hyperboloid: Lorentzian Inner Product does not equal the curvature of 1.

In [13]:
np.sum(X[:, 1:] ** 2, axis=1) - X[:, 0] ** 2

array([-1.        , -1.015625  , -1.        , -1.        , -0.99999952,
       -1.        , -1.        , -1.00000095, -1.        , -1.        ,
       -1.00000381, -1.00000012, -1.        , -1.        , -1.        ,
       -1.00000006, -1.        , -1.00000763, -1.        , -1.        ,
       -1.        , -1.        , -1.        , -0.99999237, -1.        ,
       -0.99999619, -1.        , -1.        , -1.015625  , -1.        ,
       -0.99999976, -0.99999999, -1.        , -1.        , -1.00001526,
       -0.99999997, -1.        , -1.        , -1.00000012, -1.        ,
       -1.00000048, -1.        , -1.        , -1.00000381, -1.        ,
       -1.        , -1.        , -1.        , -1.00000191, -1.        ,
       -1.00000012, -0.99999999, -1.        , -1.        , -0.99999999,
       -1.        , -1.        , -1.        , -1.00000048, -1.        ,
       -1.        , -1.        , -1.        , -1.        , -1.        ,
       -1.        , -1.        , -1.        , -1.        , -1.  

In [2]:
from HoroRF.datasets.gaussian import get_training_data, get_testing_data

X, y = get_training_data(class_label=2, seed=0, num_samples=125, convert_to_poincare=False)
X

tensor([[ 1.0345e+01,  3.9903e+00,  9.4921e+00],
        [ 4.8074e+00,  4.5363e+00,  1.2381e+00],
        [ 1.1840e+01,  4.7350e+00,  1.0806e+01],
        [ 3.0232e+00,  2.8361e+00,  3.1029e-01],
        [ 5.6779e+00,  2.3141e+00,  5.0876e+00],
        [ 1.0710e+01,  1.0251e+01,  2.9358e+00],
        [ 3.4341e+01,  3.3381e+01,  8.0009e+00],
        [ 2.2317e+00,  1.9815e+00,  2.3266e-01],
        [ 7.0170e+00,  2.8241e+00,  6.3453e+00],
        [ 4.8746e+00,  1.8847e+00,  4.3828e+00],
        [ 3.5791e+00,  3.3669e+00,  6.8831e-01],
        [ 1.0764e+01,  4.1736e+00,  9.8718e+00],
        [ 1.7322e+01,  1.6808e+01,  4.0674e+00],
        [ 1.5636e+01,  6.0609e+00,  1.4379e+01],
        [ 1.2036e+01,  4.5829e+00,  1.1085e+01],
        [ 5.3760e+00,  5.0937e+00,  1.3984e+00],
        [ 1.9776e+00,  1.7060e+00,  2.3058e-02],
        [ 1.6574e+01,  6.5419e+00,  1.5195e+01],
        [ 1.4714e+00,  1.0318e+00,  3.1724e-01],
        [ 2.8383e+00,  1.2815e+00,  2.3268e+00],
        [ 1.7756e+01

In [4]:
X[:, 0].min()

tensor(1.1298, dtype=torch.float64)

In [5]:
import anndata

In [18]:
adata = anndata.read_h5ad("/home/phil/americangut/data/big_table_with_embeddings.h5ad")

adata.varm

AxisArrays with keys: component_embeddings_euclidean_128, component_embeddings_euclidean_16, component_embeddings_euclidean_2, component_embeddings_euclidean_32, component_embeddings_euclidean_4, component_embeddings_euclidean_64, component_embeddings_euclidean_8, component_embeddings_hyperboloid_128, component_embeddings_hyperboloid_16, component_embeddings_hyperboloid_2, component_embeddings_hyperboloid_32, component_embeddings_hyperboloid_4, component_embeddings_hyperboloid_64, component_embeddings_hyperboloid_8, component_embeddings_poincare_128, component_embeddings_poincare_16, component_embeddings_poincare_2, component_embeddings_poincare_32, component_embeddings_poincare_4, component_embeddings_poincare_64, component_embeddings_poincare_8

In [8]:
adata.varm["component_embeddings_poincare_8"]

Unnamed: 0,0,1,2,3,4,5,6,7
776992,0.055982,-0.048648,0.016134,0.119837,-0.538352,-0.155689,-0.665964,-0.037884
1050608,-0.113340,-0.229799,0.133146,0.012594,-0.227479,-0.169610,-0.767100,-0.163189
190299,0.014210,-0.004440,-0.026317,0.363405,-0.272027,-0.070862,-0.652109,-0.376673
358030,0.101718,-0.107645,-0.035459,0.213485,-0.343706,0.013884,-0.741350,-0.205995
239283,-0.455948,-0.474233,-0.215332,0.207518,-0.190398,-0.150162,-0.257874,-0.362781
...,...,...,...,...,...,...,...,...
1105573,0.231628,-0.376365,0.233156,-0.044553,-0.380642,0.012487,-0.228628,-0.574599
311952,0.181175,-0.108128,-0.049317,0.300540,-0.263796,-0.065274,-0.683462,-0.317625
568082,-0.384161,-0.525431,-0.174787,0.065428,-0.226401,-0.251183,-0.290468,-0.349166
1112813,-0.041194,-0.109654,0.138560,0.091734,-0.755047,-0.224828,-0.274242,-0.204795


In [8]:
# Update adata to only have pandas dataframes
import anndata
import numpy as np
import pandas as pd

adata = anndata.read_h5ad("/home/phil/americangut/data/big_table_with_embeddings.h5ad")
for key in adata.varm.keys():
    if isinstance(adata.varm[key], pd.DataFrame):
        continue
    data = adata.varm[key]
    # Put last column first
    data = np.concatenate([data[:, -1:], data[:, :-1]], axis=1)
    adata.varm[key] = pd.DataFrame(
        data, index=adata.var_names, columns=[str(x) for x in range(adata.varm[key].shape[1])]
    )
    adata.varm[key].columns = [str(x) for x in adata.varm[key].columns]
    print(key, "Success")

# Save back in old location "fixed"
adata.write_h5ad("/home/phil/americangut/data/big_table_with_embeddings_fixed.h5ad")

component_embeddings_hyperboloid_128 Success
component_embeddings_hyperboloid_16 Success
component_embeddings_hyperboloid_2 Success
component_embeddings_hyperboloid_32 Success
component_embeddings_hyperboloid_4 Success
component_embeddings_hyperboloid_64 Success
component_embeddings_hyperboloid_8 Success


In [None]:
adata

In [15]:
pd.DataFrame(adata.varm["component_embeddings_hyperboloid_32"], index=adata.var_names)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
776992,0.003104,-0.011133,-0.050578,0.002356,-0.012173,0.016810,0.007254,-0.037227,0.026162,-0.002505,...,-0.004643,-0.005937,0.220302,0.013990,0.022461,0.001150,-0.004111,0.009860,-0.025747,1.028866
1050608,0.014252,0.035550,-0.035257,-0.001957,0.000065,0.026523,0.006831,0.017551,0.007044,0.038042,...,0.008326,-0.007701,0.219405,0.000881,-0.017631,-0.013085,-0.012843,-0.021901,-0.006877,1.028866
190299,-0.008894,0.007607,-0.031327,0.009640,0.028208,-0.047692,0.005988,0.000386,0.021859,-0.016243,...,0.013791,0.009728,0.219097,-0.008274,-0.026201,-0.034112,-0.025770,0.015903,0.002340,1.028866
358030,0.002423,0.016628,-0.044778,0.008861,0.008701,-0.028064,0.006646,-0.016058,-0.011688,-0.003859,...,0.015823,0.015618,0.223476,0.002004,-0.009921,0.006784,-0.027722,0.005263,-0.012998,1.028866
239283,0.011081,-0.026398,0.054537,-0.040107,-0.005428,0.049692,-0.038423,0.003105,-0.024795,-0.041465,...,0.010443,-0.015846,0.192871,0.000068,-0.016512,-0.007969,-0.037395,-0.025141,0.007671,1.028866
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1105573,0.026905,0.045144,0.073505,-0.000142,-0.004424,0.016115,-0.025248,-0.040511,0.003926,0.013144,...,0.026041,-0.021510,0.198608,-0.007999,-0.019387,0.016371,-0.001558,-0.021053,-0.001082,1.028866
311952,-0.010087,0.021303,-0.026052,0.008121,0.010004,-0.025448,0.027076,-0.009810,0.006768,-0.021746,...,0.048315,-0.015067,0.222175,-0.005761,-0.012113,0.003765,-0.013163,0.009541,-0.008268,1.028866
568082,-0.005790,-0.025528,0.051072,-0.033486,0.024429,0.055403,-0.037198,0.017570,-0.015689,-0.043806,...,0.007930,-0.007972,0.187760,0.025688,-0.024158,-0.014025,-0.040272,-0.011225,0.003613,1.028866
1112813,-0.007514,-0.047063,0.028568,-0.004904,-0.012186,0.021110,-0.013998,-0.006658,0.007722,-0.002570,...,0.012517,0.030499,0.206941,0.010880,0.052308,-0.013916,0.009443,0.010271,-0.017667,1.028866


In [13]:
adata.var["taxonomy_1"].value_counts()[:7]

taxonomy_1
p__Firmicutes        11533
p__Proteobacteria    10620
p__Bacteroidetes      4990
p__Actinobacteria     2977
p__Acidobacteria      1640
p__Planctomycetes     1103
p__Cyanobacteria       763
Name: count, dtype: int64

In [1]:
from HoroRF.datasets.neuroseed import get_training_data
from src.hyperdt.forest import HyperbolicRandomForestClassifier

X, y = get_training_data(8, 0, convert_to_poincare=False)
X = X.numpy()
y = y.numpy()

hrf = HyperbolicRandomForestClassifier(n_estimators=1, max_depth=6)
hrf.fit(X, y)

ValueError: Points must lie on a hyperboloid: Lorentzian Inner Product does not equal the curvature of 1.

In [13]:
import numpy as np

np.sum(X[:, :-1] ** 2, axis=1) - X[:, -1] ** 2

array([ 6.62415129e-01,  3.02970891e-01,  7.42093042e-01,  3.53212256e-01,
       -7.33556736e-02,  4.51581423e-01, -4.65659764e-02,  7.03111294e-01,
        3.81061869e-01,  4.19054824e-01,  3.41937011e-01,  4.31299274e-01,
        7.78975350e-01,  3.39038215e-01,  4.73590453e-01,  6.87743183e-01,
        1.84712044e-01,  1.00699370e-01,  5.92619295e-01, -1.09447602e-01,
        3.21642641e-01,  6.29782433e-01,  7.37520295e-01,  5.79525058e-01,
        6.19710516e-01,  2.15009685e-01,  5.33383103e-01,  6.46306006e-01,
        6.56358909e-01,  8.98726419e-02,  4.96476280e-01,  4.50566421e-01,
        7.16286269e-01,  7.00943954e-01,  2.31951404e-01,  7.56062362e-01,
        3.00312454e-01,  4.99192027e-01,  2.05736839e-01,  4.90585509e-01,
        5.40890290e-01, -9.60216588e-02,  3.71083033e-01,  4.16787794e-01,
        4.58929303e-01, -3.05046968e-01,  7.57719097e-01,  5.52870490e-01,
        5.69618231e-01,  1.05618678e-01,  7.70246727e-01,  8.66156052e-02,
        7.17320870e-01, -

In [58]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np

rf = RandomForestClassifier(n_estimators=1, max_depth=1)
rf.fit(np.random.rand(100000, 2), np.random.randint(0, 3, 100000))

In [1]:
from src.hyperdt.tree import HyperbolicDecisionTreeClassifier
from HoroRF.datasets.gaussian import get_training_data
from src.hyperdt.conversions import convert

hdt = HyperbolicDecisionTreeClassifier(max_depth=1)
x, y = get_training_data(2, 0, num_samples=100, convert_to_poincare=False)
hdt.fit(x.numpy(), y.numpy())

INFO: Using numpy backend


In [2]:
np.allclose(
    np.sum(x.numpy()[:, hdt.dims] ** 2, axis=1) - x.numpy()[:, hdt.timelike_dim] ** 2, -1 / hdt.curvature, atol=1e-1
)

NameError: name 'np' is not defined