## HyperDT vs sklearn with MNIST data

In [37]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from hyperdt.tree import HyperbolicDecisionTreeClassifier


### Load MNIST dataset

In [39]:
(train_X, train_y), (test_X, test_y) = mnist.load_data()
print(f'train_X.shape={train_X.shape}, test_X.shape={test_X.shape}')

train_X.shape=(60000, 28, 28), test_X.shape=(10000, 28, 28)


In [40]:
# Flatten the images
train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)
print(f'train_X.shape={train_X.shape}, test_X.shape={test_X.shape}')

train_X.shape=(60000, 784), test_X.shape=(10000, 784)


In [41]:
# Add time dimension as Euclidean distance of all other dimensions
train_X_time = np.concatenate([np.linalg.norm(train_X, axis=1, keepdims=True), train_X], axis=1)
test_X_time = np.concatenate([np.linalg.norm(test_X, axis=1, keepdims=True), test_X], axis=1)
print(f'train_X_time.shape={train_X_time.shape}, test_X_time.shape={test_X_time.shape}')

train_X_time.shape=(60000, 785), test_X_time.shape=(10000, 785)


### sklearn decision tree model in Euclidean space

In [42]:
# Train decision tree classifier
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(train_X, train_y)

In [43]:
# Predict test set labels
pred_y = dtc.predict(test_X)

# Compute accuracy
accuracy = accuracy_score(test_y, pred_y)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.88


### HyperDT on augmented MNIST data

In [46]:
# Fit hyperbolic decision tree classifier
hdt = HyperbolicDecisionTreeClassifier(timelike_dim=0, max_depth=2)
hdt.fit(train_X_time, train_y)

# Compute accuracy
hdt_score = hdt.score(test_X_time, test_y)
print(hdt_score)

ValueError: Points must lie on a hyperboloid: Minkowski norm does not equal -1.0.