## HyperDT vs sklearn with MNIST data

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules
# before each cell runs, so local edits to imported packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from hyperdt.tree import HyperbolicDecisionTreeClassifier

### Load MNIST dataset

In [None]:
# Fetch the MNIST train/test splits through keras.datasets and unpack each
# split into its image array and label array.
train_split, test_split = mnist.load_data()
train_X, train_y = train_split
test_X, test_y = test_split
print(f'train_X.shape={train_X.shape}, test_X.shape={test_X.shape}')

In [None]:
# Collapse every image into a single float64 feature row
# (one row per sample, all remaining axes merged into the columns).
train_X = train_X.astype(np.float64).reshape(len(train_X), -1)
test_X = test_X.astype(np.float64).reshape(len(test_X), -1)
print(f'train_X.shape={train_X.shape}, test_X.shape={test_X.shape}')

In [None]:
# Prepend a timelike coordinate x_0 = sqrt(sum_i x_i**2 + 1/K) to every
# sample, so that x_0**2 - sum_i x_i**2 == 1/K holds for each row.
K = 1
train_x_0 = np.sqrt(np.square(train_X).sum(axis=1) + 1/K)[:, np.newaxis]
test_x_0 = np.sqrt(np.square(test_X).sum(axis=1) + 1/K)[:, np.newaxis]

# The new coordinate goes in column 0, followed by the original features.
train_X_time = np.hstack((train_x_0, train_X))
test_X_time = np.hstack((test_x_0, test_X))
print(f'train_X_time.shape={train_X_time.shape}, test_X_time.shape={test_X_time.shape}')

### sklearn decision tree model in Euclidean space

In [None]:
# Fit the Euclidean baseline: a scikit-learn decision tree on the flattened
# pixels, with a fixed seed so repeated runs build the same tree.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X=train_X, y=train_y)

In [None]:
# Run the fitted baseline tree on the held-out images
pred_y = dtc.predict(X=test_X)

# Fraction of test labels that were predicted correctly
accuracy = accuracy_score(y_true=test_y, y_pred=pred_y)
print(f"Accuracy: {accuracy:.2f}")

### HyperDT on augmented MNIST data

In [None]:
def Minkowski(X, timelike_dim=0):
    """Return the Minkowski quadratic form of every row of ``X``.

    For each row the result is the sum of squares of the spacelike
    coordinates minus the square of the timelike coordinate.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_dims)
        Points, one per row. Anything ``np.asarray`` accepts (including
        nested lists) is allowed.
    timelike_dim : int, default=0
        Column index of the timelike coordinate. The default keeps the
        original behaviour of treating column 0 as timelike.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        The quadratic form evaluated on each row.
    """
    X = np.asarray(X)
    timelike = X[:, timelike_dim]
    # Every column except the timelike one contributes with a positive sign.
    spacelike = np.delete(X, timelike_dim, axis=1)
    return np.sum(spacelike**2, axis=1) - timelike**2

In [None]:
# Train the hyperbolic decision tree on the data with the prepended timelike
# coordinate (column 0), limited to depth 2.
# NOTE(review): the sklearn DecisionTreeClassifier baseline in this notebook
# is created without max_depth - confirm the depth mismatch is intended
# before comparing the two scores.
hdt = HyperbolicDecisionTreeClassifier(max_depth=2, timelike_dim=0)
hdt.fit(train_X_time, train_y)

# Evaluate on the held-out set using the classifier's own score method
hdt_score = hdt.score(test_X_time, test_y)
print(hdt_score)