In [48]:
import numpy as np
import pandas as pd

import cudf
import os

from sklearn import datasets
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import KMeans
from sklearn.manifold.t_sne import trustworthiness

from cuml.manifold.umap import UMAP

In [58]:
# Synthetic benchmark: 500 samples with 10 features drawn around 5 blob centers.
# A fixed random_state keeps the dataset identical across Restart & Run All,
# so the clustering assertion further down always checks the same data.
data, labels = datasets.make_blobs(
    n_samples=500, n_features=10, centers=5, random_state=42)

In [59]:
# Embed the current `data` with a UMAP model left at its default settings.
blob_umap = UMAP()
embedding = blob_umap.fit_transform(data)

In [60]:
# Cluster the embedding into 5 groups (the number of blob centers) and compare
# the result against the generating labels. KMeans initialisation is random,
# so random_state is pinned to keep the check from depending on a lucky start.
predicted = KMeans(n_clusters=5, random_state=42).fit_predict(embedding)
score = adjusted_rand_score(labels, predicted)

# An adjusted Rand index of 1.0 means the clustering matches the labels exactly.
assert score == 1.0, (
    "expected perfect cluster recovery, got ARI={:.4f}".format(score))

In [61]:
# Switch datasets: from here on `data` is taken from the Iris dataset's
# `data` field instead of the synthetic blobs.
iris = datasets.load_iris()
data = iris["data"]

In [62]:
# Embed the Iris data with a tighter neighbourhood (10 neighbours, min_dist of
# 0.01) and random initialisation.
iris_umap = UMAP(n_neighbors=10, min_dist=0.01, init="random")
embedding = iris_umap.fit_transform(data)

In [63]:
# Score how well the embedding preserves each point's 10 nearest neighbours.
# n_neighbors is passed by keyword: recent scikit-learn releases make it
# keyword-only, and older releases accept the keyword form as well.
# NOTE(review): this notebook imports trustworthiness from
# `sklearn.manifold.t_sne`, which appears to be a private path in newer
# scikit-learn (public name: `sklearn.manifold.trustworthiness`) — confirm
# against the scikit-learn version this notebook targets.
trust = trustworthiness(iris.data, embedding, n_neighbors=10)
assert trust >= 0.95, (
    "trustworthiness {:.4f} is below the 0.95 threshold".format(trust))

In [64]:
# Boolean mask over the Iris rows: each row is True with probability 0.75
# (kept in `data`) and False otherwise. A seeded generator makes the split
# reproducible, and the mask length follows the dataset instead of a
# hard-coded 150.
split_rng = np.random.RandomState(42)
iris_selection = split_rng.choice(
    [True, False], len(iris.data), replace=True, p=[0.75, 0.25])
data = iris.data[iris_selection]

In [65]:
# Rows that were NOT selected into `data`; they are only passed to transform().
new_data = iris.data[~iris_selection]

# Fit on the selected rows, then project the remaining rows with the
# already-fitted model.
fitter = UMAP(
    n_neighbors=10,
    min_dist=0.01,
    verbose=True,
)
fitter.fit(data)

embedding = fitter.transform(new_data)

In [66]:
# Score the transformed rows against their original features using 10
# neighbours. n_neighbors is passed by keyword: recent scikit-learn releases
# make it keyword-only, and older releases accept the keyword form as well.
trust = trustworthiness(new_data, embedding, n_neighbors=10)
assert trust >= 0.90, (
    "trustworthiness {:.4f} is below the 0.90 threshold".format(trust))