In [None]:

from dask.distributed import Client
from dask_cuda import LocalCUDACluster

In [None]:
# Create a LocalCUDACluster (from dask_cuda) using its default arguments.
cluster = LocalCUDACluster()

In [None]:
# Connect a Dask client to the `cluster` object created in the previous cell.
# Change this if you already have a running cluster.
client = Client(cluster)

It is very important that the following cells be executed only *after* the CUDA cluster has been started.

In [None]:
import cuml

import math
import numpy as np

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

from cuml.manifold import UMAP


In [None]:
n_samples = 100000  # number of points passed to make_blobs
n_features = 500  # number of features per generated point

n_centers = 10  # number of blob centers passed to make_blobs

train_split = .60 # Use 60% to train, 40% to transform

## Generate Data

First, we will generate some data, which will be split for training and transforming.

In [None]:
# Generate the synthetic dataset: `n_samples` points with `n_features`
# features each, drawn around `n_centers` centers with a std of 0.1.
X, y = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_centers,
    cluster_std=0.1,
)

In [None]:
# Split the data: a `train_split` fraction of the rows goes to X_train/y_train,
# the remainder to X_test/y_test.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_split)

## Train Local UMAP Embeddings

In [None]:
%%time
# Fit a 2-component UMAP model on the training split; the %%time cell magic
# reports how long the whole cell takes.
model = UMAP(n_components=2)
model.fit(X_train)

# Embed the training data itself with the fitted model.
original_embed = model.transform(X_train)

In [None]:
# Embed the held-out split locally with the fitted model. The previous version
# transformed X_train again, which only duplicated `original_embed`.
xform_embed = model.transform(X_test)

## Send Model With Some Data to Dask Workers

In [None]:
# Collect the keys of client.has_what() into a list; its length is used below
# as the number of workers to split the data across.
workers = list(client.has_what().keys())

In [None]:
# Scatter the whole held-out split to the cluster in one piece.
# NOTE(review): `dist_data` is not referenced by any later cell visible here - confirm it is still needed.
dist_data = client.scatter(X_test)

## Embarrassingly Parallel Transform

In [None]:
def _xform(model, data):
    return model.transform(data)

In [None]:
# Cut the held-out split into len(workers) chunks (one chunk per entry in `workers`).
splits = np.array_split(X_test, len(workers))

In [None]:
# Send the list of chunks to the cluster via client.scatter.
scattered = client.scatter(splits)

In [None]:
from cuml.dask.common import to_dask_df

# Submit one transform task per pre-scattered chunk *before* waiting on any of
# them. Calling .result() inside the comprehension blocked after every submit,
# so the chunks were transformed one at a time. Using the `scattered` futures
# also avoids shipping each chunk from the client a second time.
futures = [client.submit(_xform, model, part) for part in scattered]

# Block once for all tasks; the embeddings come back in chunk order.
results = client.gather(futures)

In [None]:
# Display the list of per-chunk transform outputs.
results

In [None]:
import matplotlib.pyplot as plt

# Draw the embedding of every chunk on a single set of axes. The previous
# version plotted only the first chunk and passed `cmap` without `c`, which
# matplotlib ignores (with a warning). Each chunk now gets its own color from
# the default color cycle.
fig, ax = plt.subplots(1, figsize=(14, 10))

for idx, embedding in enumerate(results):
    # Column 1 goes on the x-axis and column 0 on the y-axis, matching the
    # orientation used by the original plot.
    ax.scatter(embedding[:, 1], embedding[:, 0], s=10.0, alpha=1.0,
               label=f"chunk {idx}")

ax.set_xlabel("UMAP component 1")
ax.set_ylabel("UMAP component 0")
ax.set_title("Distributed UMAP transform of the held-out data")
ax.legend(title="Data chunk", markerscale=2.0)

plt.show()