In [1]:
import cudf
import dask_cudf

In [2]:
from dask_cuml.linear_regression import LinearRegression

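In order to run this notebook, you will first need to run a dask scheduler and a number of dask workers:
- Run a dask scheduler with:  ```dask-scheduler --scheduler-file=cluster.json```
- Run N dask workers with:  ```mpirun -np N dask-mpi --no-nanny --nthreads 10 --no-scheduler --scheduler-file cluster.json```

__Note__: the cells below create a `LocalCUDACluster` and connect the client to it directly, without reading `cluster.json`, so the commands above appear to be needed only when connecting to an externally started cluster.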

In [3]:
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
# Start an in-process CUDA cluster; each worker is given 10 threads.
cluster = LocalCUDACluster(threads_per_worker=10)

In [4]:
# Connect a distributed client to the cluster object created in the previous cell.
client = Client(cluster)

In [5]:
# Disabled code, kept for reference: lists the GPU ids that numba can see,
# lists the workers known to the client, and keeps as many workers as there
# are GPUs (or all workers, if there are fewer workers than GPUs).
# import numba.cuda

# devs = [i.id for i in numba.cuda.cudadrv.devices.gpus]
# workers = list(client.has_what().keys())
# worker_devs = workers[0:min(len(devs), len(workers))]

In [6]:
# Disabled code, kept for reference: set_visible(i, n) rotates the device list
# 0..n-1 so that device i comes first and writes the result to the
# CUDA_VISIBLE_DEVICES environment variable; the final line submits it to one
# worker per device.
# def set_visible(i, n):
#     import os, numba.cuda
#     all_devices = list(range(n))
#     vd = ",".join(map(str, all_devices[i:] + all_devices[:i]))
#     print(str(vd))
# #     numba.cuda.close()
# #     numba.cuda.select_device(i)
#     print("Selecting Device : "  + str(i))
#     os.environ["CUDA_VISIBLE_DEVICES"] = vd

# dev_assigned = [client.submit(set_visible, dev, len(devs), workers = [worker]) for dev, worker in zip(devs, worker_devs)]

In [7]:
import pandas as pd

# Toy regression data: one feature column 'a' and a target with the same
# values, so the target equals the feature.
# The frame is built from a {column name: values} mapping rather than a list
# of (name, values) tuples; the mapping form is the documented constructor
# input and describes the same single-column frame.
X = cudf.DataFrame({'a': [0, 1, 2, 3, 4]})
y = cudf.Series([0, 1, 2, 3, 4])


In [8]:
# Split the local frame and series into dask_cudf collections (chunksize=1)
# and persist both on the cluster.
X_df = (
    dask_cudf
    .from_cudf(X, chunksize=1)
    .persist()
)
y_df = (
    dask_cudf
    .from_cudf(y, chunksize=1)
    .persist()
)

In [9]:
import numba.cuda
import cuml
def print_device(arr):
    """Print the current CUDA device, then the device reported for ``arr``.

    Two lines are printed: the string form of numba's current device, and
    the string form of what ``cuml.device_of_ptr`` returns for ``arr``
    converted to a GPU matrix.

    Parameters
    ----------
    arr : object exposing ``as_gpu_matrix(order=...)``
        Presumably a cudf DataFrame partition -- TODO confirm against callers.

    Returns
    -------
    None
    """
    active_device = numba.cuda.get_current_device()
    print(str(active_device))

    # The matrix is requested in "F" order before asking cuml which device
    # its pointer belongs to.
    gpu_matrix = arr.as_gpu_matrix(order="F")
    owning_device = cuml.device_of_ptr(gpu_matrix)
    print(str(owning_device))
    
# Submit print_device once per delayed partition of X_df. The resulting list
# of futures is the cell's displayed value.
[client.submit(print_device, part) for part in X_df.to_delayed()]

[<Future: status: pending, key: print_device-3f5ea27f646ee1a43992228b5f876f97>,
 <Future: status: pending, key: print_device-3f450cfa51e3c56f0456098ebb281dce>,
 <Future: status: pending, key: print_device-318c2ebda16e8cc49c1553b9b72cd907>,
 <Future: status: pending, key: print_device-de19f13edf010fd77776266c224d0bd1>]

In [10]:
# Ask the scheduler which workers hold each key. In the recorded output every
# key maps to an empty tuple.
client.who_has()

{"('from_cudf-2748108b4d90493dad42bf02170aa0fe', 0)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 1)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 2)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 3)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 0)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 1)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 2)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 3)": (),
 'print_device-318c2ebda16e8cc49c1553b9b72cd907': (),
 'print_device-3f450cfa51e3c56f0456098ebb281dce': (),
 'print_device-3f5ea27f646ee1a43992228b5f876f97': (),
 'print_device-de19f13edf010fd77776266c224d0bd1': ()}

Set each worker to host DataFrames on a different device.

__Note__: You can skip this step if your workers were already started with the `CUDA_VISIBLE_DEVICES` environment variable set.

In [11]:
# Re-check key placement. The recorded output again shows an empty tuple for
# every key.
client.who_has()

{"('from_cudf-2748108b4d90493dad42bf02170aa0fe', 0)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 1)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 2)": (),
 "('from_cudf-2748108b4d90493dad42bf02170aa0fe', 3)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 0)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 1)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 2)": (),
 "('from_cudf-769e57f53663474b8598073ed35cf512', 3)": (),
 'print_device-318c2ebda16e8cc49c1553b9b72cd907': (),
 'print_device-3f450cfa51e3c56f0456098ebb281dce': (),
 'print_device-3f5ea27f646ee1a43992228b5f876f97': (),
 'print_device-de19f13edf010fd77776266c224d0bd1': ()}

In [12]:
# Create the dask_cuml estimator; no arguments are passed.
lr = LinearRegression()

In [13]:
# Fit on the distributed feature frame and target. The recorded output for
# this cell consists of debug lines (input_devarrays, exec_node, ipc_handles,
# raw_arrays).
res = lr.fit(X_df, y_df)

input_devarrays: [(('127.0.0.1', 46201), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-093756d88eb63925908b95436050e63a>), (('127.0.0.1', 45779), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-e582b95a666be250639d6b4b113f2d2c>)]
exec_node: ('127.0.0.1', 46201)
ipc_handles: [<Future: status: pending, key: get_input_ipc_handles-589c51a26ed26d68e7644914aebe47a2>]
raw_arrays: [<Future: status: finished, type: tuple, key: inputs_to_device_arrays-093756d88eb63925908b95436050e63a>]


res

In [14]:
# Inspect the fitted coefficients. The recorded output is a
# (worker address, Future) pair, not materialised values.
lr.coeffs

(('127.0.0.1', 46201),
 <Future: status: finished, type: Series, key: get_result-fddfe99cf522db4e8e017df35c75bbab>)

In [15]:
# Predict on the same distributed frame that was used for fitting.
g = lr.predict(X_df)

WORKER PARTS: [(('127.0.0.1', 46201), <Future: status: finished, type: DataFrame, key: ('from_cudf-2748108b4d90493dad42bf02170aa0fe', 3)>), (('127.0.0.1', 45779), <Future: status: finished, type: DataFrame, key: ('from_cudf-2748108b4d90493dad42bf02170aa0fe', 0)>), (('127.0.0.1', 46201), <Future: status: finished, type: DataFrame, key: ('from_cudf-2748108b4d90493dad42bf02170aa0fe', 1)>), (('127.0.0.1', 45779), <Future: status: finished, type: DataFrame, key: ('from_cudf-2748108b4d90493dad42bf02170aa0fe', 2)>)]
ON WORKER: 2
NOT ON WORKER: 2
IPCHANDLES = [<Future: status: pending, key: get_ipc_handles-74dcef214b98757381967beb20ff5b9f>, <Future: status: pending, key: get_ipc_handles-7f846146563505420d34042e1845dbfe>]
RAW_ARRAYS=[<Future: status: pending, key: as_gpu_matrix-35f93fe34b45823e4241f911d685b2d3>, <Future: status: pending, key: as_gpu_matrix-28b949e9a97f60e5c34e2e24c170870f>]
f=<Future: status: finished, type: tuple, key: _predict_on_worker-1543bafb0085a77d103498610a04e3a4>


In [16]:
# NOTE(review): the recorded output is a tornado.gen.Return wrapping a Future,
# not materialised predictions -- presumably the wrapped future still has to
# be unwrapped and gathered to see the values; TODO confirm.
g

tornado.gen.Return(<Future: status: finished, type: Series, key: get_result-1dcbe07e4042802d242dde736d045dca>)