In [1]:
import cudf
import dask_cudf

In [2]:
import dask_cudf

In [3]:
from dask_cuml.linear_regression import LinearRegression

In order to run this notebook, you will first need to run a dask scheduler and a number of dask workers:
- Run a dask scheduler with:  ```dask-scheduler --scheduler-file=cluster.json```
- Run N dask workers with:  ```mpirun -np N dask-mpi --no-nanny --nthreads 10 --no-scheduler --scheduler-file cluster.json```

In [4]:
from dask_cuda import LocalCUDACluster
# Create a LocalCUDACluster from inside the notebook, asking for 10 threads per worker.
# NOTE(review): the markdown above describes starting a scheduler and workers by hand
# (dask-scheduler / dask-mpi); this cell builds a cluster in-process instead — confirm
# which of the two setups is intended.
cluster = LocalCUDACluster(threads_per_worker = 10)


In [5]:
from dask.distributed import Client
# Connect a client to the `cluster` object created in the previous cell.
client = Client(cluster)
# Bare expression: the notebook displays the client summary (scheduler address, workers, ...).
client

0,1
Client  Scheduler: tcp://127.0.0.1:46755  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 2  Cores: 20  Memory: 50.39 GB


In [6]:
import numba.cuda

# Ids of the GPUs numba reports in the notebook process.
devs = [gpu.id for gpu in numba.cuda.cudadrv.devices.gpus]
# Keys of client.has_what() — presumably the worker addresses; verify against the dask docs.
workers = list(client.has_what())
# Keep at most one worker per GPU (and never more workers than exist).
worker_devs = workers[:min(len(devs), len(workers))]

In [7]:
def set_visible(i, n):
    """Select GPU ``i`` through numba and export a rotated CUDA_VISIBLE_DEVICES.

    The device list ``0..n-1`` is rotated so that ``i`` comes first
    (e.g. ``i=1, n=4`` gives ``"1,2,3,0"``). The function then closes
    numba's CUDA state, selects device ``i``, and finally writes the rotated
    list into ``os.environ["CUDA_VISIBLE_DEVICES"]``. Both the list and the
    selected device are printed.

    Parameters
    ----------
    i : int
        Index of the device to select and to place first in the list.
    n : int
        Total number of devices to enumerate.

    Returns
    -------
    None

    NOTE(review): the environment variable is written *after* the device has
    been selected; whether CUDA libraries already initialised in this process
    honour the new value should be confirmed.
    """
    import os
    import numba.cuda

    device_ids = list(range(n))
    # Rotate the list so that device i is listed first.
    rotated = device_ids[i:] + device_ids[:i]
    vd = ",".join(str(device_id) for device_id in rotated)
    print(vd)

    numba.cuda.close()
    numba.cuda.select_device(i)
    print("Selecting Device : " + str(i))

    os.environ["CUDA_VISIBLE_DEVICES"] = vd

# Submit set_visible once per (device id, worker) pair, restricting each task
# to its paired worker via the `workers=` argument.
dev_assigned = [
    client.submit(set_visible, device_id, len(devs), workers=[worker_addr])
    for device_id, worker_addr in zip(devs, worker_devs)
]

In [8]:
import pandas as pd

# Toy regression data: the feature values and the target are both 0..4.
# presumably the ('a', [...]) pair yields a single column named 'a' — verify against the cudf version in use
X = cudf.DataFrame([('a', [0, 1, 2, 3, 4])])
y = cudf.Series([0, 1, 2, 3, 4])


In [9]:
# Wrap the cudf objects as dask_cudf collections (chunksize=1) and call .persist() on them.
X_df = dask_cudf.from_cudf(X, chunksize=1).persist()
y_df = dask_cudf.from_cudf(y, chunksize=1).persist()

In [10]:
import numba.cuda
import cuml
def print_device(arr):
    """Print numba's current CUDA device, then the device cuml reports for ``arr``.

    ``arr`` must provide ``as_gpu_matrix(order=...)``; the matrix is requested
    in "F" order and handed to ``cuml.device_of_ptr``. Nothing is returned.
    """
    current_device = numba.cuda.get_current_device()
    print(current_device)

    gpu_matrix = arr.as_gpu_matrix(order="F")
    print(cuml.device_of_ptr(gpu_matrix))
    
# Submit print_device once per element of X_df.to_delayed(); as the cell's last
# expression, the resulting list of futures is displayed.
[client.submit(print_device, part) for part in X_df.to_delayed()]

[<Future: status: pending, key: print_device-fa2def07c1ad81017ab2924945106a5c>,
 <Future: status: pending, key: print_device-c45e4bb6ac7f43dbd4eba609a7b425a0>,
 <Future: status: pending, key: print_device-34627e871cae334ded4e233cd91406d0>,
 <Future: status: pending, key: print_device-b1fc434d40eb1e311cbea505670b9a13>]

In [11]:
# Display, per task key, the workers holding its result (an empty tuple means none yet).
client.who_has()

{"('from_cudf-cd37dc024e134c12a714e9b17461255e', 0)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 1)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 2)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 3)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 0)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 1)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 2)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 3)": (),
 'print_device-34627e871cae334ded4e233cd91406d0': (),
 'print_device-b1fc434d40eb1e311cbea505670b9a13': (),
 'print_device-c45e4bb6ac7f43dbd4eba609a7b425a0': (),
 'print_device-fa2def07c1ad81017ab2924945106a5c': (),
 'set_visible-1a0295c5d8555edc245d9f27ca96b873': ('tcp://127.0.0.1:39867',),
 'set_visible-3149f083394e13eb32cf2537c871b1a8': ('tcp://127.0.0.1:46701',)}

Set each worker to host DataFrames on a different device.

__Note__: You can ignore this if you started your workers with `CUDA_VISIBLE_DEVICES` already set.

In [12]:
# Same query as the previous cell: which workers hold each task's result.
client.who_has()

{"('from_cudf-cd37dc024e134c12a714e9b17461255e', 0)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 1)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 2)": (),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 3)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 0)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 1)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 2)": (),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 3)": (),
 'print_device-34627e871cae334ded4e233cd91406d0': (),
 'print_device-b1fc434d40eb1e311cbea505670b9a13': (),
 'print_device-c45e4bb6ac7f43dbd4eba609a7b425a0': (),
 'print_device-fa2def07c1ad81017ab2924945106a5c': (),
 'set_visible-1a0295c5d8555edc245d9f27ca96b873': ('tcp://127.0.0.1:39867',),
 'set_visible-3149f083394e13eb32cf2537c871b1a8': ('tcp://127.0.0.1:46701',)}

In [13]:
# Instantiate the dask_cuml LinearRegression estimator with its default arguments.
lr = LinearRegression()

In [14]:
# Fit on the persisted dask_cudf collections; the call prints debug lines about futures and workers.
res = lr.fit(X_df, y_df)

input_devarrays: [(('127.0.0.1', 39867), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-35afee8043ed64e2a471d968fd5f5379>), (('127.0.0.1', 46701), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-166f78615428cce0aa9eac5141fb5341>)]
exec_node: ('127.0.0.1', 39867)
ipc_handles: [<Future: status: pending, key: get_input_ipc_handles-824c00e8925cea052e9f7c882846a100>]
raw_arrays: [<Future: status: finished, type: tuple, key: inputs_to_device_arrays-35afee8043ed64e2a471d968fd5f5379>]
COEFS: (('127.0.0.1', 39867), <Future: status: pending, key: extract_part-7bc5063212fb7eae4adc886b2ff38e7c>)
INTER: <Future: status: pending, key: extract_part-0ba0929262e97ee1f9f195b311354db0>
RES: <Future: status: pending, key: extract_part-0ba0929262e97ee1f9f195b311354db0>


res

In [15]:
# intercept_ exposes .result() here (future-like), so this waits for the value and displays it.
lr.intercept_.result()

5

In [16]:
# Check where task results live after fitting.
client.who_has()

{"('from_cudf-cd37dc024e134c12a714e9b17461255e', 0)": ('tcp://127.0.0.1:39867',),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 1)": ('tcp://127.0.0.1:46701',),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 2)": ('tcp://127.0.0.1:39867',),
 "('from_cudf-cd37dc024e134c12a714e9b17461255e', 3)": ('tcp://127.0.0.1:46701',),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 0)": ('tcp://127.0.0.1:39867',),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 1)": ('tcp://127.0.0.1:46701',),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 2)": ('tcp://127.0.0.1:39867',),
 "('from_cudf-ea58007f60de4e0b875070816af6162b', 3)": ('tcp://127.0.0.1:46701',),
 '_fit_on_worker-9c44b51477cd6c40f7578a4c62b40678': (),
 'extract_part-0ba0929262e97ee1f9f195b311354db0': ('tcp://127.0.0.1:39867',),
 'extract_part-7bc5063212fb7eae4adc886b2ff38e7c': ('tcp://127.0.0.1:39867',),
 'get_input_ipc_handles-824c00e8925cea052e9f7c882846a100': (),
 'get_result-cfeb9d6fbf537487e79bf6fa5111c539': (),
 'inputs_to_devic

In [17]:
# Run prediction on the same input; `g` is future-like (its value is fetched with g.result()).
g = lr.predict(X_df)

WORKER PARTS: [(('127.0.0.1', 46701), <Future: status: finished, type: DataFrame, key: ('from_cudf-ea58007f60de4e0b875070816af6162b', 3)>), (('127.0.0.1', 39867), <Future: status: finished, type: DataFrame, key: ('from_cudf-ea58007f60de4e0b875070816af6162b', 2)>), (('127.0.0.1', 39867), <Future: status: finished, type: DataFrame, key: ('from_cudf-ea58007f60de4e0b875070816af6162b', 0)>), (('127.0.0.1', 46701), <Future: status: finished, type: DataFrame, key: ('from_cudf-ea58007f60de4e0b875070816af6162b', 1)>)]
ON WORKER: 2
NOT ON WORKER: 2
IPCHANDLES = [<Future: status: pending, key: get_ipc_handles-5b44687ba651d0ffc8fa67274c2e816b>, <Future: status: pending, key: get_ipc_handles-94c595584fbd6789157d7ca78945b46a>]
RAW_ARRAYS=[<Future: status: pending, key: as_gpu_matrix-3fe28086efdb6d6c9e9fc61ab1607d78>, <Future: status: pending, key: as_gpu_matrix-2f4c298c0ba4f4c9810fb016dc69db1d>]
f=<Future: status: finished, type: tuple, key: _predict_on_worker-2a06e0f3cbf9526c700e3cea5a9ff8db>


In [18]:
# Wait for the prediction to finish and print its string form.
print(g.result())

      
0    1
1    2
2    3
3    4
4    5


distributed.nanny - ERROR - Failed to restart worker after its process exited
Traceback (most recent call last):
  File "/share/conda/cuml/lib/python3.5/site-packages/distributed/nanny.py", line 291, in _on_exit
    yield self.instantiate()
  File "/share/conda/cuml/lib/python3.5/site-packages/tornado/gen.py", line 1133, in run
    value = future.result()
  File "/share/conda/cuml/lib/python3.5/asyncio/futures.py", line 294, in result
    raise self._exception
  File "/share/conda/cuml/lib/python3.5/site-packages/tornado/gen.py", line 1141, in run
    yielded = self.gen.throw(*exc_info)
  File "/share/conda/cuml/lib/python3.5/site-packages/distributed/nanny.py", line 226, in instantiate
    self.process.start()
  File "/share/conda/cuml/lib/python3.5/site-packages/tornado/gen.py", line 1133, in run
    value = future.result()
  File "/share/conda/cuml/lib/python3.5/asyncio/futures.py", line 294, in result
    raise self._exception
  File "/share/conda/cuml/lib/python3.5/site-packages/t