To run this notebook, you will first need to start a dask scheduler and a number of dask workers:
- Run a dask scheduler with:  ```dask-scheduler --scheduler-file=cluster.json```
- Run N dask workers with:  ```mpirun -np N dask-mpi --no-nanny --nthreads 10 --no-scheduler --scheduler-file cluster.json```

In [1]:
from dask.distributed import Client


In [2]:
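# Alternative: uncomment and run this cell (instead of the scheduler-file cell below) if you want to start a local dask-cuda cluster from within the notebook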

# from dask_cuda import LocalCUDACluster
# cluster = LocalCUDACluster(threads_per_worker = 1)

# client = Client(cluster)
# client

In [3]:
# Run this if you are using an MPI-based cluster: connect to the scheduler
# through cluster.json, the scheduler file named in the dask-scheduler /
# dask-mpi commands at the top of this notebook.
client = Client(scheduler_file="cluster.json")

In [4]:

# GPU ordinals to hand out, one per worker.
devs = [0, 1]

# Addresses of the workers currently known to the scheduler.
workers = list(client.has_what())

# Keep at most one worker per device; slicing clamps on its own when fewer
# workers than devices are available.
worker_devs = workers[:len(devs)]

In [5]:
def set_visible(i, n):
    """Make device ``i`` the first entry of CUDA_VISIBLE_DEVICES in this process.

    The ordinals ``0..n-1`` are rotated so that they start at ``i`` (for
    example ``i=1, n=3`` gives ``"1,2,0"``), and the comma-separated result is
    written to ``os.environ["CUDA_VISIBLE_DEVICES"]``.

    Parameters
    ----------
    i : int
        Ordinal that should come first in the visible-device list.
    n : int
        Total number of device ordinals to list.

    Returns
    -------
    None
        The function only prints diagnostics and mutates the environment.
    """
    import os

    ordinals = [str(ordinal) for ordinal in range(n)]
    vd = ",".join(ordinals[i:] + ordinals[:i])

    print(vd)
    print("Selecting Device : {}".format(i))
    os.environ["CUDA_VISIBLE_DEVICES"] = vd

    # numba.cuda is imported only after the variable has been written;
    # presumably so the new ordering is picked up - TODO confirm.
    import numba.cuda
    current = numba.cuda.get_current_device()
    print("Cur device: {}".format(current.id))
    
# Pin one set_visible task to each selected worker so that every worker
# reorders its own CUDA_VISIBLE_DEVICES; the futures are kept for inspection.
dev_assigned = [
    client.submit(set_visible, device_id, len(devs), workers=[address])
    for device_id, address in zip(devs, worker_devs)
]

In [6]:
import cudf
import dask_cudf
import numba.cuda
from dask_cuml.linear_regression import LinearRegression

In [7]:
# Show the status of the set_visible futures submitted earlier.
print(dev_assigned)

[<Future: status: finished, type: NoneType, key: set_visible-8e4386533916266bf607bb3518c945d4>, <Future: status: finished, type: NoneType, key: set_visible-2202c4a38e62dd9f3d1ca93dd67fd403>]


In [8]:
import pandas as pd

# Toy data: a single feature column 'a' and a target holding the same values.
X = cudf.DataFrame([('a', list(range(5)))])
y = cudf.Series(list(range(5)))


In [9]:
# Wrap the features and the target as dask_cudf collections (chunksize=1) and
# persist both, features first.
X_df, y_df = (
    dask_cudf.from_cudf(local_obj, chunksize=1).persist()
    for local_obj in (X, y)
)

In [10]:
# Inspect which worker address(es) hold each key right after persisting.
client.who_has()

{"('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 3)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 1)": ('tcp://10.2.166.167:32914',),
 'set_visible-8e4386533916266bf607bb3518c945d4': ('tcp://10.2.166.167:32914',),
 'set_visible-2202c4a38e62dd9f3d1ca93dd67fd403': ('tcp://10.2.166.167:46333',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 3)": (),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 1)": (),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 0)": (),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 3)": (),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 2)": (),
 "('from_cudf-d91b39c7838a4d24a07560970e

In [11]:
import numba.cuda
import cuml

def print_device(arr):
    """Print GPU placement diagnostics for one lazy partition.

    Parameters
    ----------
    arr : object exposing ``compute()``
        The notebook passes the items of ``X_df.to_delayed()``. The computed
        result must provide ``as_gpu_matrix``.

    Returns
    -------
    None
        Four diagnostic lines are printed: the CUDA_VISIBLE_DEVICES value of
        the executing process, the device matrix, the id of numba's current
        device, and the device reported by ``cuml.device_of_ptr``.
    """
    import os

    dev = arr.compute().as_gpu_matrix(order="F")

    # A process in which CUDA_VISIBLE_DEVICES was never set (for instance a
    # worker that did not run set_visible) would raise KeyError on a plain
    # lookup and lose the remaining diagnostics, so report a placeholder.
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "<unset>")

    print("CUDA_VISIBLE_DEVICES: " + str(visible_devices))
    print("ARRAY: " + str(dev))
    print("CUR DEVICE: " + str(numba.cuda.get_current_device().id))
    print("Pointer Device: " + str(cuml.device_of_ptr(dev)))
    
# Submit one print_device task per delayed partition of X_df; the list of
# futures is the cell's displayed value.
[
    client.submit(print_device, partition)
    for partition in X_df.to_delayed()
]

[<Future: status: pending, key: print_device-bc85a57a3c65c69c64903296b87e790c>,
 <Future: status: pending, key: print_device-c7097b39de14ba717dd60f9893f36409>,
 <Future: status: pending, key: print_device-9beccb8719a3ea53f75f7ecf09eaefc9>,
 <Future: status: pending, key: print_device-75c7d9d2286c9bc862b969b29b508c33>]

Set each worker to host its DataFrame partitions on a different GPU device.

__Note__: You can ignore this if you started your workers with `CUDA_VISIBLE_DEVICES` already set.

In [12]:
# Re-check which worker address(es) hold each key before fitting.
client.who_has()

{"('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 3)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 1)": ('tcp://10.2.166.167:32914',),
 'set_visible-8e4386533916266bf607bb3518c945d4': ('tcp://10.2.166.167:32914',),
 'set_visible-2202c4a38e62dd9f3d1ca93dd67fd403': ('tcp://10.2.166.167:46333',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 3)": ('tcp://10.2.166.167:46333',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 1)": ('tcp://10.2.166.167:46333',),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 3)": ('tcp://10.2.166.167:

In [13]:
# Build the dask_cuml LinearRegression estimator, passing no arguments.
lr = LinearRegression()

In [14]:
# Fit on the persisted dask_cudf collections and keep whatever fit returns.
res = lr.fit(X_df, y_df)

input_devarrays: [(('10.2.166.167', 46333), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-13afd9fa21a01c81ef6cba9436e73ac2>), (('10.2.166.167', 32914), <Future: status: finished, type: tuple, key: inputs_to_device_arrays-08705425368b280de70a3f6f65244ad2>)]
exec_node: ('10.2.166.167', 46333)
ipc_handles: [<Future: status: pending, key: get_input_ipc_handles-36da53a2e4ad3cf988c4ef4097966343>]
raw_arrays: [<Future: status: finished, type: tuple, key: inputs_to_device_arrays-13afd9fa21a01c81ef6cba9436e73ac2>]
COEFS: (('10.2.166.167', 46333), <Future: status: pending, key: extract_part-7501b3b7cace05bc9aa8eb3b54255ea4>)
INTER: <Future: status: pending, key: extract_part-2fc7dbe0923f0381dfb372b1d17ab749>
RES: <Future: status: pending, key: extract_part-2fc7dbe0923f0381dfb372b1d17ab749>


res

In [15]:
# intercept_ is read through .result() here, i.e. it is treated as a future
# whose value has to be fetched before it can be displayed.
lr.intercept_.result()

5

In [16]:
# Check key placement again now that the model has been fitted.
client.who_has()

{"('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 3)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-751bbd7c13a7491ea1843aaec30aada5', 1)": ('tcp://10.2.166.167:32914',),
 'set_visible-8e4386533916266bf607bb3518c945d4': ('tcp://10.2.166.167:32914',),
 'set_visible-2202c4a38e62dd9f3d1ca93dd67fd403': ('tcp://10.2.166.167:46333',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 2)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 3)": ('tcp://10.2.166.167:46333',),
 "('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 1)": ('tcp://10.2.166.167:46333',),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 0)": ('tcp://10.2.166.167:32914',),
 "('from_cudf-d91b39c7838a4d24a07560970ef72ea8', 3)": ('tcp://10.2.166.167:

In [17]:
# Predict on the same features used for fitting; the value is fetched with
# g.result() in the following cell.
g = lr.predict(X_df)

WORKER PARTS: [(('10.2.166.167', 46333), <Future: status: finished, type: DataFrame, key: ('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 3)>), (('10.2.166.167', 46333), <Future: status: finished, type: DataFrame, key: ('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 1)>), (('10.2.166.167', 32914), <Future: status: finished, type: DataFrame, key: ('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 2)>), (('10.2.166.167', 32914), <Future: status: finished, type: DataFrame, key: ('from_cudf-a39ddd2795dd4791af5723a98609e1ed', 0)>)]
ON WORKER: 2
NOT ON WORKER: 2
IPCHANDLES = [<Future: status: pending, key: get_ipc_handles-e26c71b03604d33dd9e7cc31f29c0f10>, <Future: status: pending, key: get_ipc_handles-9167d6f86c22aa591bcc01a6853e4a80>]
RAW_ARRAYS=[<Future: status: pending, key: as_gpu_matrix-faf5adf1a68f028bc1c32247064b3cf3>, <Future: status: pending, key: as_gpu_matrix-fdb976f5f4231d888c2bc8eb13de5d5e>]
f=<Future: status: finished, type: tuple, key: _predict_on_worker-e0db8b092ea628a41b0b72ec63e

In [18]:
# Fetch the prediction result and print its string form.
print(g.result())

      
0    1
1    2
2    3
3    4
4    5
