Update IPC to support new versions of arrow/cudf (#39)
guilhermeleobas committed Mar 3, 2023
1 parent f65dbc0 commit 0fa0e7a
Showing 3 changed files with 14 additions and 18 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -33,7 +33,9 @@ heavyai with GPU capabilities.
 To install heavyai for GPU Dataframe support (conda-only):
 
 ```bash
-mamba create -n heavyai-gpu -c rapidsai -c nvidia -c conda-forge -c defaults cudf heavyai pyheavydb
+mamba create -n heavyai-gpu -c rapidsai -c nvidia -c conda-forge -c defaults \
+  --no-channel-priority \
+  cudf heavyai pyheavydb pytest shapely geopandas pyarrow=*=*cuda
 ```
 
 Documentation
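The `pyarrow=*=*cuda` spec pins the conda build string so the solver picks a CUDA-enabled pyarrow build. As a quick sanity check (not part of the commit; assumes the environment above is activated), both GPU pieces should import cleanly:

```python
# Minimal environment check for the GPU IPC path used below.
import pyarrow as pa
import pyarrow.cuda  # ImportError here means pyarrow was built without CUDA
import cudf

print("pyarrow", pa.__version__, "| cudf", cudf.__version__)
```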
27 changes: 11 additions & 16 deletions heavyai/_utils.py
@@ -100,10 +100,8 @@ def _parse_tdf_gpu(tdf):
     """
 
     import pyarrow as pa
-    from cudf.comm.gpuarrow import GpuArrowReader
     from cudf.core.dataframe import DataFrame
     from pyarrow._cuda import Context, IpcMemHandle
-    from numba import cuda
 
     ipc_handle = IpcMemHandle.from_buffer(pa.py_buffer(tdf.df_handle))
     ctx = Context()
@@ -139,24 +137,21 @@ def _parse_tdf_gpu(tdf):
         # columns
         pass
 
-    dtype = np.dtype(np.byte)
-    darr = cuda.devicearray.DeviceNDArray(
-        shape=ipc_buf.size,
-        strides=dtype.itemsize,
-        dtype=dtype,
-        gpu_data=ipc_buf.to_numba(),
-    )
-
-    reader = GpuArrowReader(schema, darr)
+    batch = pa._cuda.read_record_batch(ipc_buf, schema)
+    table = pa.Table.from_batches([batch])
     df = DataFrame()
     df.set_tdf = MethodType(set_tdf, df)
     df.get_tdf = MethodType(get_tdf, df)
 
-    for k, v in reader.to_dict().items():
-        if k in dict_memo:
-            df[k] = pa.DictionaryArray.from_arrays(v.to_arrow(), dict_memo[k])
-        else:
-            df[k] = v
+    # convert table -> cuDF first
+    for name in table.column_names:
+        df[name] = table[name]
+
+    # remap dictionary nodes
+    for name in dict_memo.keys():
+        indices = df[name].to_arrow()
+        dictionary = dict_memo[name]
+        df[name] = pa.DictionaryArray.from_arrays(indices, dictionary)
 
     df.set_tdf(tdf)
 
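The substance of the _utils.py change: cudf's GpuArrowReader (dropped from recent cudf releases) and the intermediate numba DeviceNDArray are gone; the record batch is now read straight off the CUDA IPC buffer with pyarrow and copied into a cuDF DataFrame column by column, with dictionary-encoded columns remapped afterwards. A minimal standalone sketch of that path, assuming a serialized IPC handle `handle_bytes` and an already-parsed Arrow `schema` (names are illustrative; the public `pyarrow.cuda` wrapper is used here in place of the private `pyarrow._cuda` module the commit imports from):

```python
import pyarrow as pa
from pyarrow import cuda as pa_cuda  # needs a CUDA-enabled pyarrow build
import cudf


def read_gpu_result(handle_bytes: bytes, schema: pa.Schema) -> cudf.DataFrame:
    """Open the server's CUDA IPC buffer and build a cuDF DataFrame from it."""
    ctx = pa_cuda.Context()
    ipc_handle = pa_cuda.IpcMemHandle.from_buffer(pa.py_buffer(handle_bytes))
    ipc_buf = ctx.open_ipc_buffer(ipc_handle)            # device buffer, no host copy
    batch = pa_cuda.read_record_batch(ipc_buf, schema)   # RecordBatch over GPU memory
    table = pa.Table.from_batches([batch])

    # Mirror the commit: copy the Arrow columns into a cuDF DataFrame.
    df = cudf.DataFrame()
    for name in table.column_names:
        df[name] = table[name]
    return df
```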
1 change: 0 additions & 1 deletion heavyai/connection.py
@@ -314,7 +314,6 @@ def select_ipc_gpu(
         where HeavyDB running.
         """
         try:
-            from cudf.comm.gpuarrow import GpuArrowReader  # noqa
             from cudf.core.dataframe import DataFrame  # noqa
         except ImportError:
             raise ImportError(
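The import guard lives in `Connection.select_ipc_gpu`, which returns a cuDF DataFrame when the GPU stack is available. A hedged usage sketch (credentials, host, table, and column names are placeholders, not from the commit):

```python
from heavyai import connect

# Placeholder connection parameters; adjust for your HeavyDB instance.
con = connect(user="admin", password="HyperInteractive",
              host="localhost", port=6274, dbname="heavyai")

# Executes on the server and returns a cudf.DataFrame via the GPU IPC handle.
gdf = con.select_ipc_gpu("SELECT dep_delay, arr_delay FROM flights LIMIT 100")
print(type(gdf), len(gdf))
```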
