-
Notifications
You must be signed in to change notification settings - Fork 859
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement .apply_rows() to do row-by-row transformation
- Loading branch information
Showing
4 changed files
with
190 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from weakref import WeakKeyDictionary | ||
import functools | ||
|
||
from numba.utils import pysignature, exec_ | ||
from numba import cuda | ||
|
||
|
||
def apply_rows(df, func, incols, outcols, kwargs):
    """Apply ``func`` to ``df`` through a generated CUDA kernel.

    Arguments are matched to the parameters of ``func`` by name.

    Parameters
    ----------
    df
        Source frame.  It is not modified; the result is built on a copy.
    func
        User function that is compiled and launched on the GPU.
    incols
        Iterable of column names of ``df`` passed to ``func`` as GPU arrays.
    outcols
        Mapping of new column name to dtype.  A device array of ``len(df)``
        elements is allocated for each entry.
    kwargs
        Mapping of extra argument name to value, passed to ``func`` as is.

    Returns
    -------
    A copy of ``df`` with one column added per entry of ``outcols``.
    """
    nrows = len(df)

    # Device arrays for the inputs, freshly allocated ones for the outputs
    in_arrays = {name: df[name].to_gpu_array() for name in incols}
    out_arrays = {name: cuda.device_array(nrows, dtype=dtype)
                  for name, dtype in outcols.items()}

    # Build (or fetch) the kernel matching the user function's signature
    signature = pysignature(func)
    kernel = _load_cache_or_make_row_wise_kernel(func,
                                                 signature.parameters.keys(),
                                                 kwargs.keys())

    # Merge everything into one name -> value mapping; on a name clash the
    # later source wins (kwargs over outputs over inputs).
    call_args = dict(in_arrays)
    call_args.update(out_arrays)
    call_args.update(kwargs)
    bound = signature.bind(**call_args)

    # Launch with 64 threads per block and between 1 and 16 blocks
    threads_per_block = 64
    blocks = min(16, max(1, nrows // threads_per_block))
    kernel[blocks, threads_per_block](*bound.args)

    # Attach the outputs to a copy of the frame, in sorted name order
    result = df.copy()
    for name in sorted(outcols):
        result[name] = out_arrays[name]
    return result
|
||
|
||
def _make_row_wise_kernel(func, argnames, extras):
    """
    Generate and compile a CUDA kernel wrapping ``func``.

    The kernel takes one parameter per entry of ``argnames``.  Each thread
    rebinds every parameter that is not listed in ``extras`` to the slice
    ``arg[tid::ntid]`` (``tid`` from ``cuda.grid(1)``, ``ntid`` from
    ``cuda.gridsize(1)``) and then calls ``func``, compiled as a device
    function, once with all the arguments.  Parameters named in ``extras``
    are forwarded untouched.

    All user-supplied names are mangled so they cannot collide with the
    names used by the generated code.
    """
    mangled_args = [_mangle_user(name) for name in argnames]
    mangled_extras = [_mangle_user(name) for name in extras]
    arglist = ', '.join(mangled_args)

    # Statements that make up the body of the generated kernel
    lines = ['tid = cuda.grid(1)', 'ntid = cuda.gridsize(1)']
    slice_stmt = '{a} = {a}[{start}:{stop}:{stride}]'
    lines.extend(
        slice_stmt.format(a=name, start='tid', stop='', stride='ntid')
        for name in mangled_args
        if name not in mangled_extras
    )
    lines.append("inner({})".format(arglist))

    # Assemble the source text of the kernel
    template = "\ndef elemwise({args}):\n{body}\n"
    body = '\n'.join(' ' * 4 + stmt for stmt in lines)
    code = template.format(args=arglist, body=body)

    # Execute the source in a namespace exposing only what it references
    namespace = {'inner': cuda.jit(device=True)(func),
                 'cuda': cuda}
    exec_(code, namespace)

    # Compile the generated function as a CUDA kernel
    return cuda.jit(namespace['elemwise'])
|
||
|
||
# Compiled kernels.  Outer mapping: user function (held weakly) -> dict of
# (argnames, extras) key -> kernel built for that exact argument layout.
_cache = WeakKeyDictionary()


@functools.wraps(_make_row_wise_kernel)
def _load_cache_or_make_row_wise_kernel(func, argnames, extras):
    """Caching version of ``_make_row_wise_kernel``.

    The generated kernel depends on ``func`` *and* on which arguments are
    treated as extras (passed whole) rather than sliced per thread, so the
    cache is keyed on all of them.  Keying on ``func`` alone would hand back
    a kernel built for a different argument layout when the same function is
    reused with another split between columns and extras.

    Parameters
    ----------
    func
        User function; must be usable as a ``WeakKeyDictionary`` key.
    argnames
        Iterable of the parameter names of ``func``, in order.
    extras
        Iterable of the names in ``argnames`` that are passed through whole.

    Returns
    -------
    The kernel returned by ``_make_row_wise_kernel`` for this combination,
    built at most once per combination while ``func`` stays in the cache.
    """
    # Materialise once: the inputs may be one-shot iterables, and they are
    # needed both for the cache key and for building the kernel.
    argnames = tuple(argnames)
    extras = tuple(extras)
    # Only membership in ``extras`` affects the generated code, so its order
    # is irrelevant to the key.
    key = (argnames, frozenset(extras))

    per_func = _cache.setdefault(func, {})
    try:
        return per_func[key]
    except KeyError:
        kernel = _make_row_wise_kernel(func, argnames, extras)
        per_func[key] = kernel
        return kernel
|
||
|
||
def _mangle_user(name): | ||
"""Mangle user variable name | ||
""" | ||
return "__user_{}".format(name) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" | ||
Test methods that apply a GPU kernel to a frame. | ||
""" | ||
|
||
import pytest | ||
import numpy as np | ||
|
||
from pygdf import DataFrame | ||
|
||
|
||
@pytest.mark.parametrize('nelem', [1, 2, 64, 128, 1000, 5000])
def test_df_apply_rows(nelem):
    """Check ``DataFrame.apply_rows`` against the same formulas in NumPy."""

    # Parameter names matter: they are matched against the incols, outcols
    # and kwargs names given to ``apply_rows`` below.
    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        for i, (x, y, z) in enumerate(zip(in1, in2, in3)):
            out1[i] = extra2 * x - extra1 * y
            out2[i] = y - extra1 * z

    # Three identical input columns holding 0 .. nelem - 1
    in1 = np.arange(nelem)
    in2 = np.arange(nelem)
    in3 = np.arange(nelem)

    df = DataFrame()
    df['in1'] = in1
    df['in2'] = in2
    df['in3'] = in3

    extra1, extra2 = 2.3, 3.4

    # Reference results computed on the host
    expected = {
        'out1': extra2 * in1 - extra1 * in2,
        'out2': in2 - extra1 * in3,
    }

    outdf = df.apply_rows(kernel,
                          incols=['in1', 'in2', 'in3'],
                          outcols={'out1': np.float64, 'out2': np.float64},
                          kwargs={'extra1': extra1, 'extra2': extra2})

    # Fetch both output columns before comparing any of them
    got = {name: outdf[name].to_array() for name in ('out1', 'out2')}

    for name in ('out1', 'out2'):
        np.testing.assert_array_almost_equal(got[name], expected[name])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters