Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support NumExpr and add a 'broadcast_arrays' function. #119

Merged
merged 4 commits into from
Feb 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.117
0.1.119
2 changes: 2 additions & 0 deletions awkward1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from awkward1.highlevel import Record
from awkward1.highlevel import FillableArray

from awkward1._numexpr import evaluate as numexpr

from awkward1.operations.convert import *
from awkward1.operations.describe import *
from awkward1.operations.structure import *
Expand Down
91 changes: 91 additions & 0 deletions awkward1/_numexpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

from __future__ import absolute_import

import sys

import awkward1.layout
import awkward1.operations.convert
import awkward1._util

def getArguments(names, local_dict=None, global_dict=None):
"""Replaces numexpr.necompiler.getArguments to avoid calling numpy.asarray on Awkward Arrays."""
call_frame = sys._getframe(2)

clear_local_dict = False
if local_dict is None:
local_dict = call_frame.f_locals
clear_local_dict = True
try:
frame_globals = call_frame.f_globals
if global_dict is None:
global_dict = frame_globals

# If `call_frame` is the top frame of the interpreter we can't clear its
# `local_dict`, because it is actually the `global_dict`.
clear_local_dict = clear_local_dict and not frame_globals is local_dict

arguments = []
for name in names:
try:
a = local_dict[name]
except KeyError:
a = global_dict[name]
arguments.append(a) # <--- This is different from NumExpr: don't call numpy.asarray(a)
finally:
# If we generated local_dict via an explicit reference to f_locals,
# clear the dict to prevent creating extra ref counts in the caller's scope
# See https://github.com/pydata/numexpr/issues/310
if clear_local_dict:
local_dict.clear()

return arguments

def evaluate(expression, local_dict=None, global_dict=None, order="K", casting="safe", **kwargs):
import numexpr

context = numexpr.necompiler.getContext(kwargs, frame_depth=1)
expr_key = (expression, tuple(sorted(context.items())))
if expr_key not in numexpr.necompiler._names_cache:
numexpr.necompiler._names_cache[expr_key] = numexpr.necompiler.getExprNames(expression, context)
names, ex_uses_vml = numexpr.necompiler._names_cache[expr_key]
arguments = getArguments(names, local_dict, global_dict)

arrays = [awkward1.operations.convert.tolayout(x, allowrecord=True, allowother=True) for x in arguments]

def getfunction(inputs):
if all(isinstance(x, awkward1.layout.NumpyArray) or not isinstance(x, awkward1.layout.Content) for x in inputs):
return lambda depth: (awkward1.layout.NumpyArray(numexpr.evaluate(expression, dict(zip(names, inputs)), {}, order=order, casting=casting, **kwargs)),)
else:
return None

out = awkward1._util.broadcast_and_apply(arrays, getfunction)
assert isinstance(out, tuple) and len(out) == 1
return awkward1._util.wrap(out[0], awkward1._util.behaviorof(arrays))

evaluate.evaluate = evaluate

def re_evaluate(local_dict=None):
import numexpr

try:
compiled_ex = numexpr.necompiler._numexpr_last["ex"]
except KeyError:
raise RuntimeError("not a previous evaluate() execution found")
names = numexpr.necompiler._numexpr_last["argnames"]
arguments = getArguments(names, local_dict)

arrays = [awkward1.operations.convert.tolayout(x, allowrecord=True, allowother=True) for x in arguments]

def getfunction(inputs):
if all(isinstance(x, awkward1.layout.NumpyArray) or not isinstance(x, awkward1.layout.Content) for x in inputs):

return lambda depth: (awkward1.layout.NumpyArray(numexpr.re_evaluate(dict(zip(names, inputs)))),)

return None

out = awkward1._util.broadcast_and_apply(arrays, getfunction)
assert isinstance(out, tuple) and len(out) == 1
return awkward1._util.wrap(out[0], awkward1._util.behaviorof(arrays))

evaluate.re_evaluate = re_evaluate
17 changes: 13 additions & 4 deletions awkward1/_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,32 @@ def array_ufunc(ufunc, method, inputs, kwargs, behavior):

inputs = [awkward1.operations.convert.tolayout(x, allowrecord=True, allowother=True) for x in inputs]

def adjust(custom, inputs, kwargs):
tmp = custom(*inputs, **kwargs)
if not isinstance(tmp, tuple):
return (tmp,)
else:
return tmp

def getfunction(inputs):
signature = (ufunc,) + tuple(x.parameters.get("__record__") if isinstance(x, awkward1.layout.Content) else type(x) for x in inputs)
custom = awkward1._util.overload(behavior, signature)
if custom is not None:
return lambda depth: custom(*inputs, **kwargs)
return lambda depth: adjust(custom, inputs, kwargs)

signature = (ufunc,) + tuple(x.parameters.get("__array__") if isinstance(x, awkward1.layout.Content) else type(x) for x in inputs)
custom = awkward1._util.overload(behavior, signature)
if custom is not None:
return lambda depth: custom(*inputs, **kwargs)
return lambda depth: adjust(custom, inputs, kwargs)

if all(isinstance(x, awkward1.layout.NumpyArray) or not isinstance(x, awkward1.layout.Content) for x in inputs):
return lambda depth: awkward1.layout.NumpyArray(getattr(ufunc, method)(*inputs, **kwargs))
return lambda depth: (awkward1.layout.NumpyArray(getattr(ufunc, method)(*inputs, **kwargs)),)

return None

return awkward1._util.wrap(awkward1._util.broadcast_and_apply(inputs, getfunction), behavior)
out = awkward1._util.broadcast_and_apply(inputs, getfunction)
assert isinstance(out, tuple) and len(out) == 1
return awkward1._util.wrap(out[0], behavior)

try:
NDArrayOperatorsMixin = numpy.lib.mixins.NDArrayOperatorsMixin
Expand Down
41 changes: 30 additions & 11 deletions awkward1/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,24 +215,29 @@ def apply(inputs, depth):

tags = numpy.empty(length, dtype=numpy.int8)
index = numpy.empty(length, dtype=numpy.int64)
contents = []
outcontents = []
for tag, combo in enumerate(numpy.unique(combos)):
mask = (combos == combo)
tags[mask] = tag
index[mask] = numpy.arange(numpy.count_nonzero(mask))
nextinputs = []
numoutputs = None
for i, x in enumerate(inputs):
if isinstance(x, uniontypes):
nextinputs.append(x[mask].project(combo[str(i)]))
elif isinstance(x, awkward1.layout.Content):
nextinputs.append(x[mask])
else:
nextinputs.append(x)
contents.append(apply(nextinputs, depth))
outcontents.append(apply(nextinputs, depth))
assert isinstance(outcontents[-1], tuple)
if numoutputs is not None:
assert numoutputs == len(outcontents[-1])
numoutputs = len(outcontents[-1])

tags = awkward1.layout.Index8(tags)
index = awkward1.layout.Index64(index)
return awkward1.layout.UnionArray8_64(tags, index, contents).simplify()
return tuple(awkward1.layout.UnionArray8_64(tags, index, [x[i] for x in outcontents]).simplify() for i in range(numoutputs))

elif any(isinstance(x, optiontypes) for x in inputs):
mask = None
Expand Down Expand Up @@ -260,7 +265,9 @@ def apply(inputs, depth):
else:
nextinputs.append(awkward1.layout.IndexedOptionArray64(nextindex, x).project(nextmask))

return awkward1.layout.IndexedOptionArray64(index, apply(nextinputs, depth)).simplify()
outcontent = apply(nextinputs, depth)
assert isinstance(outcontent, tuple)
return tuple(awkward1.layout.IndexedOptionArray64(index, x).simplify() for x in outcontent)

elif any(isinstance(x, listtypes) for x in inputs):
if all(isinstance(x, awkward1.layout.RegularArray) or not isinstance(x, listtypes) for x in inputs):
Expand All @@ -280,7 +287,9 @@ def apply(inputs, depth):
raise ValueError("cannot broadcast RegularArray of size {0} with RegularArray of size {1}".format(x.size, maxsize))
else:
nextinputs.append(x)
return awkward1.layout.RegularArray(apply(nextinputs, depth + 1), maxsize)
outcontent = apply(nextinputs, depth + 1)
assert isinstance(outcontent, tuple)
return tuple(awkward1.layout.RegularArray(x, maxsize) for x in outcontent)

else:
for x in inputs:
Expand All @@ -297,7 +306,10 @@ def apply(inputs, depth):
nextinputs.append(awkward1.layout.RegularArray(x, 1).broadcast_tooffsets64(offsets).content)
else:
nextinputs.append(x)
return awkward1.layout.ListOffsetArray64(offsets, apply(nextinputs, depth + 1))

outcontent = apply(nextinputs, depth + 1)
assert isinstance(outcontent, tuple)
return tuple(awkward1.layout.ListOffsetArray64(offsets, x) for x in outcontent)

elif any(isinstance(x, recordtypes) for x in inputs):
keys = None
Expand All @@ -319,16 +331,23 @@ def apply(inputs, depth):
if len(keys) == 0:
return awkward1.layout.RecordArray(length, istuple)
else:
contents = []
outcontents = []
numoutputs = None
for key in keys:
contents.append(apply([x if not isinstance(x, recordtypes) else x[key] for x in inputs], depth))
return awkward1.layout.RecordArray(contents, keys)
outcontents.append(apply([x if not isinstance(x, recordtypes) else x[key] for x in inputs], depth))
assert isinstance(outcontents[-1], tuple)
if numoutputs is not None:
assert numoutputs == len(outcontents[-1])
numoutputs = len(outcontents[-1])
return tuple(awkward1.layout.RecordArray([x[i] for x in outcontents], keys) for i in range(numoutputs))

else:
raise ValueError("cannot broadcast: {0}".format(", ".join(type(x) for x in inputs)))
raise ValueError("cannot broadcast: {0}".format(", ".join(repr(type(x)) for x in inputs)))

isscalar = []
return broadcast_unpack(apply(broadcast_pack(inputs, isscalar), 0), isscalar)
out = apply(broadcast_pack(inputs, isscalar), 0)
assert isinstance(out, tuple)
return tuple(broadcast_unpack(x, isscalar) for x in out)

def broadcast_pack(inputs, isscalar):
maxlen = -1
Expand Down
19 changes: 17 additions & 2 deletions awkward1/operations/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ def getfunction(inputs):
if isinstance(base, awkward1.layout.RecordArray):
if not isinstance(what, awkward1.layout.Content):
what = awkward1.layout.NumpyArray(numpy.lib.stride_tricks.as_strided([what], shape=(len(base),), strides=(0,)))
return lambda depth: base.setitem_field(where, what)
return lambda depth: (base.setitem_field(where, what),)
else:
return None

out = awkward1._util.broadcast_and_apply([base, what], getfunction)
return awkward1._util.wrap(out, behavior=awkward1._util.behaviorof(base, what))
assert isinstance(out, tuple) and len(out) == 1
return awkward1._util.wrap(out[0], behavior=awkward1._util.behaviorof(base, what))

def isna(array):
import awkward1.highlevel
Expand Down Expand Up @@ -151,6 +152,20 @@ def concatenate(arrays, axis=0, mergebool=True):

return awkward1._util.wrap(out, behavior=awkward1._util.behaviorof(*arrays))

@awkward1._numpy.implements(numpy.broadcast_arrays)
def broadcast_arrays(*arrays):
inputs = [awkward1.operations.convert.tolayout(x, allowrecord=True, allowother=False) for x in arrays]

def getfunction(inputs):
if all(isinstance(x, awkward1.layout.NumpyArray) for x in inputs):
return lambda depth: tuple(inputs)
else:
return None

out = awkward1._util.broadcast_and_apply(inputs, getfunction)
assert isinstance(out, tuple)
return [awkward1._util.wrap(x, awkward1._util.behaviorof(arrays)) for x in out]

@awkward1._numpy.implements(numpy.where)
def where(condition, *args, **kwargs):
import awkward1.highlevel
Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pytest>=3.9
numba>=0.46.0
pandas>=0.24.0
numexpr
25 changes: 25 additions & 0 deletions tests/test_PR119_numexpr_and_broadcast_arrays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

from __future__ import absolute_import

import sys

import pytest
import numpy

import awkward1

def test_numexpr():
a = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
b = awkward1.Array([100, 200, 300])
assert awkward1.tolist(awkward1.numexpr.evaluate("a + b")) == [[101.1, 102.2, 103.3], [], [304.4, 305.5]]
a = [1, 2, 3]
assert awkward1.tolist(awkward1.numexpr.re_evaluate()) == [101, 202, 303]

def test_broadcast_arrays():
a = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
b = awkward1.Array([100, 200, 300])

out = awkward1.broadcast_arrays(a, b)
assert awkward1.tolist(out[0]) == [[1.1, 2.2, 3.3], [], [4.4, 5.5]]
assert awkward1.tolist(out[1]) == [[100, 100, 100], [], [300, 300]]