Skip to content

Commit

Permalink
C++ refactoring: ak.unzip (#1354)
Browse files Browse the repository at this point in the history
* ak.unzip and testing

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ioanaif and pre-commit-ci[bot] committed Mar 10, 2022
1 parent d036ab1 commit 249684d
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 67 deletions.
126 changes: 59 additions & 67 deletions src/awkward/_v2/operations/structure/ak_unzip.py
Expand Up @@ -6,70 +6,62 @@


def unzip(array, highlevel=True, behavior=None):
    """
    Args:
        array: Array to unzip into individual fields.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.layout.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    If the `array` contains tuples or records, this operation splits them
    into a Python tuple of arrays, one for each field.

    If the `array` does not contain tuples or records, the single `array`
    is placed in a length 1 Python tuple.

    If the `array` contains a union whose contents do not all have the same
    set of fields as the array itself, a ValueError is raised.

    For example,

        >>> array = ak.Array([{"x": 1.1, "y": [1]},
        ...                   {"x": 2.2, "y": [2, 2]},
        ...                   {"x": 3.3, "y": [3, 3, 3]}])
        >>> x, y = ak.unzip(array)
        >>> x
        <Array [1.1, 2.2, 3.3] type='3 * float64'>
        >>> y
        <Array [[1], [2, 2], [3, 3, 3]] type='3 * var * int64'>
    """
    # Run the implementation inside an error context that is given the
    # operation name and the arguments of this call.
    with ak._v2._util.OperationErrorContext(
        "ak._v2.unzip",
        dict(array=array, highlevel=highlevel, behavior=behavior),
    ):
        return _impl(array, highlevel, behavior)


def _impl(array, highlevel, behavior):
    """
    Split `array` into a Python tuple with one entry per field.

    Args:
        array: Array-like data, converted to a layout with
            `ak._v2.operations.convert.to_layout` (records allowed).
        highlevel (bool): Passed through to `ak._v2._util.wrap` for each
            returned item.
        behavior (None or dict): Combined with the behavior of `array` via
            `ak._v2._util.behavior_of` and passed to `ak._v2._util.wrap`.

    Returns a length-1 tuple holding the wrapped layout when it has no
    fields; otherwise a tuple of `layout[name]` for every field name, in the
    order reported by `ak._v2.operations.describe.fields`.

    Raises a ValueError (wrapped by `ak._v2._util.error`) if a union is found
    whose contents do not all have exactly the same set of fields as the
    top-level layout.
    """
    behavior = ak._v2._util.behavior_of(array, behavior=behavior)
    layout = ak._v2.operations.convert.to_layout(
        array, allow_record=True, allow_other=False
    )
    fields = ak._v2.operations.describe.fields(layout)

    # Loop-invariant: every union content is compared against this same set.
    expected_fields = set(fields)

    def check_for_union(layout, **kwargs):
        # Callback for recursively_apply; extra keyword arguments supplied by
        # the traversal are accepted and ignored.
        if isinstance(layout, (ak._v2.contents.RecordArray, ak._v2.Record)):
            pass  # don't descend into nested records

        elif isinstance(layout, ak._v2.contents.UnionArray):
            for content in layout.contents:
                # Use the v2 `fields`, matching the call on the top-level
                # layout above; `content` is a v2 layout.
                content_fields = set(ak._v2.operations.describe.fields(content))
                if content_fields != expected_fields:
                    raise ak._v2._util.error(
                        ValueError("union of different sets of fields, cannot ak.unzip")
                    )

        elif hasattr(layout, "content"):
            check_for_union(layout.content)

    # Only the validation side effect is wanted, so no array is built.
    layout.recursively_apply(check_for_union, return_array=False)

    if len(fields) == 0:
        return (ak._v2._util.wrap(layout, behavior, highlevel),)
    else:
        return tuple(ak._v2._util.wrap(layout[n], behavior, highlevel) for n in fields)
59 changes: 59 additions & 0 deletions tests/v2/test_0898-unzip-heterogeneous-records.py
@@ -0,0 +1,59 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE


import pytest # noqa: F401
import numpy as np # noqa: F401
import awkward as ak # noqa: F401

# Short alias for the v2 list conversion used in the assertions below.
to_list = ak._v2.operations.convert.to_list


def test():
    """Unzip a concatenation of record arrays with differing, then matching, fields."""
    structure = ak._v2.operations.structure

    # Records with different field sets: unzip must refuse with ValueError.
    only_this = ak._v2.Array([{"this": 100}])
    this_and_that = ak._v2.Array([{"this": 90, "that": 100}])
    mismatched = structure.concatenate((only_this, this_and_that))

    with pytest.raises(ValueError):
        structure.unzip(mismatched)

    # Records with the same single field: unzip yields a one-element tuple.
    first = ak._v2.Array([{"this": 100}])
    second = ak._v2.Array([{"this": 90}])
    matched = structure.concatenate((first, second))

    unzipped = structure.unzip(matched)
    (only_field,) = unzipped

    assert only_field.tolist() == [100, 90]


def test_zip_and_unzip():
    """Zip a jagged and a flat array (with and without depth_limit=1), then unzip."""
    structure = ak._v2.operations.structure

    jagged = ak._v2.Array([[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]])
    flat = ak._v2.Array([1.1, 2.2, 3.3, 4.4, 5.5])

    deep = structure.zip({"x": jagged, "y": flat})
    shallow = structure.zip({"x": jagged, "y": flat}, depth_limit=1)
    xx, yy = structure.unzip(shallow)

    # Every result must be backed by a v2 Content layout.
    for result in (deep, shallow, xx, yy):
        assert isinstance(result.layout, ak._v2.contents.Content)

    expected_deep = [
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 1.1}, {"x": 3, "y": 1.1}],
        [],
        [{"x": 4, "y": 3.3}, {"x": 5, "y": 3.3}],
        [{"x": 6, "y": 4.4}],
        [
            {"x": 7, "y": 5.5},
            {"x": 8, "y": 5.5},
            {"x": 9, "y": 5.5},
            {"x": 10, "y": 5.5},
        ],
    ]
    assert to_list(deep) == expected_deep

    expected_shallow = [
        {"x": [1, 2, 3], "y": 1.1},
        {"x": [], "y": 2.2},
        {"x": [4, 5], "y": 3.3},
        {"x": [6], "y": 4.4},
        {"x": [7, 8, 9, 10], "y": 5.5},
    ]
    assert to_list(shallow) == expected_shallow

    # Unzipping the depth-limited zip gives back the original columns.
    assert to_list(xx) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
    assert to_list(yy) == [1.1, 2.2, 3.3, 4.4, 5.5]

0 comments on commit 249684d

Please sign in to comment.