Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ refactoring: ak.unzip #1354

Merged
merged 2 commits into from Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
126 changes: 59 additions & 67 deletions src/awkward/_v2/operations/structure/ak_unzip.py
Expand Up @@ -6,70 +6,62 @@


def unzip(array, highlevel=True, behavior=None):
    """
    Args:
        array: Array to unzip into individual fields.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.layout.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    If the `array` contains tuples or records, this operation splits them
    into a Python tuple of arrays, one for each field.

    If the `array` does not contain tuples or records, the single `array`
    is placed in a length 1 Python tuple.

    A ValueError is raised if the `array` contains a union whose contents
    do not all have the same set of fields as the `array` itself.

    For example,

        >>> array = ak.Array([{"x": 1.1, "y": [1]},
        ...                   {"x": 2.2, "y": [2, 2]},
        ...                   {"x": 3.3, "y": [3, 3, 3]}])
        >>> x, y = ak.unzip(array)
        >>> x
        <Array [1.1, 2.2, 3.3] type='3 * float64'>
        >>> y
        <Array [[1], [2, 2], [3, 3, 3]] type='3 * var * int64'>
    """
    # The error context receives the operation name and the call arguments;
    # the actual work is delegated to _impl.
    with ak._v2._util.OperationErrorContext(
        "ak._v2.unzip",
        dict(array=array, highlevel=highlevel, behavior=behavior),
    ):
        return _impl(array, highlevel, behavior)


def _impl(array, highlevel, behavior):
    """
    Implementation of unzip: split `array` into one array per field.

    Args:
        array: Array to unzip; converted to a layout with records allowed
            and other (non-array) inputs rejected.
        highlevel (bool): Forwarded to the wrapping helper for each output.
        behavior (None or dict): Explicit behavior; combined with the
            behavior of `array`.

    Returns a Python tuple: one wrapped array per field of the layout, or a
    length 1 tuple holding the wrapped layout if it has no fields.

    Raises a ValueError if a union inside the layout has a content whose set
    of fields differs from the fields of the layout as a whole.
    """
    behavior = ak._v2._util.behavior_of(array, behavior=behavior)
    layout = ak._v2.operations.convert.to_layout(
        array, allow_record=True, allow_other=False
    )
    fields = ak._v2.operations.describe.fields(layout)

    # Loop-invariant: compute the reference field set once, not per content.
    expected_fields = set(fields)

    def check_for_union(layout, **kwargs):
        if isinstance(layout, (ak._v2.contents.RecordArray, ak._v2.Record)):
            pass  # don't descend into nested records

        elif isinstance(layout, ak._v2.contents.UnionArray):
            for content in layout.contents:
                # Use the v2 `fields` function, consistent with how `fields`
                # was computed for the whole layout above.
                content_fields = ak._v2.operations.describe.fields(content)
                if set(content_fields) != expected_fields:
                    raise ak._v2._util.error(
                        ValueError("union of different sets of fields, cannot ak.unzip")
                    )

        elif hasattr(layout, "content"):
            check_for_union(layout.content)

    # return_array=False: the action is applied for its side effect (the
    # validation above) and no resulting array is built.
    layout.recursively_apply(check_for_union, return_array=False)

    if len(fields) == 0:
        return (ak._v2._util.wrap(layout, behavior, highlevel),)
    else:
        return tuple(ak._v2._util.wrap(layout[n], behavior, highlevel) for n in fields)
59 changes: 59 additions & 0 deletions tests/v2/test_0898-unzip-heterogeneous-records.py
@@ -0,0 +1,59 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE


import pytest # noqa: F401
import numpy as np # noqa: F401
import awkward as ak # noqa: F401

# Short alias for the v2 list-conversion function used by the assertions below.
to_list = ak._v2.operations.convert.to_list


def test():
    """Unzipping concatenated records: mismatched fields fail, matching fields work."""
    structure = ak._v2.operations.structure

    # Records with different field sets: unzip must refuse the concatenation.
    only_this = ak._v2.Array([{"this": 100}])
    this_and_that = ak._v2.Array([{"this": 90, "that": 100}])
    mismatched = structure.concatenate((only_this, this_and_that))

    with pytest.raises(ValueError):
        structure.unzip(mismatched)

    # Records with the same single field: unzip yields a one-element tuple.
    first = ak._v2.Array([{"this": 100}])
    second = ak._v2.Array([{"this": 90}])
    matched = structure.concatenate((first, second))

    unzipped = structure.unzip(matched)
    (only_field,) = unzipped

    assert only_field.tolist() == [100, 90]


def test_zip_and_unzip():
    """Zip two arrays (with and without depth_limit=1), then unzip the shallow one."""
    structure = ak._v2.operations.structure

    x = ak._v2.Array([[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]])
    y = ak._v2.Array([1.1, 2.2, 3.3, 4.4, 5.5])

    deep = structure.zip({"x": x, "y": y})
    shallow = structure.zip({"x": x, "y": y}, depth_limit=1)
    x_back, y_back = structure.unzip(shallow)

    # Every result must be a high-level array backed by a v2 Content layout.
    for result in (deep, shallow, x_back, y_back):
        assert isinstance(result.layout, ak._v2.contents.Content)

    # Without a depth limit, "y" is paired with every element of each "x" list.
    expected_deep = [
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 1.1}, {"x": 3, "y": 1.1}],
        [],
        [{"x": 4, "y": 3.3}, {"x": 5, "y": 3.3}],
        [{"x": 6, "y": 4.4}],
        [
            {"x": 7, "y": 5.5},
            {"x": 8, "y": 5.5},
            {"x": 9, "y": 5.5},
            {"x": 10, "y": 5.5},
        ],
    ]
    assert to_list(deep) == expected_deep

    # With depth_limit=1, each record holds a whole "x" list and one "y".
    expected_shallow = [
        {"x": [1, 2, 3], "y": 1.1},
        {"x": [], "y": 2.2},
        {"x": [4, 5], "y": 3.3},
        {"x": [6], "y": 4.4},
        {"x": [7, 8, 9, 10], "y": 5.5},
    ]
    assert to_list(shallow) == expected_shallow

    # Unzipping the shallow records gives back the original inputs.
    assert to_list(x_back) == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
    assert to_list(y_back) == [1.1, 2.2, 3.3, 4.4, 5.5]