/
akstr_is_in.py
81 lines (61 loc) · 2.92 KB
/
akstr_is_in.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
from __future__ import annotations
import awkward as ak
from awkward._backends.typetracer import TypeTracerBackend
from awkward._dispatch import high_level_function
from awkward._layout import HighLevelContext, ensure_same_backend
# Names exported by this module: `is_in` is the only public entry point.
__all__ = ("is_in",)
# Module-level handle obtained from `TypeTracerBackend.instance()`.
# NOTE(review): not referenced by any function visible in this file — confirm
# whether it is still needed.
typetracer = TypeTracerBackend.instance()
@high_level_function(module="ak.str")
def is_in(
    array, value_set, *, skip_nones=False, highlevel=True, behavior=None, attrs=None
):
    """
    Args:
        array: Array-like data (anything #ak.to_layout recognizes).
        value_set: Array-like data (anything #ak.to_layout recognizes); the
            collection of strings or bytestrings to look for in `array`.
        skip_nones (bool): If True, missing (None) entries of `array` are
            never matched against `value_set`; if False, None is treated as
            an ordinary value that may be matched.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.contents.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.
        attrs (None or dict): Custom attributes for the output array, if
            high-level.

    Produces True for every string in `array` that is equal to any value in
    `value_set`, and False for every string that is not.

    A TypeError is raised if `value_set` is not an array of (possibly
    missing) strings or bytestrings.

    Note: no error is raised when `array` itself holds no string or
    bytestring data.

    Requires the pyarrow library and calls
    [pyarrow.compute.is_in](https://arrow.apache.org/docs/python/generated/pyarrow.compute.is_in.html).
    """
    # Dispatch: hand both array-like arguments to the dispatch mechanism.
    yield array, value_set

    # Implementation: no override took over, so run the default code path.
    return _impl(
        array=array,
        value_set=value_set,
        skip_nones=skip_nones,
        highlevel=highlevel,
        behavior=behavior,
        attrs=attrs,
    )
def _is_maybe_optional_list_of_string(layout):
if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}:
return True
elif layout.is_option or layout.is_indexed:
return _is_maybe_optional_list_of_string(layout.content)
else:
return False
def _impl(array, value_set, skip_nones, highlevel, behavior, attrs):
    """
    Default implementation behind `ak.str.is_in`.

    Unwraps `array` and `value_set` into layouts on a common backend, checks
    that `value_set` is a (possibly optional/indexed) string or bytestring
    list, and then replaces every string/bytestring node found in `array`
    with the result of `pyarrow.compute.is_in` against `value_set`.

    Args:
        array: Array-like data to search in.
        value_set: Array-like data holding the values to search for.
        skip_nones (bool): Forwarded to pyarrow as `skip_nulls`.
        highlevel (bool): Forwarded to `ctx.wrap` to select the output form.
        behavior (None or dict): Passed to the #HighLevelContext.
        attrs (None or dict): Passed to the #HighLevelContext.

    Raises:
        TypeError: if `value_set` is not an array of (possibly missing)
            strings or bytestrings.
    """
    from awkward._connect.pyarrow import import_pyarrow_compute
    from awkward.operations.str import _apply_through_arrow

    pc = import_pyarrow_compute("ak.str.is_in")

    with HighLevelContext(behavior=behavior, attrs=attrs) as ctx:
        unwrapped_array = ctx.unwrap(array, allow_record=False)
        unwrapped_values = ctx.unwrap(value_set, allow_record=False)
        array_layout, value_set_layout = ensure_same_backend(
            unwrapped_array, unwrapped_values
        )

    value_set_is_stringlike = _is_maybe_optional_list_of_string(value_set_layout)
    if not value_set_is_stringlike:
        raise TypeError("`value_set` must be 1D array of (possibly missing) strings")

    def apply(layout, **kwargs):
        # Returning None tells the traversal that this node was not handled
        # here; only string/bytestring nodes are sent through Arrow.
        if not _is_maybe_optional_list_of_string(layout):
            return None
        return _apply_through_arrow(
            pc.is_in, layout, value_set_layout, skip_nulls=skip_nones
        )

    result = ak._do.recursively_apply(array_layout, apply)
    return ctx.wrap(result, highlevel=highlevel)