Skip to content

Commit

Permalink
Only 'simplify'ed option-type and union-type arrays are now considere…
Browse files Browse the repository at this point in the history
…d valid. (#729)
  • Loading branch information
jpivarski committed Feb 11, 2021
1 parent 0d1c861 commit 4ad1562
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 46 deletions.
9 changes: 8 additions & 1 deletion src/libawkward/Slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,14 @@ namespace awkward {
const SliceItemPtr& content)
: index_(index)
, originalmask_(originalmask)
, content_(content) { }
, content_(content) {
if (dynamic_cast<SliceMissingOf<int64_t>*>(content.get())) {
throw std::runtime_error(
std::string("constructing SliceMissing directly inside of SliceMissing; "
"is the array used as a slice valid (ak.validity_error(slice_array))?")
+ FILENAME(__LINE__));
}
}

template <typename T>
int64_t
Expand Down
11 changes: 11 additions & 0 deletions src/libawkward/array/BitMaskedArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,17 @@ namespace awkward {
+ std::string("): ") + std::string("len(content) < length")
+ FILENAME(__LINE__));
}
else if (dynamic_cast<BitMaskedArray*>(content_.get()) ||
dynamic_cast<ByteMaskedArray*>(content_.get()) ||
dynamic_cast<IndexedArray32*>(content_.get()) ||
dynamic_cast<IndexedArrayU32*>(content_.get()) ||
dynamic_cast<IndexedArray64*>(content_.get()) ||
dynamic_cast<IndexedOptionArray32*>(content_.get()) ||
dynamic_cast<IndexedOptionArray64*>(content_.get()) ||
dynamic_cast<UnmaskedArray*>(content_.get())) {
return classname() + " contains " + content_.get()->classname() +
", the operation that made it might have forgotten to call 'simplify_optiontype()'";
}
else {
return content_.get()->validityerror(path + std::string(".content"));
}
Expand Down
29 changes: 21 additions & 8 deletions src/libawkward/array/ByteMaskedArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,17 @@ namespace awkward {
+ std::string("): ") + std::string("len(content) < len(mask)")
+ FILENAME(__LINE__));
}
else if (dynamic_cast<BitMaskedArray*>(content_.get()) ||
dynamic_cast<ByteMaskedArray*>(content_.get()) ||
dynamic_cast<IndexedArray32*>(content_.get()) ||
dynamic_cast<IndexedArrayU32*>(content_.get()) ||
dynamic_cast<IndexedArray64*>(content_.get()) ||
dynamic_cast<IndexedOptionArray32*>(content_.get()) ||
dynamic_cast<IndexedOptionArray64*>(content_.get()) ||
dynamic_cast<UnmaskedArray*>(content_.get())) {
return classname() + " contains " + content_.get()->classname() +
", the operation that made it might have forgotten to call 'simplify_optiontype()'";
}
else {
return content_.get()->validityerror(path + std::string(".content"));
}
Expand Down Expand Up @@ -1331,14 +1342,15 @@ namespace awkward {
outindex.length());
util::handle_error(err3, classname(), identities_.get());

IndexedOptionArray64 tmp(Identities::none(),
parameters_,
outindex,
raw->content());
return std::make_shared<ListOffsetArray64>(
raw->identities(),
raw->parameters(),
outoffsets,
std::make_shared<IndexedOptionArray64>(Identities::none(),
parameters_,
outindex,
raw->content()));
tmp.simplify_optiontype());
}
else {
throw std::runtime_error(
Expand Down Expand Up @@ -1416,14 +1428,15 @@ namespace awkward {
outindex.length());
util::handle_error(err3, classname(), identities_.get());

IndexedOptionArray64 tmp(Identities::none(),
util::Parameters(),
outindex,
raw->content());
return std::make_shared<ListOffsetArray64>(
raw->identities(),
raw->parameters(),
outoffsets,
std::make_shared<IndexedOptionArray64>(Identities::none(),
util::Parameters(),
outindex,
raw->content()));
tmp.simplify_optiontype());
}
else {
throw std::runtime_error(
Expand Down
59 changes: 35 additions & 24 deletions src/libawkward/array/IndexedArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1533,15 +1533,26 @@ namespace awkward {
index_.length(),
content_.get()->length(),
ISOPTION);
if (err.str == nullptr) {
return content_.get()->validityerror(path + std::string(".content"));
}
else {
if (err.str != nullptr) {
return (std::string("at ") + path + std::string(" (") + classname()
+ std::string("): ") + std::string(err.str)
+ std::string(" at i=") + std::to_string(err.identity)
+ std::string(err.filename == nullptr ? "" : err.filename));
}
else if (dynamic_cast<BitMaskedArray*>(content_.get()) ||
dynamic_cast<ByteMaskedArray*>(content_.get()) ||
dynamic_cast<IndexedArray32*>(content_.get()) ||
dynamic_cast<IndexedArrayU32*>(content_.get()) ||
dynamic_cast<IndexedArray64*>(content_.get()) ||
dynamic_cast<IndexedOptionArray32*>(content_.get()) ||
dynamic_cast<IndexedOptionArray64*>(content_.get()) ||
dynamic_cast<UnmaskedArray*>(content_.get())) {
return classname() + " contains " + content_.get()->classname() +
", the operation that made it might have forgotten to call 'simplify_optiontype()'";
}
else {
return content_.get()->validityerror(path + std::string(".content"));
}
}

template <typename T, bool ISOPTION>
Expand Down Expand Up @@ -2417,11 +2428,11 @@ namespace awkward {
next_length);
util::handle_error(err3, classname(), identities_.get());

out = std::make_shared<IndexedArrayOf<int64_t, ISOPTION>>(
Identities::none(),
parameters_,
nextoutindex,
out);
IndexedArrayOf<int64_t, ISOPTION> tmp(Identities::none(),
parameters_,
nextoutindex,
out);
out = tmp.simplify_optiontype();

if (inject_nones) {
out = std::make_shared<RegularArray>(Identities::none(),
Expand Down Expand Up @@ -2456,15 +2467,15 @@ namespace awkward {
outindex.length());
util::handle_error(err4, classname(), identities_.get());

IndexedArrayOf<int64_t, ISOPTION> tmp(Identities::none(),
parameters_,
outindex,
raw->content());
return std::make_shared<ListOffsetArray64>(
raw->identities(),
raw->parameters(),
outoffsets,
std::make_shared<IndexedArrayOf<int64_t, ISOPTION>>(
Identities::none(),
parameters_,
outindex,
raw->content()));
tmp.simplify_optiontype());
}
if (IndexedArrayOf<int64_t, ISOPTION>* raw =
dynamic_cast<IndexedArrayOf<int64_t, ISOPTION>*>(out.get())) {
Expand Down Expand Up @@ -2549,11 +2560,11 @@ namespace awkward {
next_length);
util::handle_error(err3, classname(), identities_.get());

out = std::make_shared<IndexedArrayOf<int64_t, ISOPTION>>(
Identities::none(),
util::Parameters(),
nextoutindex,
out);
IndexedArrayOf<int64_t, ISOPTION> tmp(Identities::none(),
util::Parameters(),
nextoutindex,
out);
out = tmp.simplify_optiontype();

if (inject_nones) {
out = std::make_shared<RegularArray>(Identities::none(),
Expand Down Expand Up @@ -2588,15 +2599,15 @@ namespace awkward {
outindex.length());
util::handle_error(err4, classname(), identities_.get());

IndexedArrayOf<int64_t, ISOPTION> tmp(Identities::none(),
util::Parameters(),
outindex,
raw->content());
return std::make_shared<ListOffsetArray64>(
raw->identities(),
raw->parameters(),
outoffsets,
std::make_shared<IndexedArrayOf<int64_t, ISOPTION>>(
Identities::none(),
util::Parameters(),
outindex,
raw->content()));
tmp.simplify_optiontype());
}
if (IndexedArrayOf<int64_t, ISOPTION>* raw =
dynamic_cast<IndexedArrayOf<int64_t, ISOPTION>*>(out.get())) {
Expand Down
8 changes: 8 additions & 0 deletions src/libawkward/array/UnionArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,14 @@ namespace awkward {
if (paramcheck != std::string("")) {
return paramcheck;
}
for (int64_t i = 0; i < numcontents(); i++) {
if (dynamic_cast<UnionArray8_32*>(content(i).get()) ||
dynamic_cast<UnionArray8_U32*>(content(i).get()) ||
dynamic_cast<UnionArray8_64*>(content(i).get())) {
return classname() + " contains " + content(i).get()->classname() +
", the operation that made it might have forgotten to call 'simplify_uniontype()'";
}
}
if (index_.length() < tags_.length()) {
return (std::string("at ") + path + std::string(" (") + classname()
+ std::string("): ") + std::string("len(index) < len(tags)")
Expand Down
33 changes: 22 additions & 11 deletions src/libawkward/array/UnmaskedArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,20 @@ namespace awkward {
if (paramcheck != std::string("")) {
return paramcheck;
}
return content_.get()->validityerror(path + std::string(".content"));
if (dynamic_cast<BitMaskedArray*>(content_.get()) ||
dynamic_cast<ByteMaskedArray*>(content_.get()) ||
dynamic_cast<IndexedArray32*>(content_.get()) ||
dynamic_cast<IndexedArrayU32*>(content_.get()) ||
dynamic_cast<IndexedArray64*>(content_.get()) ||
dynamic_cast<IndexedOptionArray32*>(content_.get()) ||
dynamic_cast<IndexedOptionArray64*>(content_.get()) ||
dynamic_cast<UnmaskedArray*>(content_.get())) {
return classname() + " contains " + content_.get()->classname() +
", the operation that made it might have forgotten to call 'simplify_optiontype()'";
}
else {
return content_.get()->validityerror(path + std::string(".content"));
}
}

const ContentPtr
Expand Down Expand Up @@ -968,14 +981,13 @@ namespace awkward {
stable,
keepdims);
if (RegularArray* raw = dynamic_cast<RegularArray*>(out.get())) {
std::shared_ptr<Content> wrapped = std::make_shared<UnmaskedArray>(
Identities::none(),
parameters_,
raw->content());
UnmaskedArray tmp(Identities::none(),
parameters_,
raw->content());
return std::make_shared<RegularArray>(
raw->identities(),
raw->parameters(),
wrapped,
tmp.simplify_optiontype(),
raw->size(),
length());
}
Expand Down Expand Up @@ -1003,14 +1015,13 @@ namespace awkward {
stable,
keepdims);
if (RegularArray* raw = dynamic_cast<RegularArray*>(out.get())) {
std::shared_ptr<Content> wrapped = std::make_shared<UnmaskedArray>(
Identities::none(),
parameters_,
raw->content());
UnmaskedArray tmp(Identities::none(),
parameters_,
raw->content());
return std::make_shared<RegularArray>(
raw->identities(),
raw->parameters(),
wrapped,
tmp.simplify_optiontype(),
raw->size(),
length());
}
Expand Down
3 changes: 2 additions & 1 deletion tests/test_0118-numba-cpointers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,8 @@ def test_IndexedArray_deep_at():
indexedarray2 = ak.layout.IndexedArray64(index2, indexedarray1)
index3 = ak.layout.Index32(np.array([1, 2], dtype=np.int32))
indexedarray3 = ak.layout.IndexedArray32(index3, indexedarray2)
array = ak.Array(indexedarray3, check_valid=True)
# This is not a valid array (IndexedArray inside IndexedArray), but instructive! :)
array = ak.Array(indexedarray3, check_valid=False)

@numba.njit
def f1(x):
Expand Down
24 changes: 23 additions & 1 deletion tests/test_0723-ensure-that-jagged-slice-fits-array-length.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import awkward as ak # noqa: F401


def test():
def test_first_issue():
a = ak.layout.NumpyArray(np.arange(122))
idx = ak.layout.Index64([0, 2, 4, 6, 8, 10, 12])
a = ak.layout.ListOffsetArray64(idx, a)
Expand All @@ -26,3 +26,25 @@ def test():
[],
[],
]


def test_second_issue():
a = ak.layout.NumpyArray(np.arange(122))
idx = ak.layout.Index64([0, 2, 4, 6, 8, 10, 12])
a = ak.layout.ListOffsetArray64(idx, a)
idx = ak.layout.Index64([0, -1, 1, 2, -1, 3, 4, 5])
a = ak.layout.IndexedOptionArray64(idx, a)
a = ak.Array(a)
assert ak.is_valid(a)

assert ak.is_valid(ak.argsort(a))
assert a[ak.argsort(a)].tolist() == [
[0, 1],
None,
[2, 3],
[4, 5],
None,
[6, 7],
[8, 9],
[10, 11],
]

0 comments on commit 4ad1562

Please sign in to comment.