Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revised getitem operation for masked jagged indexers #322

Merged
merged 11 commits into from Jul 9, 2020
36 changes: 36 additions & 0 deletions src/libawkward/Content.cpp
Expand Up @@ -1189,6 +1189,37 @@ namespace awkward {
index.length());
}

// Apply a masked jagged slice (a SliceMissing64 whose content is a
// SliceJagged64) to element 0 of `that`.
//
// missing:  option-type slice; a negative entry in missing.index() marks a
//           missing (None) position. Its content() must be a SliceJagged64.
// tail:     remaining slice items, forwarded to getitem_next_jagged.
// advanced: not used by this function.
// that:     array whose element 0 is the content to be sliced.
//
// Returns the option-type result (simplified via simplify_optiontype)
// wrapped in a RegularArray constructed with index.length().
//
// Throws std::runtime_error if missing.content() is not a SliceJagged64.
// Throws std::invalid_argument if the content length differs from the
// length of the masked slice, or if the jagged offsets are too short for
// the number of non-missing entries.
const ContentPtr getitem_next_missing_jagged(const SliceMissing64& missing,
                                             const Slice& tail,
                                             const Index64& advanced,
                                             const ContentPtr& that) {
  const SliceJagged64* jagged =
    dynamic_cast<SliceJagged64*>(missing.content().get());
  if (jagged == nullptr) {
    // The cast yields nullptr for any other slice type; fail loudly rather
    // than dereferencing it below.
    throw std::runtime_error(
      "getitem_next_missing_jagged requires a jagged slice as the content "
      "of the masked slice");
  }

  const Index64 index = missing.index();
  // Fetch the offsets once instead of on every loop iteration.
  const Index64 offsets = jagged->offsets();

  ContentPtr content = that.get()->getitem_at_nowrap(0);
  if (content.get()->length() != index.length()) {
    throw std::invalid_argument("Contents length does not match length of masked jagged slice");
  }

  Index64 outputmask(index.length());
  Index64 starts(index.length());
  Index64 stops(index.length());

  // k walks the jagged offsets; it only advances on non-missing entries,
  // so missing entries get an empty range (start == stop).
  int64_t k = 0;
  for (int64_t i = 0;  i < index.length();  ++i) {
    if (k >= offsets.length()) {
      throw std::invalid_argument(
        "jagged slice offsets are too short for the masked jagged slice");
    }
    int64_t start = offsets.getitem_at_nowrap(k);
    starts.setitem_at_nowrap(i, start);
    if (index.getitem_at_nowrap(i) < 0) {
      outputmask.setitem_at_nowrap(i, -1);
      stops.setitem_at_nowrap(i, start);
    }
    else {
      if (k + 1 >= offsets.length()) {
        throw std::invalid_argument(
          "jagged slice offsets are too short for the masked jagged slice");
      }
      outputmask.setitem_at_nowrap(i, i);
      int64_t stop = offsets.getitem_at_nowrap(++k);
      stops.setitem_at_nowrap(i, stop);
    }
  }

  ContentPtr tmp = content.get()->getitem_next_jagged(starts,
                                                      stops,
                                                      jagged->content(),
                                                      tail);
  // outputmask holds -1 at missing positions and i elsewhere.
  IndexedOptionArray64 out(Identities::none(),
                           util::Parameters(),
                           outputmask,
                           tmp);
  return std::make_shared<RegularArray>(Identities::none(),
                                        util::Parameters(),
                                        out.simplify_optiontype(),
                                        index.length());
}

bool check_missing_jagged_same(const ContentPtr& that,
const Index8& bytemask,
const SliceMissing64& missing) {
Expand Down Expand Up @@ -1275,6 +1306,11 @@ namespace awkward {
"with NumPy-style advanced indexing");
}

// would length ever not be 1 when the content is jagged?
if ( length() == 1 && dynamic_cast<SliceJagged64*>(missing.content().get())) {
return getitem_next_missing_jagged(missing, tail, advanced, shallow_copy());
}
nsmith- marked this conversation as resolved.
Show resolved Hide resolved

ContentPtr tmp = check_missing_jagged(shallow_copy(), missing);
nsmith- marked this conversation as resolved.
Show resolved Hide resolved
ContentPtr next = tmp.get()->getitem_next(missing.content(),
tail,
Expand Down
24 changes: 22 additions & 2 deletions src/libawkward/array/ByteMaskedArray.cpp
Expand Up @@ -1297,9 +1297,29 @@ namespace awkward {
Index64 nextcarry = pair.first;
Index64 outindex = pair.second;

if ( slicestarts.length() != length() or slicestops.length() != length() ) {
throw std::runtime_error("Slices look weird");
}
Index64 carrystarts(slicestarts.length() - numnull);
Index64 carrystops(slicestops.length() - numnull);
int64_t k=0;
for(int64_t i=0; i < length(); ++i) {
int64_t from = outindex.getitem_at_nowrap(i);
// here we could OR this with starts[i]==stops[i] to pass singletons
// as if they were nulls. We would have to hack a bit on nextcarry and outindex though
if ( from >= 0 ) {
carrystarts.setitem_at_nowrap(k, slicestarts.getitem_at_nowrap(i));
carrystops.setitem_at_nowrap(k, slicestops.getitem_at_nowrap(i));
k++;
}
}
if ( k != carrystarts.length() ) {
throw std::runtime_error("failed to carry starts and stops properly");
}
nsmith- marked this conversation as resolved.
Show resolved Hide resolved

ContentPtr next = content_.get()->carry(nextcarry);
ContentPtr out = next.get()->getitem_next_jagged(slicestarts,
slicestops,
ContentPtr out = next.get()->getitem_next_jagged(carrystarts,
carrystops,
slicecontent,
tail);
IndexedOptionArray64 out2(identities_, parameters_, outindex, out);
Expand Down
25 changes: 23 additions & 2 deletions src/libawkward/array/IndexedArray.cpp
Expand Up @@ -2281,9 +2281,29 @@ namespace awkward {
Index64 nextcarry = pair.first;
IndexOf<T> outindex = pair.second;

if ( slicestarts.length() != length() or slicestops.length() != length() ) {
throw std::runtime_error("Slices look weird");
}
Index64 carrystarts(slicestarts.length() - numnull);
Index64 carrystops(slicestops.length() - numnull);
int64_t k=0;
for(int64_t i=0; i < length(); ++i) {
int64_t from = outindex.getitem_at_nowrap(i);
// here we could OR this with starts[i]==stops[i] to pass singletons
// as if they were nulls. We would have to hack a bit on nextcarry and outindex though
if ( from >= 0 ) {
carrystarts.setitem_at_nowrap(k, slicestarts.getitem_at_nowrap(i));
carrystops.setitem_at_nowrap(k, slicestops.getitem_at_nowrap(i));
k++;
}
}
if ( k != carrystarts.length() ) {
throw std::runtime_error("failed to carry starts and stops properly");
}
nsmith- marked this conversation as resolved.
Show resolved Hide resolved

ContentPtr next = content_.get()->carry(nextcarry);
ContentPtr out = next.get()->getitem_next_jagged(slicestarts,
slicestops,
ContentPtr out = next.get()->getitem_next_jagged(carrystarts,
carrystops,
slicecontent,
tail);
IndexedArrayOf<T, ISOPTION> out2(identities_,
Expand All @@ -2303,6 +2323,7 @@ namespace awkward {
util::handle_error(err, classname(), identities_.get());

ContentPtr next = content_.get()->carry(nextcarry);
// shouldn't we carry starts and stops here as well?
return next.get()->getitem_next_jagged(slicestarts,
slicestops,
slicecontent,
nsmith- marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
20 changes: 10 additions & 10 deletions tests/test_0111-jagged-and-masked-getitem.py
Expand Up @@ -171,7 +171,7 @@ def test_double_jagged():

def test_masked_jagged():
array = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]], check_valid=True)
assert awkward1.to_list(array[awkward1.Array([[-1, -2], None, [], None, [-2, 0], [], [1]], check_valid=True)]) == [[3.3, 2.2], None, [], None, [4.4, 4.4], [], [8.8]]
assert awkward1.to_list(array[awkward1.Array([[-1, -2], None, [], None, [-2, 0]], check_valid=True)]) == [[3.3, 2.2], None, [], None, [8.8, 7.7]]

def test_jagged_masked():
array = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6], [7.7, 8.8, 9.9]], check_valid=True)
Expand Down Expand Up @@ -203,8 +203,8 @@ def test_emptyarray():

assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [], [], []], check_valid=True)]) == [[], [], [], []]
assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [None], [], []], check_valid=True)]) == [[], [None], [], []]
assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [], None, [], []], check_valid=True)]) == [[], [], None, [], []]
assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [None], None, [], []], check_valid=True)]) == [[], [None], None, [], []]
assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [], None, []], check_valid=True)]) == [[], [], None, []]
assert awkward1.to_list(listoffsetarray[awkward1.Array([[], [None], None, []], check_valid=True)]) == [[], [None], None, []]

with pytest.raises(ValueError):
listoffsetarray[awkward1.Array([[], [0], [], []], check_valid=True)]
Expand All @@ -218,8 +218,7 @@ def test_record():
array = awkward1.Array([{"x": [0, 1, 2], "y": [0.0, 1.1, 2.2, 3.3]}, {"x": [3, 4, 5, 6], "y": [4.4, 5.5]}, {"x": [7, 8], "y": [6.6, 7.7, 8.8, 9.9]}], check_valid=True)
assert awkward1.to_list(array[awkward1.Array([[-1, 1], [0, 0, 1], [-1, -2]], check_valid=True)]) == [{"x": [2, 1], "y": [3.3, 1.1]}, {"x": [3, 3, 4], "y": [4.4, 4.4, 5.5]}, {"x": [8, 7], "y": [9.9, 8.8]}]
assert awkward1.to_list(array[awkward1.Array([[-1, 1], [0, 0, None, 1], [-1, -2]], check_valid=True)]) == [{"x": [2, 1], "y": [3.3, 1.1]}, {"x": [3, 3, None, 4], "y": [4.4, 4.4, None, 5.5]}, {"x": [8, 7], "y": [9.9, 8.8]}]
assert awkward1.to_list(array[awkward1.Array([[-1, 1], None, [0, 0, 1], [-1, -2]], check_valid=True)]) == [{"x": [2, 1], "y": [3.3, 1.1]}, None, {"x": [3, 3, 4], "y": [4.4, 4.4, 5.5]}, {"x": [8, 7], "y": [9.9, 8.8]}]
assert awkward1.to_list(array[awkward1.Array([[-1, 1], None, [0, 0, None, 1], [-1, -2]], check_valid=True)]) == [{"x": [2, 1], "y": [3.3, 1.1]}, None, {"x": [3, 3, None, 4], "y": [4.4, 4.4, None, 5.5]}, {"x": [8, 7], "y": [9.9, 8.8]}]
assert awkward1.to_list(array[awkward1.Array([[-1, 1], None, [-1, -2]], check_valid=True)]) == [{"x": [2, 1], "y": [3.3, 1.1]}, None, {"x": [8, 7], "y": [9.9, 8.8]}]

def test_indexedarray():
array = awkward1.from_iter([[0.0, 1.1, 2.2], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], highlevel=False)
Expand All @@ -229,17 +228,17 @@ def test_indexedarray():

assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], [], [1, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], [], [1.1, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], [None], [1, None, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], [None], [1.1, None, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [], [1, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [], [1.1, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [None], [1, None, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [None], [1.1, None, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [1, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [1.1, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [None]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [None]]

index = awkward1.layout.Index64(numpy.array([3, 2, 1, 0], dtype=numpy.int64))
indexedarray = awkward1.layout.IndexedOptionArray64(index, array)
assert awkward1.to_list(indexedarray) == [[6.6, 7.7, 8.8, 9.9], [5.5], [3.3, 4.4], [0.0, 1.1, 2.2]]

assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], [], [1, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], [], [1.1, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], [None], [1, None, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], [None], [1.1, None, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [], [1, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [], [1.1, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [None], [1, None, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [None], [1.1, None, 1.1]]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, []], check_valid=True)]) == [[6.6, 9.9], [5.5], None, []]
assert awkward1.to_list(indexedarray[awkward1.Array([[0, -1], [0], None, [1, None, 1]], check_valid=True)]) == [[6.6, 9.9], [5.5], None, [1.1, None, 1.1]]

def test_indexedarray2():
array = awkward1.from_iter([[0.0, 1.1, 2.2], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], highlevel=False)
Expand Down Expand Up @@ -283,8 +282,9 @@ def test_indexedarray3():

assert (awkward1.to_list(array[awkward1.Array([[2, 1, 1, 0], [1], None, [0]])])) == [[2.2, 1.1, 1.1, 0.0], [4.4], None, [5.5]]

assert awkward1.to_list(array[awkward1.Array([[2, 1, 1, 0], None, [1], [0]])]) == [[2.2, 1.1, 1.1, 0], None, None, [5.5]]
with pytest.raises(ValueError):
array[awkward1.Array([[2, 1, 1, 0], None, [1], [0]])]
array[awkward1.Array([[2, 1, 1, 0], None, [1], [0], None])]

def test_sequential():
array = awkward1.Array(numpy.arange(2*3*5).reshape(2, 3, 5).tolist(), check_valid=True)
Expand Down
58 changes: 58 additions & 0 deletions tests/test_0315-integerindex.py
@@ -0,0 +1,58 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/master/LICENSE

from __future__ import absolute_import

import sys

import pytest

import awkward1

def test_integerindex_null():
    """Index arrays with and without a None entry using jagged integer
    indexers that themselves contain zero, one, or two None entries."""
    with_none = awkward1.Array([[0, 1, 2], None, [5, 6], [7]])
    without_none = awkward1.Array([[0, 1, 2], [3, 4], [5, 6], [7]])
    pick_all = awkward1.Array([[1], [1], [0], [0]])
    pick_one_none = awkward1.Array([[1], None, [0], [0]])
    pick_two_none = awkward1.Array([[1], None, None, [0]])

    # (array, indexer, expected list) triples, checked in order.
    cases = [
        (with_none, pick_all, [[1], None, [5], [7]]),
        (with_none, pick_one_none, [[1], None, [5], [7]]),
        (with_none, pick_two_none, [[1], None, None, [7]]),
        (without_none, pick_all, [[1], [4], [5], [7]]),
        (without_none, pick_one_none, [[1], None, [5], [7]]),
        (without_none, pick_two_none, [[1], None, None, [7]]),
    ]
    for array, indexer, expected in cases:
        assert awkward1.to_list(array[indexer]) == expected

def test_boolindex_null():
    """Index arrays with and without a None entry using jagged boolean
    masks that themselves contain zero, one, or two None entries, then
    repeat the checks on an array masked through ``.mask``."""
    with_none = awkward1.Array([[0, 1, 2], None, [5, 6]])
    without_none = awkward1.Array([[0, 1, 2], [3, 4], [5, 6]])
    mask_all = awkward1.Array([[False, True, False], [False, True], [True, False]])
    mask_one_none = awkward1.Array([[False, True, False], None, [True, False]])
    mask_two_none = awkward1.Array([[False, True, False], None, None])

    def check(cases):
        # Each case is an (array, mask, expected list) triple.
        for array, mask, expected in cases:
            assert awkward1.to_list(array[mask]) == expected

    check([
        (with_none, mask_all, [[1], None, [5]]),
        (with_none, mask_one_none, [[1], None, [5]]),
        (with_none, mask_two_none, [[1], None, None]),
        (without_none, mask_all, [[1], [4], [5]]),
        (without_none, mask_one_none, [[1], None, [5]]),
        (without_none, mask_two_none, [[1], None, None]),
    ])

    # Same checks on the None-free array after masking out its middle entry.
    masked = without_none.mask[[True, False, True]]
    check([
        (masked, mask_all, [[1], None, [5]]),
        (masked, mask_one_none, [[1], None, [5]]),
        (masked, mask_two_none, [[1], None, None]),
    ])

def test_integerindex_null_more():
    """Index an array holding None both as a whole entry and inside a
    sublist, using indexers with None entries and/or empty sublists."""
    source = awkward1.Array([[0, None, 2], None, [3, 4], []])
    inner_and_outer_none = awkward1.Array([[1, 2, None], None, [], [None]])
    empty_and_none = awkward1.Array([[], None, None, []])
    all_empty = awkward1.Array([[], [], [], []])

    # (indexer, expected list) pairs, checked in order.
    cases = [
        (inner_and_outer_none, [[None, 2, None], None, [], [None]]),
        (empty_and_none, [[], None, None, []]),
        (all_empty, [[], None, [], []]),
    ]
    for indexer, expected in cases:
        assert awkward1.to_list(source[indexer]) == expected

def test_silly_stuff():
    """A nested-list indexer applied to an array mixing a list and a bare
    number must raise ValueError."""
    mixed = awkward1.Array([[0, 1, 2], 3])
    indexer = [[2], [0]]
    # Only the indexing itself is expected to raise, not the construction.
    with pytest.raises(ValueError):
        mixed[indexer]