Skip to content

Commit

Permalink
Start flatten implementation and add tests (#45)
Browse files Browse the repository at this point in the history
Implements `flatten` for all `Content` types that existed at the start of this PR (not `IndexedArray`) for `axis=0`.

* start flatten implementation and add tests

* [skip ci] correct EmptyArray

* [skip ci] correct Record

* [skip ci] correct RegularArray

* [skip ci] correct RawArray

* Example of an extreme (but legal) ListArray.

* Keep version number ahead of latest release.

* [skip ci] operations in cpu kernels

* [skip ci] cleanup debugging messages

* [skip ci] restructure functions

* use offsets in calculations

* [skip ci] address Jim's comments to fix ListArray and add more tests

* flatten non contiguous NumpyArray

Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>
  • Loading branch information
ianna and jpivarski committed Jan 13, 2020
1 parent 9bc9340 commit 8a71e52
Show file tree
Hide file tree
Showing 12 changed files with 298 additions and 28 deletions.
2 changes: 1 addition & 1 deletion include/awkward/array/RawArray.h
Expand Up @@ -377,7 +377,7 @@ namespace awkward {

// operations
// RawArray has exactly one dimension, so there is no nested axis to merge:
// flattening is always rejected, whatever `axis` is.
//
// Throws std::invalid_argument unconditionally.
const std::shared_ptr<Content> flatten(int64_t axis) const override {
  throw std::invalid_argument("RawArray is strictly one-dimensional");
}

protected:
Expand Down
17 changes: 17 additions & 0 deletions include/awkward/cpu-kernels/operations.h
@@ -0,0 +1,17 @@
// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

#ifndef AWKWARDCPU_OPERATIONS_H_
#define AWKWARDCPU_OPERATIONS_H_

#include "awkward/cpu-kernels/util.h"

// C-linkage entry points for the ListArray flatten kernels.
//
// The infix of each name gives the element type of the starts/stops buffers
// (32 = int32_t, U32 = uint32_t, 64 = int64_t); the output is always int64_t.
// For every i in [0, lenstarts) the kernels read fromstarts[startsoffset + i]
// and fromstops[stopsoffset + i], and report a failure if either is negative.
extern "C" {
// *_flatten_length_64: accumulate (stop - start) over all lists into *tolen.
EXPORT_SYMBOL struct Error awkward_listarray32_flatten_length_64(int64_t* tolen, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
EXPORT_SYMBOL struct Error awkward_listarrayU32_flatten_length_64(int64_t* tolen, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
EXPORT_SYMBOL struct Error awkward_listarray64_flatten_length_64(int64_t* tolen, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
// *_flatten_64: write the indices start, start + 1, ..., stop - 1 of each list,
// one list after another, into tocarry. The caller provides the buffer.
EXPORT_SYMBOL struct Error awkward_listarray32_flatten_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
EXPORT_SYMBOL struct Error awkward_listarrayU32_flatten_64(int64_t* tocarry, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
EXPORT_SYMBOL struct Error awkward_listarray64_flatten_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
}

#endif // AWKWARDCPU_OPERATIONS_H_
4 changes: 4 additions & 0 deletions include/awkward/util.h
Expand Up @@ -65,6 +65,10 @@ namespace awkward {
template <typename T>
ERROR awkward_listarray_getitem_carry_64(T* tostarts, T* tostops, const T* fromstarts, const T* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry);
// Type-dispatching front end for the awkward_listarray{32,U32,64}_flatten_length_64
// C kernels: T is the element type of the starts/stops buffers, and the result
// is written to *tolen.
template <typename T>
ERROR awkward_listarray_flatten_length_64(int64_t* tolen, const T* fromstarts, const T* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
// Type-dispatching front end for the awkward_listarray{32,U32,64}_flatten_64
// C kernels: T is the element type of the starts/stops buffers, and the
// flattened element indices are written to tocarry.
template <typename T>
ERROR awkward_listarray_flatten_64(int64_t* tocarry, const T* fromstarts, const T* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset);
template <typename T>
ERROR awkward_indexedarray_numnull(int64_t* numnull, const T* fromindex, int64_t indexoffset, int64_t lenindex);
template <typename T>
ERROR awkward_indexedarray_getitem_nextcarry_outindex_64(int64_t* tocarry, T* toindex, const T* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
Expand Down
57 changes: 57 additions & 0 deletions src/cpu-kernels/operations.cpp
@@ -0,0 +1,57 @@
// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

#include <cstring>

#include "awkward/cpu-kernels/operations.h"
// Computes how many elements flattening a ListArray will produce.
//
//   tolen         output; reset to 0, then incremented by the length of each list
//   fromstarts    start index of list i is fromstarts[startsoffset + i]
//   fromstops     stop index of list i is fromstops[stopsoffset + i]
//   lenstarts     number of lists to visit
//
// Returns a failure for the first list whose start or stop is negative
// (*tolen then holds the partial count), success() otherwise.
//
// A list with stop <= start contributes nothing. awkward_listarray_flatten
// writes no indices for such a list, so letting a negative (stop - start)
// reduce the total would make the caller allocate a carry buffer that is
// smaller than what the fill kernel writes.
template <typename C>
ERROR awkward_listarray_flatten_length(int64_t* tolen, const C* fromstarts, const C* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
  *tolen = 0;
  for (int64_t i = 0; i < lenstarts; i++) {
    const int64_t start = static_cast<int64_t>(fromstarts[startsoffset + i]);
    const int64_t stop = static_cast<int64_t>(fromstops[stopsoffset + i]);
    if (start < 0 || stop < 0) {
      return failure("all start and stop values must be non-negative", kSliceNone, i);
    }
    const int64_t length = stop - start;
    if (length > 0) {
      *tolen += length;
    }
  }
  return success();
}
ERROR awkward_listarray32_flatten_length_64(int64_t* tolen, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten_length<int32_t>(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
ERROR awkward_listarrayU32_flatten_length_64(int64_t* tolen, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten_length<uint32_t>(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
ERROR awkward_listarray64_flatten_length_64(int64_t* tolen, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten_length<int64_t>(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}

// Writes the element indices of every list, one list after another, into
// tocarry: list i contributes start, start + 1, ..., stop - 1, where
// start = fromstarts[startsoffset + i] and stop = fromstops[stopsoffset + i].
// A list with stop <= start contributes nothing.
//
// Returns a failure for the first list whose start or stop is negative
// (indices of the preceding lists have already been written), success()
// otherwise. The kernel does no bounds checking on tocarry.
template <typename C, typename T>
ERROR awkward_listarray_flatten(T* tocarry, const C* fromstarts, const C* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
  T* out = tocarry;
  for (int64_t list = 0; list < lenstarts; ++list) {
    const int64_t first = static_cast<int64_t>(fromstarts[startsoffset + list]);
    const int64_t last = static_cast<int64_t>(fromstops[stopsoffset + list]);
    if (first < 0 || last < 0) {
      return failure("all start and stop values must be non-negative", kSliceNone, list);
    }
    for (int64_t index = first; index < last; ++index) {
      *out = index;
      ++out;
    }
  }
  return success();
}
ERROR awkward_listarray32_flatten_64(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten<int32_t, int64_t>(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
ERROR awkward_listarrayU32_flatten_64(int64_t* tocarry, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten<uint32_t, int64_t>(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
ERROR awkward_listarray64_flatten_64(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray_flatten<int64_t, int64_t>(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
2 changes: 1 addition & 1 deletion src/libawkward/array/EmptyArray.cpp
Expand Up @@ -125,7 +125,7 @@ namespace awkward {
}

// Flattening an empty array yields another empty array. The result is a fresh
// EmptyArray built with no identities and default-constructed parameters;
// `axis` is not consulted.
const std::shared_ptr<Content> EmptyArray::flatten(int64_t axis) const {
  return std::make_shared<EmptyArray>(Identities::none(), util::Parameters());
}

const std::shared_ptr<Content> EmptyArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const {
Expand Down
31 changes: 30 additions & 1 deletion src/libawkward/array/ListArray.cpp
Expand Up @@ -5,6 +5,7 @@

#include "awkward/cpu-kernels/identities.h"
#include "awkward/cpu-kernels/getitem.h"
#include "awkward/cpu-kernels/operations.h"
#include "awkward/type/ListType.h"
#include "awkward/type/ArrayType.h"
#include "awkward/type/UnknownType.h"
Expand Down Expand Up @@ -353,7 +354,35 @@ namespace awkward {

// Removes one level of list structure: the result is content_ carried by the
// concatenation of the index ranges [starts[i], stops[i]) of every list.
//
// Works in two kernel passes: the first computes how many elements the result
// has, the second fills an Index64 of that size with the element indices.
//
// Throws std::invalid_argument if axis is negative. Kernel failures and a
// stops buffer shorter than starts are reported through util::handle_error.
// NOTE(review): axis values greater than 0 are accepted but not acted upon;
// confirm whether they should be rejected or forwarded to the content.
template <typename T>
const std::shared_ptr<Content> ListArrayOf<T>::flatten(int64_t axis) const {
  if (axis < 0) {
    throw std::invalid_argument("axis must be a non-negative integer (can't count from the end)");
  }

  const int64_t lenstarts = starts_.length();
  if (stops_.length() < lenstarts) {
    util::handle_error(failure("len(stops) < len(starts)", kSliceNone, kSliceNone), classname(), identities_.get());
  }

  // Pass 1: total number of flattened elements, used to size the carry index.
  int64_t lenarray = 0;
  struct Error lenerr = util::awkward_listarray_flatten_length_64<T>(
    &lenarray,
    starts_.ptr().get(),
    stops_.ptr().get(),
    lenstarts,
    starts_.offset(),
    stops_.offset());
  util::handle_error(lenerr, classname(), identities_.get());

  // Pass 2: fill the carry index with the element positions of each list.
  Index64 indxarray(lenarray);
  struct Error fillerr = util::awkward_listarray_flatten_64<T>(
    indxarray.ptr().get(),
    starts_.ptr().get(),
    stops_.ptr().get(),
    lenstarts,
    starts_.offset(),
    stops_.offset());
  util::handle_error(fillerr, classname(), identities_.get());

  return content_.get()->carry(indxarray);
}

template <typename T>
Expand Down
47 changes: 26 additions & 21 deletions src/libawkward/array/NumpyArray.cpp
Expand Up @@ -573,8 +573,33 @@ namespace awkward {
throw std::invalid_argument("array contains no Records");
}

// Returns the shape obtained by merging the two leading dimensions of `shape`
// into one: {a, b, rest...} becomes {a*b, rest...}.
//
// A shape with fewer than two dimensions has nothing to merge and yields an
// empty shape. (The empty-shape case must be caught here: indexing shape[0]
// and shape[1] on it would read past the end of the vector.)
const std::vector<ssize_t> flatten_shape(const std::vector<ssize_t> shape) {
  if (shape.size() < 2) {
    return std::vector<ssize_t>();
  }
  std::vector<ssize_t> out;
  out.reserve(shape.size() - 1);
  out.push_back(shape[0]*shape[1]);
  out.insert(out.end(), shape.begin() + 2, shape.end());
  return out;
}

// Returns the strides that go with a shape whose two leading dimensions were
// merged: the leading stride is dropped and the rest are kept unchanged.
//
// Fewer than two strides yields an empty vector. (The empty case must be
// caught here: begin() + 1 on an empty vector is past end().)
const std::vector<ssize_t> flatten_strides(const std::vector<ssize_t> strides) {
  if (strides.size() < 2) {
    return std::vector<ssize_t>();
  }
  return std::vector<ssize_t>(strides.begin() + 1, strides.end());
}

// Merges the two leading dimensions of the array without copying data: the
// result shares ptr_, byteoffset_, itemsize_ and format_ with this array and
// only its shape and strides differ.
//
// Dropping the leading stride is only valid when the buffer is contiguous, so
// a non-contiguous array is first converted with contiguous() and the result
// of that is flattened instead.
//
// NOTE(review): for a one-dimensional array flatten_shape/flatten_strides
// return empty vectors, so the result has an empty shape; confirm that this
// is the intended outcome rather than an error.
const std::shared_ptr<Content> NumpyArray::flatten(int64_t axis) const {
  if (!iscontiguous()) {
    return contiguous().flatten(axis);
  }
  return std::make_shared<NumpyArray>(identities_, parameters_, ptr_, flatten_shape(shape_), flatten_strides(strides_), byteoffset_, itemsize_, format_);
}

const std::shared_ptr<Content> NumpyArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const {
Expand All @@ -597,26 +622,6 @@ namespace awkward {
throw std::runtime_error("NumpyArray has its own getitem_next system");
}

// Returns the shape obtained by merging the two leading dimensions of `shape`
// into one: {a, b, rest...} becomes {a*b, rest...}.
//
// A shape with fewer than two dimensions has nothing to merge and yields an
// empty shape. (The empty-shape case must be caught here: indexing shape[0]
// and shape[1] on it would read past the end of the vector.)
const std::vector<ssize_t> flatten_shape(const std::vector<ssize_t> shape) {
  if (shape.size() < 2) {
    return std::vector<ssize_t>();
  }
  std::vector<ssize_t> out;
  out.reserve(shape.size() - 1);
  out.push_back(shape[0]*shape[1]);
  out.insert(out.end(), shape.begin() + 2, shape.end());
  return out;
}

// Returns the strides that go with a shape whose two leading dimensions were
// merged: the leading stride is dropped and the rest are kept unchanged.
//
// Fewer than two strides yields an empty vector. (The empty case must be
// caught here: begin() + 1 on an empty vector is past end().)
const std::vector<ssize_t> flatten_strides(const std::vector<ssize_t> strides) {
  if (strides.size() < 2) {
    return std::vector<ssize_t>();
  }
  return std::vector<ssize_t>(strides.begin() + 1, strides.end());
}

bool NumpyArray::iscontiguous() const {
ssize_t x = itemsize_;
for (ssize_t i = ndim() - 1; i >= 0; i--) {
Expand Down
2 changes: 1 addition & 1 deletion src/libawkward/array/Record.cpp
Expand Up @@ -198,7 +198,7 @@ namespace awkward {
}

// A Record is a single record rather than an array, so it cannot be
// flattened; `axis` is not consulted.
//
// Throws std::invalid_argument unconditionally.
const std::shared_ptr<Content> Record::flatten(int64_t axis) const {
  throw std::invalid_argument("Record is not an array");
}

const std::shared_ptr<Content> Record::field(int64_t fieldindex) const {
Expand Down
2 changes: 1 addition & 1 deletion src/libawkward/array/RecordArray.cpp
Expand Up @@ -352,7 +352,7 @@ namespace awkward {
}

// Flattening is not supported for RecordArray; `axis` is not consulted.
//
// Throws std::invalid_argument unconditionally.
const std::shared_ptr<Content> RecordArray::flatten(int64_t axis) const {
  throw std::invalid_argument("RecordArray cannot be flattened");
}

const std::shared_ptr<Content> RecordArray::field(int64_t fieldindex) const {
Expand Down
5 changes: 4 additions & 1 deletion src/libawkward/array/RegularArray.cpp
Expand Up @@ -238,7 +238,10 @@ namespace awkward {
}

// Removes the regular-list level: the result is the content itself, limited
// to the length()*size_ elements that belong to complete lists.
//
// When the content length is an exact multiple of size_ the content is
// returned as is; otherwise the trailing partial list is cut off with
// getitem_range_nowrap. size_ == 0 is tested first because the modulo would
// otherwise divide by zero; in that case the requested range is [0, 0).
// `axis` is not consulted.
const std::shared_ptr<Content> RegularArray::flatten(int64_t axis) const {
  if (size_ == 0 || content_.get()->length() % size_ != 0) {
    return content_.get()->getitem_range_nowrap(0, length()*size_);
  }
  return content_;
}

const std::shared_ptr<Content> RegularArray::getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const {
Expand Down
27 changes: 26 additions & 1 deletion src/libawkward/util.cpp
Expand Up @@ -8,6 +8,7 @@

#include "awkward/cpu-kernels/identities.h"
#include "awkward/cpu-kernels/getitem.h"
#include "awkward/cpu-kernels/operations.h"

#include "awkward/util.h"
#include "awkward/Identities.h"
Expand Down Expand Up @@ -290,6 +291,31 @@ namespace awkward {
return awkward_listarray64_getitem_carry_64(tostarts, tostops, fromstarts, fromstops, fromcarry, startsoffset, stopsoffset, lenstarts, lencarry);
}

template <>
Error awkward_listarray_flatten_length_64<int32_t>(int64_t* tolen, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray32_flatten_length_64(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
template <>
Error awkward_listarray_flatten_length_64<uint32_t>(int64_t* tolen, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarrayU32_flatten_length_64(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
template <>
Error awkward_listarray_flatten_length_64<int64_t>(int64_t* tolen, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray64_flatten_length_64(tolen, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}

template <>
Error awkward_listarray_flatten_64<int32_t>(int64_t* tocarry, const int32_t* fromstarts, const int32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray32_flatten_64(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
template <>
Error awkward_listarray_flatten_64<uint32_t>(int64_t* tocarry, const uint32_t* fromstarts, const uint32_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarrayU32_flatten_64(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
template <>
Error awkward_listarray_flatten_64<int64_t>(int64_t* tocarry, const int64_t* fromstarts, const int64_t* fromstops, const int64_t lenstarts, int64_t startsoffset, int64_t stopsoffset) {
return awkward_listarray64_flatten_64(tocarry, fromstarts, fromstops, lenstarts, startsoffset, stopsoffset);
}
template <>
Error awkward_indexedarray_numnull<int32_t>(int64_t* numnull, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex) {
return awkward_indexedarray32_numnull(numnull, fromindex, indexoffset, lenindex);
Expand Down Expand Up @@ -333,6 +359,5 @@ namespace awkward {
Error awkward_indexedarray_getitem_carry_64<int64_t>(int64_t* toindex, const int64_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry) {
return awkward_indexedarray64_getitem_carry_64(toindex, fromindex, fromcarry, indexoffset, lenindex, lencarry);
}

}
}

0 comments on commit 8a71e52

Please sign in to comment.