Skip to content

Commit

Permalink
Merge branch 'master' into feature/PR045-flatten-operation
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Jan 8, 2020
2 parents e76eaa6 + d1fe5ad commit 5dde95c
Show file tree
Hide file tree
Showing 17 changed files with 1,087 additions and 5 deletions.
4 changes: 3 additions & 1 deletion README.md
Expand Up @@ -152,7 +152,9 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
* [ ] `ByteMaskedArray`: for nullable data with a byte mask (for NumPy).
* [ ] `BitMaskedArray`: for nullable data with a bit mask (for Arrow).
* [ ] `UnmaskedArray`: for optional type without actually having a mask.
* [ ] `IndexedArray`: same as the old version `IndexedMaskedArray`, has option type.
* [X] `IndexedArray` and `IndexedOptionArray`: the old `IndexedArray` and `IndexedMaskedArray`; the latter has option type.
* [ ] Implement `Identities` for `IndexedArray`.
* [ ] Implement Numba lowering for `IndexedArray`.
* [ ] `UnionArray`: same as the old version; `SparseUnionArray`: the additional case found in Apache Arrow.
* [ ] `RedirectArray`: an explicit weak-reference to another part of the structure (no hard-linked cycles). Often used with an `IndexedArray`.
* [ ] `SlicedArray`: lazy-slicing (from old `Table`) that can be applied to any type.
Expand Down
5 changes: 5 additions & 0 deletions include/awkward/Index.h
Expand Up @@ -10,9 +10,13 @@
#include "awkward/util.h"

namespace awkward {
// Forward declaration so that Index::to64 can name IndexOf<int64_t> before
// the IndexOf template itself is defined further down in this header.
template <typename T>
class IndexOf;

// Abstract base for index buffers; IndexOf<T> overrides all three members.
// NOTE(review): no access specifier is given, so these pure virtuals are
// private by class default — confirm that this is intended.
class Index {
virtual const std::shared_ptr<Index> shallow_copy() const = 0;
virtual const std::shared_ptr<Index> deep_copy() const = 0;
// Returns the index as an IndexOf<int64_t>, whatever the concrete element type.
virtual IndexOf<int64_t> to64() const = 0;
};

template <typename T>
Expand All @@ -35,6 +39,7 @@ namespace awkward {
IndexOf<T> getitem_range_nowrap(int64_t start, int64_t stop) const;
const std::shared_ptr<Index> shallow_copy() const override;
const std::shared_ptr<Index> deep_copy() const override;
IndexOf<int64_t> to64() const override;

private:
const std::shared_ptr<T> ptr_;
Expand Down
72 changes: 72 additions & 0 deletions include/awkward/array/IndexedArray.h
@@ -0,0 +1,72 @@
// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

#ifndef AWKWARD_INDEXEDARRAY_H_
#define AWKWARD_INDEXEDARRAY_H_

#include <cassert>
#include <string>
#include <memory>
#include <vector>

#include "awkward/cpu-kernels/util.h"
#include "awkward/Slice.h"
#include "awkward/Index.h"
#include "awkward/Content.h"

namespace awkward {
// Content node that pairs an index buffer (element type T) with a wrapped
// content array. ISOPTION selects between the plain and the option-type
// flavour (see the IndexedArray* / IndexedOptionArray* aliases after this class).
// NOTE(review): presumably negative index entries denote missing values when
// ISOPTION is true — confirm against IndexedArray.cpp.
template <typename T, bool ISOPTION>
class IndexedArrayOf: public Content {
public:
IndexedArrayOf<T, ISOPTION>(const std::shared_ptr<Identities>& identities, const util::Parameters& parameters, const IndexOf<T>& index, const std::shared_ptr<Content>& content);
// Accessors for the constructor arguments and the option flag.
const IndexOf<T> index() const;
const std::shared_ptr<Content> content() const;
bool isoption() const;

// Overrides of the Content interface.
const std::string classname() const override;
void setidentities() override;
void setidentities(const std::shared_ptr<Identities>& identities) override;
const std::shared_ptr<Type> type() const override;
const std::shared_ptr<Content> astype(const std::shared_ptr<Type>& type) const override;
const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override;
void tojson_part(ToJson& builder) const override;
int64_t length() const override;
const std::shared_ptr<Content> shallow_copy() const override;
void check_for_iteration() const override;
const std::shared_ptr<Content> getitem_nothing() const override;
const std::shared_ptr<Content> getitem_at(int64_t at) const override;
const std::shared_ptr<Content> getitem_at_nowrap(int64_t at) const override;
const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const override;
const std::shared_ptr<Content> getitem_range_nowrap(int64_t start, int64_t stop) const override;
const std::shared_ptr<Content> getitem_field(const std::string& key) const override;
const std::shared_ptr<Content> getitem_fields(const std::vector<std::string>& keys) const override;
const std::shared_ptr<Content> getitem_next(const std::shared_ptr<SliceItem>& head, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> carry(const Index64& carry) const override;
const std::pair<int64_t, int64_t> minmax_depth() const override;
int64_t numfields() const override;
int64_t fieldindex(const std::string& key) const override;
const std::string key(int64_t fieldindex) const override;
bool haskey(const std::string& key) const override;
const std::vector<std::string> keys() const override;

// operations
const std::shared_ptr<Content> flatten(int64_t axis) const override;

protected:
// Typed getitem_next overloads, one per slice-item kind handled by this class.
const std::shared_ptr<Content> getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override;

private:
const IndexOf<T> index_;
const std::shared_ptr<Content> content_;
// NOTE(review): duplicates the ISOPTION template parameter and, unlike the
// other members, is not const — confirm it is still needed.
bool isoption_;
};

// Concrete instantiations of IndexedArrayOf.
// Plain (non-option) arrays are provided for int32_t, uint32_t and int64_t indexes.
using IndexedArray32  = IndexedArrayOf<int32_t, false>;
using IndexedArrayU32 = IndexedArrayOf<uint32_t, false>;
using IndexedArray64  = IndexedArrayOf<int64_t, false>;
// Option-type arrays are provided for the signed index types only.
using IndexedOptionArray32 = IndexedArrayOf<int32_t, true>;
using IndexedOptionArray64 = IndexedArrayOf<int64_t, true>;
}

#endif // AWKWARD_INDEXEDARRAY_H_
60 changes: 60 additions & 0 deletions include/awkward/array/None.h
@@ -0,0 +1,60 @@
// BSD 3-Clause License; see https://github.com/jpivarski/awkward-1.0/blob/master/LICENSE

#ifndef AWKWARD_NONE_H_
#define AWKWARD_NONE_H_

#include <cassert>
#include <string>
#include <memory>
#include <vector>

#include "awkward/cpu-kernels/util.h"
#include "awkward/Slice.h"
#include "awkward/Content.h"

namespace awkward {
// Content subclass with a default constructor and no data members declared
// here; it overrides isscalar() in addition to the usual Content interface.
// NOTE(review): presumably the scalar returned for a missing value of an
// option-type array — confirm against None.cpp.
class None: public Content {
public:
None();

// Overrides of the Content interface.
bool isscalar() const override;
const std::string classname() const override;
void setidentities() override;
void setidentities(const std::shared_ptr<Identities>& identities) override;
const std::shared_ptr<Type> type() const override;
const std::shared_ptr<Content> astype(const std::shared_ptr<Type>& type) const override;
const std::string tostring_part(const std::string& indent, const std::string& pre, const std::string& post) const override;
void tojson_part(ToJson& builder) const override;
int64_t length() const override;
const std::shared_ptr<Content> shallow_copy() const override;
void check_for_iteration() const override;
const std::shared_ptr<Content> getitem_nothing() const override;
const std::shared_ptr<Content> getitem_at(int64_t at) const override;
const std::shared_ptr<Content> getitem_at_nowrap(int64_t at) const override;
const std::shared_ptr<Content> getitem_range(int64_t start, int64_t stop) const override;
const std::shared_ptr<Content> getitem_range_nowrap(int64_t start, int64_t stop) const override;
const std::shared_ptr<Content> getitem_field(const std::string& key) const override;
const std::shared_ptr<Content> getitem_fields(const std::vector<std::string>& keys) const override;
const std::shared_ptr<Content> carry(const Index64& carry) const override;
const std::pair<int64_t, int64_t> minmax_depth() const override;
int64_t numfields() const override;
int64_t fieldindex(const std::string& key) const override;
const std::string key(int64_t fieldindex) const override;
bool haskey(const std::string& key) const override;
const std::vector<std::string> keys() const override;

// operations
const std::shared_ptr<Content> flatten(int64_t axis) const override;

protected:
// Typed getitem_next overloads, one per slice-item kind handled by this class.
const std::shared_ptr<Content> getitem_next(const SliceAt& at, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceRange& range, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceArray64& array, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceField& field, const Slice& tail, const Index64& advanced) const override;
const std::shared_ptr<Content> getitem_next(const SliceFields& fields, const Slice& tail, const Index64& advanced) const override;
};

// Shared Content handle declared here; the definition lives outside this header.
// NOTE(review): presumably points at a single None instance — confirm in None.cpp.
extern const std::shared_ptr<Content> none;
}

#endif // AWKWARD_NONE_H_
19 changes: 19 additions & 0 deletions include/awkward/cpu-kernels/getitem.h
Expand Up @@ -9,6 +9,11 @@ extern "C" {
EXPORT_SYMBOL void awkward_regularize_rangeslice(int64_t* start, int64_t* stop, bool posstep, bool hasstart, bool hasstop, int64_t length);
EXPORT_SYMBOL struct Error awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead, int64_t length);

// Widening copies: write `length` elements of fromptr into toptr as int64_t,
// one kernel per source element type (int8_t, uint8_t, int32_t, uint32_t).
EXPORT_SYMBOL struct Error awkward_index8_to_index64(int64_t* toptr, const int8_t* fromptr, int64_t length);
EXPORT_SYMBOL struct Error awkward_indexU8_to_index64(int64_t* toptr, const uint8_t* fromptr, int64_t length);
EXPORT_SYMBOL struct Error awkward_index32_to_index64(int64_t* toptr, const int32_t* fromptr, int64_t length);
EXPORT_SYMBOL struct Error awkward_indexU32_to_index64(int64_t* toptr, const uint32_t* fromptr, int64_t length);

EXPORT_SYMBOL struct Error awkward_slicearray_ravel_64(int64_t* toptr, const int64_t* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides);

EXPORT_SYMBOL struct Error awkward_carry_arange_64(int64_t* toptr, int64_t length);
Expand Down Expand Up @@ -66,6 +71,20 @@ extern "C" {
EXPORT_SYMBOL struct Error awkward_regulararray_getitem_next_array_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromarray, int64_t len, int64_t lenarray, int64_t size);
EXPORT_SYMBOL struct Error awkward_regulararray_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const int64_t* fromadvanced, const int64_t* fromarray, int64_t len, int64_t lenarray, int64_t size);
EXPORT_SYMBOL struct Error awkward_regulararray_getitem_carry_64(int64_t* tocarry, const int64_t* fromcarry, int64_t lencarry, int64_t size);

// numnull: stores in *numnull how many of the lenindex entries starting at
// fromindex[indexoffset] are negative.
EXPORT_SYMBOL struct Error awkward_indexedarray32_numnull(int64_t* numnull, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex);
EXPORT_SYMBOL struct Error awkward_indexedarray64_numnull(int64_t* numnull, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex);

// nextcarry_outindex: fails on an entry >= lencontent; a negative entry writes
// -1 to toindex; any other entry is appended to tocarry and toindex receives
// its position within tocarry.
EXPORT_SYMBOL struct Error awkward_indexedarray32_getitem_nextcarry_outindex_64(int64_t* tocarry, int32_t* toindex, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
EXPORT_SYMBOL struct Error awkward_indexedarray64_getitem_nextcarry_outindex_64(int64_t* tocarry, int64_t* toindex, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);

// nextcarry: copies the lenindex entries into tocarry; fails on the first
// entry that is negative or >= lencontent.
EXPORT_SYMBOL struct Error awkward_indexedarray32_getitem_nextcarry_64(int64_t* tocarry, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
EXPORT_SYMBOL struct Error awkward_indexedarrayU32_getitem_nextcarry_64(int64_t* tocarry, const uint32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
EXPORT_SYMBOL struct Error awkward_indexedarray64_getitem_nextcarry_64(int64_t* tocarry, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);

// carry: toindex[i] = fromindex[indexoffset + fromcarry[i]] for i < lencarry;
// fails when fromcarry[i] >= lenindex.
EXPORT_SYMBOL struct Error awkward_indexedarray32_getitem_carry_64(int32_t* toindex, const int32_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry);
EXPORT_SYMBOL struct Error awkward_indexedarrayU32_getitem_carry_64(uint32_t* toindex, const uint32_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry);
EXPORT_SYMBOL struct Error awkward_indexedarray64_getitem_carry_64(int64_t* toindex, const int64_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry);
}

#endif // AWKWARDCPU_GETITEM_H_
8 changes: 8 additions & 0 deletions include/awkward/util.h
Expand Up @@ -64,6 +64,14 @@ namespace awkward {
ERROR awkward_listarray_getitem_next_array_advanced_64(int64_t* tocarry, int64_t* toadvanced, const T* fromstarts, const T* fromstops, const int64_t* fromarray, const int64_t* fromadvanced, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lenarray, int64_t lencontent);
template <typename T>
ERROR awkward_listarray_getitem_carry_64(T* tostarts, T* tostops, const T* fromstarts, const T* fromstops, const int64_t* fromcarry, int64_t startsoffset, int64_t stopsoffset, int64_t lenstarts, int64_t lencarry);
// Templated declarations of the IndexedArray kernels, parameterised on the
// index element type T.
// NOTE(review): presumably specialised elsewhere to forward to the typed C
// kernels (awkward_indexedarray32_*, awkward_indexedarrayU32_*,
// awkward_indexedarray64_*) — confirm in util.cpp.
template <typename T>
ERROR awkward_indexedarray_numnull(int64_t* numnull, const T* fromindex, int64_t indexoffset, int64_t lenindex);
template <typename T>
ERROR awkward_indexedarray_getitem_nextcarry_outindex_64(int64_t* tocarry, T* toindex, const T* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
template <typename T>
ERROR awkward_indexedarray_getitem_nextcarry_64(int64_t* tocarry, const T* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent);
template <typename T>
ERROR awkward_indexedarray_getitem_carry_64(T* toindex, const T* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry);

}
}
Expand Down
113 changes: 113 additions & 0 deletions src/cpu-kernels/getitem.cpp
Expand Up @@ -49,6 +49,31 @@ ERROR awkward_regularize_arrayslice_64(int64_t* flatheadptr, int64_t lenflathead
return awkward_regularize_arrayslice<int64_t>(flatheadptr, lenflathead, length);
}

ERROR awkward_index8_to_index64(int64_t* toptr, const int8_t* fromptr, int64_t length) {
for (int64_t i = 0; i < length; i++) {
toptr[i]= (int64_t)fromptr[i];
}
return success();
}
ERROR awkward_indexU8_to_index64(int64_t* toptr, const uint8_t* fromptr, int64_t length) {
for (int64_t i = 0; i < length; i++) {
toptr[i]= (int64_t)fromptr[i];
}
return success();
}
ERROR awkward_index32_to_index64(int64_t* toptr, const int32_t* fromptr, int64_t length) {
for (int64_t i = 0; i < length; i++) {
toptr[i]= (int64_t)fromptr[i];
}
return success();
}
ERROR awkward_indexU32_to_index64(int64_t* toptr, const uint32_t* fromptr, int64_t length) {
for (int64_t i = 0; i < length; i++) {
toptr[i]= (int64_t)fromptr[i];
}
return success();
}

template <typename T>
ERROR awkward_slicearray_ravel(T* toptr, const T* fromptr, int64_t ndim, const int64_t* shape, const int64_t* strides) {
if (ndim == 1) {
Expand Down Expand Up @@ -531,3 +556,91 @@ ERROR awkward_regulararray_getitem_carry(T* tocarry, const T* fromcarry, int64_t
ERROR awkward_regulararray_getitem_carry_64(int64_t* tocarry, const int64_t* fromcarry, int64_t lencarry, int64_t size) {
return awkward_regulararray_getitem_carry<int64_t>(tocarry, fromcarry, lencarry, size);
}

// Counts the negative entries among the `lenindex` elements of `fromindex`
// that start at position `indexoffset`, storing the total in `*numnull`.
// Always returns success().
template <typename C>
ERROR awkward_indexedarray_numnull(int64_t* numnull, const C* fromindex, int64_t indexoffset, int64_t lenindex) {
  *numnull = 0;
  int64_t pos = 0;
  while (pos < lenindex) {
    const bool isnull = fromindex[indexoffset + pos] < 0;
    if (isnull) {
      ++(*numnull);
    }
    pos++;
  }
  return success();
}
// int32_t-index entry point for awkward_indexedarray_numnull (declared in getitem.h).
ERROR awkward_indexedarray32_numnull(int64_t* numnull, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex) {
return awkward_indexedarray_numnull<int32_t>(numnull, fromindex, indexoffset, lenindex);
}
// int64_t-index entry point for awkward_indexedarray_numnull (declared in getitem.h).
ERROR awkward_indexedarray64_numnull(int64_t* numnull, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex) {
return awkward_indexedarray_numnull<int64_t>(numnull, fromindex, indexoffset, lenindex);
}

// Splits an index that may contain negative entries into a carry and an
// output index.
//
// For each of the `lenindex` entries starting at fromindex[indexoffset]:
//   * an entry >= lencontent aborts with a failure naming its position;
//   * a negative entry writes -1 into toindex at the same position;
//   * any other entry is appended to tocarry, and toindex receives the
//     position it was given inside tocarry.
// The upper-bound test is deliberately evaluated before the sign test, as in
// the original kernel.
template <typename C, typename T>
ERROR awkward_indexedarray_getitem_nextcarry_outindex(T* tocarry, C* toindex, const C* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
  int64_t ncarried = 0;
  for (int64_t pos = 0;  pos < lenindex;  pos++) {
    const C target = fromindex[indexoffset + pos];
    if (target >= lencontent) {
      return failure("IndexedOptionArray index out of range", pos, target);
    }
    if (target < 0) {
      toindex[pos] = -1;
      continue;
    }
    tocarry[ncarried] = target;
    toindex[pos] = (C)ncarried;
    ncarried++;
  }
  return success();
}
// int32_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_nextcarry_outindex.
ERROR awkward_indexedarray32_getitem_nextcarry_outindex_64(int64_t* tocarry, int32_t* toindex, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
return awkward_indexedarray_getitem_nextcarry_outindex<int32_t, int64_t>(tocarry, toindex, fromindex, indexoffset, lenindex, lencontent);
}
// int64_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_nextcarry_outindex.
ERROR awkward_indexedarray64_getitem_nextcarry_outindex_64(int64_t* tocarry, int64_t* toindex, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
return awkward_indexedarray_getitem_nextcarry_outindex<int64_t, int64_t>(tocarry, toindex, fromindex, indexoffset, lenindex, lencontent);
}

// Copies the `lenindex` entries starting at fromindex[indexoffset] into
// tocarry, converting them to the carry element type T.
//
// Every entry must lie in [0, lencontent); the first one that does not aborts
// with a failure naming its position. Because a failure returns immediately,
// the write position in tocarry always equals the read position, so a single
// counter is sufficient.
template <typename C, typename T>
ERROR awkward_indexedarray_getitem_nextcarry(T* tocarry, const C* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
  for (int64_t pos = 0;  pos < lenindex;  pos++) {
    const C target = fromindex[indexoffset + pos];
    const bool inrange = (target >= 0  &&  target < lencontent);
    if (!inrange) {
      return failure("IndexedArray index out of range", pos, target);
    }
    tocarry[pos] = target;
  }
  return success();
}
// int32_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_nextcarry.
ERROR awkward_indexedarray32_getitem_nextcarry_64(int64_t* tocarry, const int32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
return awkward_indexedarray_getitem_nextcarry<int32_t, int64_t>(tocarry, fromindex, indexoffset, lenindex, lencontent);
}
// uint32_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_nextcarry.
// With an unsigned index the template's negative-entry test can never fire.
ERROR awkward_indexedarrayU32_getitem_nextcarry_64(int64_t* tocarry, const uint32_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
return awkward_indexedarray_getitem_nextcarry<uint32_t, int64_t>(tocarry, fromindex, indexoffset, lenindex, lencontent);
}
// int64_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_nextcarry.
ERROR awkward_indexedarray64_getitem_nextcarry_64(int64_t* tocarry, const int64_t* fromindex, int64_t indexoffset, int64_t lenindex, int64_t lencontent) {
return awkward_indexedarray_getitem_nextcarry<int64_t, int64_t>(tocarry, fromindex, indexoffset, lenindex, lencontent);
}

// Applies a carry to an index: toindex[i] = fromindex[indexoffset + fromcarry[i]]
// for every i in [0, lencarry).
//
// Parameters:
//   toindex      output buffer with room for lencarry elements
//   fromindex    index buffer being carried
//   fromcarry    positions to pick, relative to indexoffset
//   indexoffset  position in fromindex of the first logical element
//   lenindex     number of logical elements available in fromindex
//   lencarry     number of carry positions
//
// Returns failure("index out of range", i, fromcarry[i]) for the first carry
// position outside [0, lenindex), otherwise success(). The lower bound is
// checked as well as the upper one so that a negative carry position cannot
// read memory in front of the index buffer.
template <typename C, typename T>
ERROR awkward_indexedarray_getitem_carry(C* toindex, const C* fromindex, const T* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry) {
  for (int64_t i = 0;  i < lencarry;  i++) {
    const T at = fromcarry[i];
    if (at < 0  ||  at >= lenindex) {
      return failure("index out of range", i, at);
    }
    toindex[i] = fromindex[indexoffset + at];
  }
  return success();
}
// int32_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_carry.
ERROR awkward_indexedarray32_getitem_carry_64(int32_t* toindex, const int32_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry) {
return awkward_indexedarray_getitem_carry<int32_t, int64_t>(toindex, fromindex, fromcarry, indexoffset, lenindex, lencarry);
}
// uint32_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_carry.
ERROR awkward_indexedarrayU32_getitem_carry_64(uint32_t* toindex, const uint32_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry) {
return awkward_indexedarray_getitem_carry<uint32_t, int64_t>(toindex, fromindex, fromcarry, indexoffset, lenindex, lencarry);
}
// int64_t-index / int64_t-carry entry point for awkward_indexedarray_getitem_carry.
ERROR awkward_indexedarray64_getitem_carry_64(int64_t* toindex, const int64_t* fromindex, const int64_t* fromcarry, int64_t indexoffset, int64_t lenindex, int64_t lencarry) {
return awkward_indexedarray_getitem_carry<int64_t, int64_t>(toindex, fromindex, fromcarry, indexoffset, lenindex, lencarry);
}

0 comments on commit 5dde95c

Please sign in to comment.