Skip to content
This repository was archived by the owner on Apr 10, 2024. It is now read-only.

[pandas 2.0] Prototype array view interface, buffer copying #40

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ if (UNIX)
# Full lint
add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
--verbose=4
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order,-build/c++11
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
endif (UNIX)

Expand Down Expand Up @@ -611,12 +611,12 @@ set(PANDAS_SRCS
src/pandas/numpy_interop.cc
src/pandas/pytypes.cc
src/pandas/status.cc
src/pandas/types.cc
src/pandas/type.cc

src/pandas/types/boolean.cc
src/pandas/types/common.cc
src/pandas/types/category.cc
src/pandas/types/integer.cc
src/pandas/types/floating.cc
src/pandas/types/numeric.cc
)

add_library(pandas SHARED
Expand Down
6 changes: 3 additions & 3 deletions scripts/merge-py.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from six.moves import input

PANDAS_HOME = '.'
PROJECT_NAME = 'pandas'
PROJECT_NAME = 'pandas2'
print("PANDAS_HOME = " + PANDAS_HOME)

# Remote name with the PR
Expand All @@ -46,8 +46,8 @@
# Remote name where results pushed
PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "upstream")

GITHUB_BASE = "https://github.com/pydata/" + PROJECT_NAME + "/pull"
GITHUB_API_BASE = "https://api.github.com/repos/pydata/" + PROJECT_NAME
GITHUB_BASE = "https://github.com/pandas-dev/" + PROJECT_NAME + "/pull"
GITHUB_API_BASE = "https://api.github.com/repos/pandas-dev/" + PROJECT_NAME

# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"
Expand Down
1 change: 1 addition & 0 deletions src/pandas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ install(FILES

set(PANDAS_TEST_LINK_LIBS pandas_test_util ${PANDAS_MIN_TEST_LIBS})

ADD_PANDAS_TEST(array-test)
ADD_PANDAS_TEST(buffer-test)
ADD_PANDAS_TEST(memory-test)
ADD_PANDAS_TEST(util-test)
8 changes: 3 additions & 5 deletions src/pandas/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@

#include "pandas/array.h"
#include "pandas/dispatch.h"
#include "pandas/types.h"
#include "pandas/status.h"
#include "pandas/type.h"

#include "pandas/types/boolean.h"
#include "pandas/types/category.h"
#include "pandas/types/floating.h"
#include "pandas/types/integer.h"

#include "pandas/status.h"
#include "pandas/types/numeric.h"

#endif // PANDAS_API_H
150 changes: 150 additions & 0 deletions src/pandas/array-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// This file is a part of pandas. See LICENSE for details about reuse and
// copyright holders

// Test non-type specific array functionality

#include <cstdint>
#include <limits>
#include <string>

#include "gtest/gtest.h"

#include "pandas/array.h"
#include "pandas/buffer.h"
#include "pandas/memory.h"
#include "pandas/status.h"
#include "pandas/test-util.h"
#include "pandas/type.h"
#include "pandas/types/numeric.h"

using std::string;

namespace pandas {

// Fixture for the type-independent Array interface. SetUp() builds a
// DoubleArray over the eight doubles stored in values_ and keeps it through
// a pointer to the Array base class.
class TestArray : public ::testing::Test {
 public:
  void SetUp() override {
    values_ = {0, 1, 2, 3, 4, 5, 6, 7};

    // Hand the vector's storage to a Buffer as raw bytes
    const auto* raw_bytes = reinterpret_cast<const uint8_t*>(values_.data());
    const auto num_bytes = values_.size() * sizeof(double);
    auto buffer = std::make_shared<Buffer>(raw_bytes, num_bytes);

    array_ = std::make_shared<DoubleArray>(values_.size(), buffer);
  }

 protected:
  // Array under test, accessed only through the base-class interface
  std::shared_ptr<Array> array_;
  // Values the array was constructed from
  std::vector<double> values_;
};

// The fixture's array must report the double type through both its type
// object and its type id, and its length must match the source vector.
TEST_F(TestArray, Attrs) {
  DoubleType expected_type;
  const auto actual_type = array_->type();
  ASSERT_TRUE(actual_type->Equals(expected_type));
  ASSERT_EQ(DataType::DOUBLE, array_->type_id());

  ASSERT_EQ(values_.size(), array_->length());
}

// ----------------------------------------------------------------------
// Array view object

class TestArrayView : public ::testing::Test {
public:
using value_t = double;

void SetUp() {
values_ = {0, 1, 2, 3, 4, 5, 6, 7};

auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values_.data()),
values_.size() * sizeof(value_t));

auto arr = std::make_shared<DoubleArray>(values_.size(), buffer);
view_ = ArrayView(arr);
}

protected:
ArrayView view_;
std::vector<value_t> values_;
};

// Exercises every ArrayView constructor and both assignment operators,
// checking the shared reference count and the offset/length of the view that
// was just created (not of the view it was created from).
TEST_F(TestArrayView, Ctors) {
  // The fixture's view is the only reference to the array
  ASSERT_EQ(1, view_.ref_count());
  ASSERT_EQ(0, view_.offset());
  ASSERT_EQ(values_.size(), view_.length());

  // Copy ctor
  ArrayView view2(view_);
  ASSERT_EQ(2, view2.ref_count());
  ASSERT_EQ(0, view2.offset());
  ASSERT_EQ(values_.size(), view2.length());

  // Move ctor: inspect the destination, never the moved-from view3
  ArrayView view3(view_.data(), 3);
  ArrayView view4(std::move(view3));
  ASSERT_EQ(3, view4.ref_count());
  ASSERT_EQ(3, view4.offset());
  ASSERT_EQ(values_.size() - 3, view4.length());

  // With offset and length
  ArrayView view5(view4.data(), 2, 4);
  ASSERT_EQ(4, view5.ref_count());
  ASSERT_EQ(2, view5.offset());
  ASSERT_EQ(4, view5.length());

  // Copy assignment. `ArrayView v = other;` would be copy *construction*, so
  // default-construct first and then assign to actually reach operator=.
  ArrayView view6;
  view6 = view5;
  ASSERT_EQ(5, view4.ref_count());
  ASSERT_EQ(5, view6.ref_count());
  ASSERT_EQ(2, view6.offset());
  ASSERT_EQ(4, view6.length());

  // Move assignment: the reference held by view6 is transferred to view7, so
  // the total count is unchanged
  ArrayView view7;
  view7 = std::move(view6);
  ASSERT_EQ(5, view4.ref_count());
  ASSERT_EQ(5, view7.ref_count());
  ASSERT_EQ(2, view7.offset());
  ASSERT_EQ(4, view7.length());
}

// EnsureMutable() must leave a uniquely-referenced array in place and must
// give the calling view its own array once the original is shared.
TEST_F(TestArrayView, EnsureMutable) {
  // This only tests for one data type -- we will need to test more rigorously
  // across all data types elsewhere

  const Array* original = view_.data().get();

  // Sole reference: the underlying array must not be replaced
  ASSERT_OK(view_.EnsureMutable());
  ASSERT_EQ(original, view_.data().get());

  // Introduce a second reference to the same array
  ArrayView other_view(view_);

  ASSERT_OK(view_.EnsureMutable());

  // The views now have their own distinct copies of the array
  ASSERT_NE(original, view_.data().get());
  ASSERT_EQ(original, other_view.data().get());

  ASSERT_EQ(1, view_.ref_count());
  ASSERT_EQ(1, other_view.ref_count());
}

// Every slice adds one reference to the shared array, and offsets of nested
// slices are expressed relative to the start of the underlying array.
TEST_F(TestArrayView, Slice) {
  // Open-ended slice of the full view
  ArrayView tail = view_.Slice(3);
  ASSERT_EQ(2, tail.ref_count());
  ASSERT_EQ(3, tail.offset());
  ASSERT_EQ(view_.length() - 3, tail.length());

  // Bounded slice of the full view
  ArrayView window = view_.Slice(2, 4);
  ASSERT_EQ(3, window.ref_count());
  ASSERT_EQ(2, window.offset());
  ASSERT_EQ(4, window.length());

  // Slice of a slice
  ArrayView tail_of_tail = tail.Slice(2);
  ASSERT_EQ(4, tail_of_tail.ref_count());
  ASSERT_EQ(5, tail_of_tail.offset());
  ASSERT_EQ(view_.length() - 5, tail_of_tail.length());

  ArrayView window_of_tail = tail.Slice(1, 2);
  ASSERT_EQ(5, window_of_tail.ref_count());
  ASSERT_EQ(4, window_of_tail.offset());
  ASSERT_EQ(2, window_of_tail.length());
}

} // namespace pandas
91 changes: 91 additions & 0 deletions src/pandas/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,98 @@

#include "pandas/array.h"
#include "pandas/status.h"
#include "pandas/type.h"
#include "pandas/util/logging.h"

namespace pandas {

// ----------------------------------------------------------------------
// Array

// Store the array's logical type and its number of elements.
Array::Array(const std::shared_ptr<DataType>& type, int64_t length)
    : type_(type), length_(length) {
}

// Copy the whole array by forwarding to the ranged Copy overload with the
// range [0, length_). The result is delivered through `out`.
Status Array::Copy(std::shared_ptr<Array>* out) const {
  const Status result = Copy(0, length_, out);
  return result;
}

// ----------------------------------------------------------------------
// ArrayView

// View over the entire array: offset 0 and the array's full length.
// `data` is dereferenced here, so it must not be null.
ArrayView::ArrayView(const std::shared_ptr<Array>& data)
    : data_(data), offset_(0), length_(data->length()) {
}

// View over the elements of `data` from `offset` through the end of the
// array. `data` is dereferenced here, so it must not be null.
//
// offset == data->length() is accepted and yields an empty view; this is
// also the only valid offset for a zero-length array.
ArrayView::ArrayView(const std::shared_ptr<Array>& data, int64_t offset)
    : data_(data),
      offset_(offset),
      length_(data->length() - offset) {
  // Debugging sanity checks
  PANDAS_DCHECK_GE(offset, 0);
  // LE rather than LT so that an empty view at the end of the array (and any
  // view of an empty array) does not trip the check
  PANDAS_DCHECK_LE(offset, data->length());
}

// View over `length` elements of `data` starting at `offset`. `data` is
// dereferenced by the debug checks, so it must not be null.
//
// An empty view (length == 0) is accepted at any offset in
// [0, data->length()], including one past the last element.
ArrayView::ArrayView(const std::shared_ptr<Array>& data, int64_t offset, int64_t length)
    : data_(data),
      offset_(offset),
      length_(length) {
  // Debugging sanity checks
  PANDAS_DCHECK_GE(offset, 0);
  // LE rather than LT: offset == data->length() is fine for an empty view,
  // and the length check below still keeps the range inside the array
  PANDAS_DCHECK_LE(offset, data->length());
  PANDAS_DCHECK_GE(length, 0);
  PANDAS_DCHECK_LE(length, data->length() - offset);
}

// Copy ctor: the new view shares the other view's array pointer and takes
// the same offset and length.
ArrayView::ArrayView(const ArrayView& other)
    : data_(other.data_), offset_(other.offset_), length_(other.length_) {
}

// Move ctor: takes over the other view's array pointer. The offset and
// length are copied, and the source's own offset_/length_ members are left
// untouched.
ArrayView::ArrayView(ArrayView&& other)
    : data_(std::move(other.data_)), offset_(other.offset_), length_(other.length_) {
}

// Copy assignment: afterwards this view shares the other view's array and
// has the same offset and length.
ArrayView& ArrayView::operator=(const ArrayView& other) {
  // Plain integer members first, then the shared array pointer
  length_ = other.length_;
  offset_ = other.offset_;
  data_ = other.data_;
  return *this;
}

// Move assignment: takes over the other view's array pointer and copies its
// offset and length. The source's offset_/length_ members are left
// untouched.
ArrayView& ArrayView::operator=(ArrayView&& other) {
  // Plain integer members first, then the shared array pointer
  length_ = other.length_;
  offset_ = other.offset_;
  data_ = std::move(other.data_);
  return *this;
}

// Make sure this view does not share its array with any other reference.
//
// If ref_count() is greater than 1, the whole underlying array is copied via
// Array::Copy and this view is re-pointed at the copy; offset_ and length_
// are not changed. Otherwise nothing happens. Returns the failing Status if
// the copy does not succeed, Status::OK() otherwise.
Status ArrayView::EnsureMutable() {
  // Not shared: nothing to do
  if (!(ref_count() > 1)) {
    return Status::OK();
  }

  std::shared_ptr<Array> private_copy;
  RETURN_NOT_OK(data_->Copy(&private_copy));
  data_ = private_copy;
  return Status::OK();
}

// Return a view over the same array that starts `offset` elements into this
// view and runs to this view's end.
//
// `offset` is relative to this view; the returned view's offset() is
// relative to the underlying array (offset_ + offset).
ArrayView ArrayView::Slice(int64_t offset) {
  // Debugging sanity checks: the slice must stay inside this view, not merely
  // inside the underlying array
  PANDAS_DCHECK_GE(offset, 0);
  PANDAS_DCHECK_LE(offset, length_);
  return ArrayView(data_, offset_ + offset, length_ - offset);
}

// Return a view over the same array covering `length` elements, starting
// `offset` elements into this view.
//
// `offset` is relative to this view; the returned view's offset() is
// relative to the underlying array (offset_ + offset).
ArrayView ArrayView::Slice(int64_t offset, int64_t length) {
  // Debugging sanity checks: the constructor only validates against the
  // underlying array, so a slice of a slice could otherwise reach past this
  // view's own end without being noticed
  PANDAS_DCHECK_GE(offset, 0);
  PANDAS_DCHECK_GE(length, 0);
  PANDAS_DCHECK_LE(offset, length_);
  PANDAS_DCHECK_LE(length, length_ - offset);
  return ArrayView(data_, offset_ + offset, length);
}

// Number of shared_ptr owners of the underlying array, as reported by
// shared_ptr::use_count() on this view's array pointer.
int64_t ArrayView::ref_count() const {
  const int64_t owners = data_.use_count();
  return owners;
}

} // namespace pandas
Loading