Skip to content

Commit

Permalink
maint: Remove usage of folly::enumerate
Browse files Browse the repository at this point in the history
Signed-off-by: Julien Jerphanion <git@jjerphan.xyz>
  • Loading branch information
jjerphan committed Feb 28, 2024
1 parent bb2278d commit 5a5dbdd
Show file tree
Hide file tree
Showing 25 changed files with 252 additions and 199 deletions.
1 change: 0 additions & 1 deletion cpp/arcticdb/column_store/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <arcticdb/util/preconditions.hpp>
#include <arcticdb/util/sparse_utils.hpp>

#include <folly/container/Enumerate.h>
// Compilation fails on Mac if cstdio is not included prior to folly/Function.h due to a missing definition of memalign in folly/Memory.h
#ifdef __APPLE__
#include <cstdio>
Expand Down
7 changes: 3 additions & 4 deletions cpp/arcticdb/column_store/column_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
#include <arcticdb/entity/types.hpp>
#include <arcticdb/util/offset_string.hpp>

#include <folly/container/Enumerate.h>

#ifdef ARCTICDB_USING_CONDA
#include <robin_hood.h>
#else
Expand Down Expand Up @@ -59,8 +57,9 @@ class ColumnMap {
}

void set_from_descriptor(const StreamDescriptor& descriptor) {
for(const auto& field : folly::enumerate(descriptor.fields())) {
insert(field->name(), field.index);
for(size_t i = 0; i < descriptor.fields().size(); i++) {
const auto& field = descriptor.fields()[i];
insert(field.name(), i);
}
}

Expand Down
52 changes: 29 additions & 23 deletions cpp/arcticdb/column_store/memory_segment_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,17 +188,18 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(const util::Bit

// Index is built to make rank queries faster
std::unique_ptr<util::BitIndex> filter_idx;
for(const auto& column : folly::enumerate(columns())) {
(*column)->type().visit_tag([&] (auto type_desc_tag){
for(size_t index = 0, columns_size = columns().size(); index < columns_size; index++) {
const auto& column = columns_[index];
column->type().visit_tag([&] (auto type_desc_tag){
using TypeDescriptorTag = decltype(type_desc_tag);
using DataTypeTag = typename TypeDescriptorTag::DataTypeTag;
using RawType = typename DataTypeTag::raw_type;
const util::BitSet* final_bitset;
util::BitSet bitset_including_sparse;

auto sparse_map = (*column)->opt_sparse_map();
auto sparse_map = column->opt_sparse_map();
std::unique_ptr<util::BitIndex> sparse_idx;
auto output_col_idx = column.index;
auto output_col_idx = index;
if (is_input_sparse || sparse_map) {
filter_idx = std::make_unique<util::BitIndex>();
filter_bitset.build_rs_index(filter_idx.get());
Expand All @@ -209,13 +210,13 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(const util::Bit
sparse_idx = std::make_unique<util::BitIndex>();
sparse_map.value().build_rs_index(sparse_idx.get());
} else {
bitset_including_sparse.resize((*column)->row_count());
bitset_including_sparse.resize(column->row_count());
}
if (bitset_including_sparse.count() == 0) {
// No values are set in the sparse column, skip it
return;
}
output_col_idx = output->add_column(field(column.index), bitset_including_sparse.count(), true);
output_col_idx = output->add_column(field(index), bitset_including_sparse.count(), true);
final_bitset = &bitset_including_sparse;
} else {
final_bitset = &filter_bitset;
Expand All @@ -224,7 +225,7 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(const util::Bit
if (sparse_map)
output_col.opt_sparse_map() = std::make_optional<util::BitSet>();
auto output_ptr = reinterpret_cast<RawType*>(output_col.ptr());
auto input_data = (*column)->data();
auto input_data = column->data();

auto bitset_iter = final_bitset->first();
auto row_count_so_far = 0;
Expand Down Expand Up @@ -356,11 +357,12 @@ std::vector<std::shared_ptr<SegmentInMemoryImpl>> SegmentInMemoryImpl::partition
return output;
}

for (const auto& segment_count: folly::enumerate(segment_counts)) {
if (*segment_count > 0) {
auto& seg = output.at(segment_count.index);
seg = get_output_segment(*segment_count);
seg->set_row_data(*segment_count - 1);
for (size_t idx = 0, segment_counts_size = segment_counts.size(); idx < segment_counts_size; idx++) {
const auto& segment_count = segment_counts.at(idx);
if (segment_count > 0) {
auto& seg = output.at(idx);
seg = get_output_segment(segment_count);
seg->set_row_data(segment_count - 1);
seg->set_string_pool(string_pool_);
seg->set_compacted(compacted_);
if (metadata_) {
Expand All @@ -371,21 +373,23 @@ std::vector<std::shared_ptr<SegmentInMemoryImpl>> SegmentInMemoryImpl::partition
}
}

for(const auto& column : folly::enumerate(columns())) {
(*column)->type().visit_tag([&] (auto type_desc_tag){
for(size_t idx = 0, columns_size = columns().size(); idx < columns_size; idx++) {
const auto& column = columns_[idx];
column->type().visit_tag([&] (auto type_desc_tag){
using TypeDescriptorTag = decltype(type_desc_tag);
using ColumnTagType = typename TypeDescriptorTag::DataTypeTag;
using RawType = typename ColumnTagType::raw_type;

auto output_col_idx = column.index;
auto output_col_idx = idx;
std::vector<RawType*> output_ptrs{output.size(), nullptr};
for (const auto& segment: folly::enumerate(output)) {
if (static_cast<bool>(*segment)) {
output_ptrs.at(segment.index) = reinterpret_cast<RawType*>((*segment)->column(output_col_idx).ptr());
for (size_t seg_idx = 0; seg_idx < output.size(); seg_idx++) {
const auto& segment = output.at(seg_idx);
if (static_cast<bool>(segment)) {
output_ptrs.at(seg_idx) = reinterpret_cast<RawType*>((segment)->column(output_col_idx).ptr());
}
}

auto input_data = (*column)->data();
auto input_data = column->data();
size_t overall_idx = 0;
while(auto block = input_data.next<TypeDescriptorTag>()) {
auto input_ptr = reinterpret_cast<const RawType*>(block.value().data());
Expand Down Expand Up @@ -460,7 +464,8 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::truncate(
output->set_metadata(std::move(metadata));
}

for (const auto&& [idx, column] : folly::enumerate(columns_)) {
for (size_t idx = 0, columns_size = columns_.size(); idx < columns_size; idx++) {
const auto& column = columns_[idx];
const TypeDescriptor column_type = column->type();
const Field& field = descriptor_->field(idx);
std::shared_ptr<Column> truncated_column = Column::truncate(column, start_row, end_row);
Expand Down Expand Up @@ -492,9 +497,10 @@ void SegmentInMemoryImpl::concatenate(SegmentInMemoryImpl&& other, bool unique_c
row_count() == other.row_count(),
"Cannot concatenate segments with differing row counts: {} {}",
row_count(), other.row_count());
for (const auto& field: folly::enumerate(other.fields())) {
if (!unique_column_names || !column_index(field->name()).has_value()) {
add_column(*field, other.column_ptr(field.index));
for (size_t idx = 0, fields_size = other.fields().size(); idx < fields_size; idx++) {
const auto& field = other.fields()[idx];
if (!unique_column_names || !column_index(field.name()).has_value()) {
add_column(field, other.column_ptr(idx));
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions cpp/arcticdb/column_store/memory_segment_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <arcticdb/entity/stream_descriptor.hpp>

#include <boost/iterator/iterator_facade.hpp>
#include <folly/container/Enumerate.h>

namespace google::protobuf
{
Expand Down Expand Up @@ -438,11 +437,13 @@ class SegmentInMemoryImpl {
}

void push_back(const Row &row) {
for (auto it : folly::enumerate(row)) {
it->visit([&it, that=this](const auto &val) {
size_t index = 0;
for (auto it: row) {
it.visit([&it, &index, that=this](const auto &val) {
if(val)
that->set_scalar(it.index, val.value());
that->set_scalar(index, val);
});
index++;
}
end_row();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include <arcticdb/column_store/memory_segment.hpp>
#include <arcticdb/stream/test/stream_test_common.hpp>
#include <folly/container/Enumerate.h>

#include <algorithm>

Expand Down
62 changes: 39 additions & 23 deletions cpp/arcticdb/column_store/test/test_memory_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,23 +107,27 @@ TEST(MemSegment, IterateAndGetValues) {
auto frame_wrapper = get_test_timeseries_frame("test_get_values", 100, 0);
auto& segment = frame_wrapper.segment_;

for( auto row : folly::enumerate(segment)) {
for(auto value : folly::enumerate(*row)) {
value->visit([&] (const auto& val) {
size_t row_index = 0;
for(auto row : segment) {
size_t value_index = 0;
for(auto value : row) {
value.visit([&] (const auto& val) {
using ValType = std::decay_t<decltype(val)>;
if( value.index == 0) {
ASSERT_EQ(static_cast<ValType>(row.index), val);
if(value_index == 0) {
ASSERT_EQ(static_cast<ValType>(row_index), val);
}
else {
if constexpr(std::is_integral_v<ValType>) {
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row.index));
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row_index));
}
if constexpr (std::is_floating_point_v<ValType>) {
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row.index));
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row_index));
}
}
});
value_index++;
}
row_index++;
}
}

Expand All @@ -136,12 +140,14 @@ TEST(MemSegment, IterateWithEmptyTypeColumn) {
auto empty_column = std::make_shared<Column>(generate_empty_column());
seg.add_column(scalar_field(empty_column->type().data_type(), "empty_column"), empty_column);
seg.set_row_id(num_rows - 1);
for (auto&& [idx, row]: folly::enumerate(seg)) {
auto idx = 0;
for (auto&& row: seg) {
ASSERT_EQ(static_cast<int64_t>(idx), row.scalar_at<int64_t>(0));
// Exception should be thrown regardless of the type requested for empty type columns
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<int64_t>(1).has_value(), InternalException);
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<float>(1).has_value(), InternalException);
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<uint8_t>(1).has_value(), InternalException);
++idx;
}
}

Expand All @@ -151,23 +157,27 @@ TEST(MemSegment, CopyViaIterator) {
auto target = get_test_empty_timeseries_segment("to_sort", 0u);
std::copy(std::begin(source), std::end(source), std::back_inserter(target));

for( auto row : folly::enumerate(target)) {
for(auto value : folly::enumerate(*row)) {
value->visit([&] (const auto& val) {
size_t row_index = 0;
for(auto row : target) {
size_t value_index = 0;
for(auto value : row) {
value.visit([&] (const auto& val) {
using ValType = std::decay_t<decltype(val)>;
if( value.index == 0) {
ASSERT_EQ(static_cast<ValType>(row.index), val);
if(value_index == 0) {
ASSERT_EQ(static_cast<ValType>(row_index), val);
}
else {
if constexpr(std::is_integral_v<ValType>) {
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row.index));
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row_index));
}
if constexpr (std::is_floating_point_v<ValType>) {
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row.index));
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row_index));
}
}
});
value_index++;
}
row_index++;
}
}

Expand All @@ -187,22 +197,26 @@ TEST(MemSegment, ModifyViaIterator) {
}
}

for (auto row : folly::enumerate(segment)) {
for (auto value : folly::enumerate(*row)) {
value->visit([&](const auto &val) {
size_t row_index = 0;
for (auto row : segment) {
size_t value_index = 0;
for (auto value : row) {
value.visit([&](const auto &val) {
using ValType = std::decay_t<decltype(val)>;
if (value.index == 0) {
ASSERT_EQ(static_cast<ValType>(row.index + 1), val);
if (value_index == 0) {
ASSERT_EQ(static_cast<ValType>(row_index + 1), val);
} else {
if constexpr(std::is_integral_v<ValType>) {
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row.index) + 1);
ASSERT_EQ(val, get_integral_value_for_offset<ValType>(0, row_index) + 1);
}
if constexpr (std::is_floating_point_v<ValType>) {
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row.index) + 1);
ASSERT_EQ(val, get_floating_point_value_for_offset<ValType>(0, row_index) + 1);
}
}
});
value_index++;
}
row_index++;
}
}

Expand Down Expand Up @@ -483,12 +497,14 @@ TEST(MemSegment, Filter) {

auto filtered_seg = seg.filter(filter_bitset);

for (auto&& [idx, row]: folly::enumerate(filtered_seg)) {
size_t idx = 0;
for (auto row: filtered_seg) {
ASSERT_EQ(static_cast<int64_t>(retained_rows[idx]), row.scalar_at<int64_t>(0));
// Exception should be thrown regardless of the type requested for empty type columns
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<int64_t>(1).has_value(), InternalException);
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<float>(1).has_value(), InternalException);
EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at<uint8_t>(1).has_value(), InternalException);
++idx;
}
}

5 changes: 3 additions & 2 deletions cpp/arcticdb/pipeline/column_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ SegmentInMemory merge_column_stats_segments(const std::vector<SegmentInMemory>&
}
}
}
for (const auto& type_descriptor: folly::enumerate(type_descriptors)) {
merged.add_column(FieldRef{*type_descriptor, field_names.at(type_descriptor.index)}, 0, false);
for (size_t index = 0; index < type_descriptors.size(); index++) {
auto &type_descriptor = type_descriptors.at(index);
merged.add_column(FieldRef{type_descriptor, field_names.at(index)}, 0, false);
}
for (auto &segment : segments) {
merged.append(segment);
Expand Down
19 changes: 10 additions & 9 deletions cpp/arcticdb/pipeline/frame_slice_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ struct FrameSliceMap {
const auto& row_range = context_row.slice_and_key().slice_.row_range;

const auto& fields = context_row.descriptor().fields();
for(const auto& field : folly::enumerate(fields)) {
if (!context_->is_in_filter_columns_set(field->name())) {
ARCTICDB_DEBUG(log::version(), "{} not present in filtered columns, skipping", field->name());
for(size_t i = 0; i < fields.size(); i++) {
const auto& field = fields[i];
if (!context_->is_in_filter_columns_set(field.name())) {
ARCTICDB_DEBUG(log::version(), "{} not present in filtered columns, skipping", field.name());
continue;
}

const entity::DataType row_range_type = field->type().data_type();
const entity::DataType row_range_type = field.type().data_type();
if(!dynamic_schema && !is_sequence_type(row_range_type)) {
// In case we end up with static schema and empty we must check the type of the whole column
// Because we could be reading an empty segment of a string column. Example: start with [None],
Expand All @@ -43,21 +44,21 @@ struct FrameSliceMap {
// TODO: This logic won't be needed when we move string handling into separate type handler
if(is_empty_type(row_range_type)) {
const entity::StreamDescriptor& descriptor = context_->descriptor();
const size_t global_field_idx = descriptor.find_field(field->name()).value();
const size_t global_field_idx = descriptor.find_field(field.name()).value();
const Field& global_field = descriptor.field(global_field_idx);
const entity::DataType global_field_type = global_field.type().data_type();
if(!is_sequence_type(global_field_type)) {
ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field->name());
ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field.name());
continue;
}
} else {
ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field->name());
ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field.name());
continue;
}
}

auto& column = columns_[field->name()];
ContextData data{context_row.index_, field.index};
auto& column = columns_[field.name()];
ContextData data{context_row.index_, i};
column.insert(std::make_pair(row_range, data));
}
}
Expand Down
6 changes: 4 additions & 2 deletions cpp/arcticdb/pipeline/test/test_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,10 @@ struct TestProjection {
desc.add_field(fd);
auto col_index = segment.add_column(fd, 0, false);
auto& column = segment.column(col_index);
for(auto&& row : folly::enumerate(segment)) {
column.set_scalar(row.index, projection_func_(*row));
size_t row_index = 0;
for(const auto& row : segment) {
column.set_scalar(row_index, projection_func_(row));
++row_index;
}
return segment;
}
Expand Down

0 comments on commit 5a5dbdd

Please sign in to comment.