Skip to content

Commit

Permalink
wrap DataCell contents in an Arc to work around #1746
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Apr 3, 2023
1 parent 843244f commit 113b09e
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 27 deletions.
2 changes: 2 additions & 0 deletions crates/re_arrow_store/src/store_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ impl IndexedBucketInner {

{
crate::profile_scope!("control");

fn reshuffle_control_column<T: Copy, const N: usize>(
column: &mut SmallVec<[T; N]>,
swaps: &[(usize, usize)],
Expand All @@ -914,6 +915,7 @@ impl IndexedBucketInner {
}
}
}

reshuffle_control_column(col_time, &swaps);
if !col_insert_id.is_empty() {
reshuffle_control_column(col_insert_id, &swaps);
Expand Down
55 changes: 33 additions & 22 deletions crates/re_log_types/src/data_cell.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::Arc;

use itertools::Itertools as _;

use crate::{Component, ComponentName, DeserializableComponent, SerializableComponent};
Expand Down Expand Up @@ -91,6 +93,22 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
///
#[derive(Debug, Clone, PartialEq)]
pub struct DataCell {
/// While the arrow data is already refcounted, the contents of the `DataCell` still have to
/// be wrapped in an `Arc` to work around performance issues in `arrow2`.
///
/// See [`DataCellInner`] for more information.
pub inner: Arc<DataCellInner>,
}

/// The actual contents of a [`DataCell`].
///
/// Despite the fact that the arrow data is already refcounted, this has to live separately, behind
/// an `Arc`, to work around performance issues in `arrow2` that stem from its heavy use of nested
/// virtual calls.
///
/// See #1746 for details.
#[derive(Debug, Clone, PartialEq)]
pub struct DataCellInner {
/// Name of the component type used in this cell.
//
// TODO(#1696): Store this within the datatype itself.
Expand Down Expand Up @@ -204,7 +222,9 @@ impl DataCell {
name: ComponentName,
values: Box<dyn arrow2::array::Array>,
) -> DataCellResult<Self> {
Ok(Self { name, values })
Ok(Self {
inner: Arc::new(DataCellInner { name, values }),
})
}

/// Builds a new `DataCell` from an arrow array.
Expand Down Expand Up @@ -237,8 +257,10 @@ impl DataCell {
) -> DataCellResult<Self> {
// TODO(cmc): check that it is indeed a component datatype
Ok(Self {
name,
values: arrow2::array::new_empty_array(datatype),
inner: Arc::new(DataCellInner {
name,
values: arrow2::array::new_empty_array(datatype),
}),
})
}

Expand All @@ -253,17 +275,6 @@ impl DataCell {

// ---

/// Returns the contents of the cell as an arrow array.
///
/// Avoid using raw arrow arrays unless you absolutely have to: prefer working directly with
/// `DataCell`s, `DataRow`s & `DataTable`s instead.
/// If you do use them, try to keep the scope as short as possible: holding on to a raw array
/// might prevent the datastore from releasing memory from garbage collected data.
#[inline]
pub fn into_arrow(self) -> Box<dyn arrow2::array::Array> {
self.values
}

/// Returns the contents of the cell as an arrow array (shallow clone).
///
/// Avoid using raw arrow arrays unless you absolutely have to: prefer working directly with
Expand All @@ -272,7 +283,7 @@ impl DataCell {
/// might prevent the datastore from releasing memory from garbage collected data.
#[inline]
pub fn as_arrow(&self) -> Box<dyn arrow2::array::Array> {
self.values.clone() /* shallow */
self.inner.values.clone() /* shallow */
}

/// Returns the contents of the cell as a reference to an arrow array.
Expand All @@ -283,7 +294,7 @@ impl DataCell {
/// might prevent the datastore from releasing memory from garbage collected data.
#[inline]
pub fn as_arrow_ref(&self) -> &dyn arrow2::array::Array {
&*self.values
&*self.inner.values
}

/// Returns the contents of the cell as an arrow array (shallow clone) wrapped in a unit-length
Expand Down Expand Up @@ -327,7 +338,7 @@ impl DataCell {
for<'a> &'a C::ArrayType: IntoIterator,
{
use arrow2_convert::deserialize::arrow_array_deserialize_iterator;
arrow_array_deserialize_iterator(&*self.values).map_err(Into::into)
arrow_array_deserialize_iterator(&*self.inner.values).map_err(Into::into)
}

/// Returns the contents of the cell as an iterator of native components.
Expand All @@ -349,24 +360,24 @@ impl DataCell {
/// The name of the component type stored in the cell.
#[inline]
pub fn component_name(&self) -> ComponentName {
self.name
self.inner.name
}

/// The type of the component stored in the cell, i.e. the cell is an array of that type.
#[inline]
pub fn datatype(&self) -> &arrow2::datatypes::DataType {
self.values.data_type()
self.inner.values.data_type()
}

/// The length of the cell's array, i.e. how many component instances are in the cell?
#[inline]
pub fn num_instances(&self) -> u32 {
self.values.len() as _
self.inner.values.len() as _
}

#[inline]
pub fn is_empty(&self) -> bool {
self.values.is_empty()
self.inner.values.is_empty()
}

/// Returns `true` if the underlying array is dense (no nulls).
Expand Down Expand Up @@ -462,7 +473,7 @@ impl DataCell {
///
/// Beware: this is costly! Cache the returned value as much as possible.
pub fn size_bytes(&self) -> u64 {
let Self { name, values } = self;
let DataCellInner { name, values } = &*self.inner;

std::mem::size_of_val(name) as u64 +
// Warning: this is surprisingly costly!
Expand Down
2 changes: 1 addition & 1 deletion crates/re_log_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub use self::component_types::ViewCoordinates;
pub use self::component_types::{EncodedMesh3D, Mesh3D, MeshFormat, MeshId, RawMesh3D};
pub use self::component_types::{MsgId, RowId, TableId};
pub use self::data::*;
pub use self::data_cell::{DataCell, DataCellError, DataCellResult};
pub use self::data_cell::{DataCell, DataCellError, DataCellInner, DataCellResult};
pub use self::data_row::{DataRow, DataRowError, DataRowResult};
pub use self::data_table::{
DataCellColumn, DataCellOptVec, DataTable, DataTableError, DataTableResult, EntityPathVec,
Expand Down
4 changes: 2 additions & 2 deletions crates/re_query/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ pub fn get_component_with_instances(

Ok(ComponentWithInstances {
name: component,
instance_keys: cells[0].take().map(|cell| cell.into_arrow()),
instance_keys: cells[0].take().map(|cell| cell.as_arrow()),
values: cells[1]
.take()
.map(|cell| cell.into_arrow())
.map(|cell| cell.as_arrow())
.ok_or(QueryError::PrimaryNotFound)?,
})
}
Expand Down
4 changes: 2 additions & 2 deletions crates/re_query/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub fn range_entity_with_primary<'a, Primary: Component + 'a, const N: usize>(
store
.range(query, ent_path, components)
.map(move |(time, _, mut cells)| {
let instance_keys = cells[cluster_col].take().map(|cell| cell.into_arrow());
let instance_keys = cells[cluster_col].take().map(|cell| cell.as_arrow());
let is_primary = cells[primary_col].is_some();
let cwis = cells
.into_iter()
Expand All @@ -103,7 +103,7 @@ pub fn range_entity_with_primary<'a, Primary: Component + 'a, const N: usize>(
cell.map(|cell| ComponentWithInstances {
name: components[i],
instance_keys: instance_keys.clone(), /* shallow */
values: cell.into_arrow(),
values: cell.as_arrow(),
})
})
.collect::<Vec<_>>();
Expand Down

0 comments on commit 113b09e

Please sign in to comment.