From ea9c70327d811c2e21985cd02af81ccbfd817872 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 13 Feb 2024 15:54:03 +0100 Subject: [PATCH 1/8] fix passing incorrect device limits on re_renderer examples --- crates/re_renderer_examples/framework.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/re_renderer_examples/framework.rs b/crates/re_renderer_examples/framework.rs index cf4adcb89618..415c5f1258a3 100644 --- a/crates/re_renderer_examples/framework.rs +++ b/crates/re_renderer_examples/framework.rs @@ -132,8 +132,7 @@ impl Application { &wgpu::DeviceDescriptor { label: None, required_features: wgpu::Features::empty(), - required_limits: wgpu::Limits::downlevel_webgl2_defaults() - .using_resolution(adapter.limits()), + required_limits: device_caps.limits(), }, None, ) From f60cf22167bca4283dffb343897f551b1d8f9342 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 13 Feb 2024 18:46:42 +0100 Subject: [PATCH 2/8] dynamically sized point cloud data texture --- crates/re_renderer/shader/point_cloud.wgsl | 9 +- .../src/allocator/cpu_write_gpu_read_belt.rs | 34 +++-- .../src/allocator/gpu_readback_belt.rs | 10 +- .../src/draw_phases/picking_layer.rs | 14 +- .../re_renderer/src/draw_phases/screenshot.rs | 28 ++-- crates/re_renderer/src/point_cloud_builder.rs | 29 ++-- crates/re_renderer/src/renderer/lines.rs | 2 +- .../re_renderer/src/renderer/point_cloud.rs | 139 ++++++++++++------ crates/re_renderer/src/texture_info.rs | 10 +- crates/re_renderer_examples/2d.rs | 2 +- crates/re_renderer_examples/depth_cloud.rs | 2 +- crates/re_renderer_examples/multiview.rs | 2 +- crates/re_renderer_examples/picking.rs | 8 +- .../src/contexts/shared_render_builders.rs | 11 +- 14 files changed, 181 insertions(+), 119 deletions(-) diff --git a/crates/re_renderer/shader/point_cloud.wgsl b/crates/re_renderer/shader/point_cloud.wgsl index a1c37db4e447..f6aebb5ec9a6 100644 --- a/crates/re_renderer/shader/point_cloud.wgsl +++ b/crates/re_renderer/shader/point_cloud.wgsl @@ -40,8 +40,6 @@ var batch: BatchUniformBuffer; const FLAG_ENABLE_SHADING: u32 = 1u; const FLAG_DRAW_AS_CIRCLES: u32 = 2u; -const TEXTURE_SIZE: u32 = 2048u; - struct VertexOut { @builtin(position) position: vec4f, @@ -77,16 +75,19 @@ struct PointData { // Read and unpack data at a given location fn read_data(idx: u32) -> PointData { - let coord = vec2u(idx % TEXTURE_SIZE, idx / TEXTURE_SIZE); + let texture_size = textureDimensions(position_data_texture); + let coord = vec2u(idx % texture_size.x, idx / texture_size.x); + let position_data = textureLoad(position_data_texture, coord, 0); let color = textureLoad(color_texture, coord, 0); + let picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg; var data: PointData; let pos_4d = batch.world_from_obj * vec4f(position_data.xyz, 1.0); data.pos = pos_4d.xyz / pos_4d.w; data.unresolved_radius = position_data.w; data.color = color; - data.picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg; + data.picking_instance_id = picking_instance_id; return data; } diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index abaa6a3c93fc..c27be67c1126 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -10,8 +10,14 @@ pub enum CpuWriteGpuReadError { #[error("Attempting to allocate an empty buffer.")] ZeroSizeBufferAllocation, - #[error("Buffer is full, can't append more data! Buffer has capacity for {buffer_element_capacity} elements.")] - BufferFull { buffer_element_capacity: usize }, + #[error("Buffer is full, can't append more data! + Buffer contains {buffer_element_size} elements and has a capacity for {buffer_element_capacity} elements. + Tried to add {num_elements_attempted_to_add} elements.")] + BufferFull { + buffer_element_capacity: usize, + buffer_element_size: usize, + num_elements_attempted_to_add: usize, + }, #[error("Target buffer has a size of {target_buffer_size}, can't write {copy_size} bytes with an offset of {destination_offset}!")] TargetBufferTooSmall { @@ -20,9 +26,9 @@ pub enum CpuWriteGpuReadError { destination_offset: u64, }, - #[error("Target texture doesn't fit the size of the written data to this buffer! Texture size: {target_texture_size} bytes, written data size: {written_data_size} bytes")] + #[error("Target texture doesn't fit the size of the written data to this buffer! Texture copy size: {copy_size:?} bytes, written data size: {written_data_size} bytes")] TargetTextureBufferSizeMismatch { - target_texture_size: u64, + copy_size: wgpu::Extent3d, written_data_size: usize, }, } @@ -95,6 +101,8 @@ where ( Err(CpuWriteGpuReadError::BufferFull { buffer_element_capacity: self.capacity(), + buffer_element_size: self.num_written(), + num_elements_attempted_to_add: elements.len(), }), &elements[..self.remaining_capacity()], ) @@ -133,6 +141,8 @@ where if self.unwritten_element_range.start >= self.unwritten_element_range.end { return Err(CpuWriteGpuReadError::BufferFull { buffer_element_capacity: self.capacity(), + buffer_element_size: self.num_written(), + num_elements_attempted_to_add: 1, }); } @@ -154,6 +164,8 @@ where ( Err(CpuWriteGpuReadError::BufferFull { buffer_element_capacity: self.capacity(), + buffer_element_size: self.num_written(), + num_elements_attempted_to_add: num_elements, }), self.remaining_capacity(), ) @@ -183,6 +195,8 @@ where if self.remaining_capacity() == 0 { return Err(CpuWriteGpuReadError::BufferFull { buffer_element_capacity: self.capacity(), + buffer_element_size: self.num_written(), + num_elements_attempted_to_add: 1, }); } @@ -221,9 +235,9 @@ where self, encoder: &mut wgpu::CommandEncoder, destination: wgpu::ImageCopyTexture<'_>, - copy_extent: glam::UVec2, + copy_size: wgpu::Extent3d, ) -> Result<(), CpuWriteGpuReadError> { - let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent); + let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_size); // Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check). // We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed! @@ -231,7 +245,7 @@ where if buffer_info.buffer_size_padded as usize != self.num_written() * std::mem::size_of::() { return Err(CpuWriteGpuReadError::TargetTextureBufferSizeMismatch { - target_texture_size: buffer_info.buffer_size_padded, + copy_size, written_data_size: self.num_written() * std::mem::size_of::(), }); } @@ -246,11 +260,7 @@ where }, }, destination, - wgpu::Extent3d { - width: copy_extent.x, - height: copy_extent.y, - depth_or_array_layers: 1, - }, + copy_size, ); Ok(()) diff --git a/crates/re_renderer/src/allocator/gpu_readback_belt.rs b/crates/re_renderer/src/allocator/gpu_readback_belt.rs index d772128cf18a..ae6d0215c509 100644 --- a/crates/re_renderer/src/allocator/gpu_readback_belt.rs +++ b/crates/re_renderer/src/allocator/gpu_readback_belt.rs @@ -42,7 +42,7 @@ impl GpuReadbackBuffer { &mut self, encoder: &mut wgpu::CommandEncoder, source: wgpu::ImageCopyTexture<'_>, - copy_extents: glam::UVec2, + copy_extents: wgpu::Extent3d, ) -> Result<(), GpuReadbackError> { self.read_multiple_texture2d(encoder, &[(source, copy_extents)]) } @@ -60,7 +60,7 @@ impl GpuReadbackBuffer { pub fn read_multiple_texture2d( &mut self, encoder: &mut wgpu::CommandEncoder, - sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, glam::UVec2)], + sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, wgpu::Extent3d)], ) -> Result<(), GpuReadbackError> { for (source, copy_extents) in sources_and_extents { let start_offset = wgpu::util::align_to( @@ -92,11 +92,7 @@ impl GpuReadbackBuffer { rows_per_image: None, }, }, - wgpu::Extent3d { - width: copy_extents.x, - height: copy_extents.y, - depth_or_array_layers: 1, - }, + *copy_extents, ); self.range_in_chunk = diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs index 0a9026eb1d1b..03f6d322fe4e 100644 --- a/crates/re_renderer/src/draw_phases/picking_layer.rs +++ b/crates/re_renderer/src/draw_phases/picking_layer.rs @@ -261,14 +261,15 @@ impl PickingLayerProcessor { frame_uniform_buffer, ); - let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent); + let row_info_id = + Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.wgpu_extent()); let row_info_depth = Texture2DBufferInfo::new( if direct_depth_readback { Self::PICKING_LAYER_DEPTH_FORMAT } else { DepthReadbackWorkaround::READBACK_FORMAT }, - picking_rect.extent, + picking_rect.wgpu_extent(), ); // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel. @@ -343,10 +344,7 @@ impl PickingLayerProcessor { encoder: &mut wgpu::CommandEncoder, render_pipelines: &GpuRenderPipelinePoolAccessor<'_>, ) -> Result<(), PickingLayerError> { - let extent = glam::uvec2( - self.picking_target.texture.width(), - self.picking_target.texture.height(), - ); + let extent = self.picking_target.texture.size(); let readable_depth_texture = if let Some(depth_copy_workaround) = self.depth_readback_workaround.as_ref() { @@ -420,7 +418,7 @@ impl PickingLayerProcessor { let buffer_info_id = Texture2DBufferInfo::new( Self::PICKING_LAYER_FORMAT, - metadata.picking_rect.extent, + metadata.picking_rect.wgpu_extent(), ); let buffer_info_depth = Texture2DBufferInfo::new( if metadata.depth_readback_workaround_in_use { @@ -428,7 +426,7 @@ impl PickingLayerProcessor { } else { Self::PICKING_LAYER_DEPTH_FORMAT }, - metadata.picking_rect.extent, + metadata.picking_rect.wgpu_extent(), ); let picking_id_data = buffer_info_id diff --git a/crates/re_renderer/src/draw_phases/screenshot.rs b/crates/re_renderer/src/draw_phases/screenshot.rs index b954d1be6f72..d753f42a1b07 100644 --- a/crates/re_renderer/src/draw_phases/screenshot.rs +++ b/crates/re_renderer/src/draw_phases/screenshot.rs @@ -21,7 +21,7 @@ use crate::{ /// Type used as user data on the gpu readback belt. struct ReadbackBeltMetadata { - extent: glam::UVec2, + extent: wgpu::Extent3d, user_data: T, } @@ -41,14 +41,19 @@ impl ScreenshotProcessor { readback_identifier: GpuReadbackIdentifier, readback_user_data: T, ) -> Self { - let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution); + let size = wgpu::Extent3d { + width: resolution.x, + height: resolution.y, + depth_or_array_layers: 1, + }; + let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, size); let screenshot_readback_buffer = Mutex::new(ctx.gpu_readback_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, buffer_info.buffer_size_padded, readback_identifier, Box::new(ReadbackBeltMetadata { - extent: resolution, + extent: size, user_data: readback_user_data, }), )); @@ -57,11 +62,7 @@ impl ScreenshotProcessor { &ctx.device, &TextureDesc { label: format!("{view_name} - ScreenshotProcessor").into(), - size: wgpu::Extent3d { - width: resolution.x, - height: resolution.y, - depth_or_array_layers: 1, - }, + size, mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, @@ -113,10 +114,7 @@ impl ScreenshotProcessor { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - glam::uvec2( - self.screenshot_texture.texture.width(), - self.screenshot_texture.texture.height(), - ), + self.screenshot_texture.texture.size(), ) } @@ -141,7 +139,11 @@ impl ScreenshotProcessor { let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent); let texture_data = buffer_info.remove_padding(data); - on_screenshot(&texture_data, metadata.extent, metadata.user_data); + on_screenshot( + &texture_data, + glam::uvec2(metadata.extent.width, metadata.extent.height), + metadata.user_data, + ); }); screenshot_was_available } diff --git a/crates/re_renderer/src/point_cloud_builder.rs b/crates/re_renderer/src/point_cloud_builder.rs index 473b5e3c291b..1eba385e9dba 100644 --- a/crates/re_renderer/src/point_cloud_builder.rs +++ b/crates/re_renderer/src/point_cloud_builder.rs @@ -24,21 +24,19 @@ pub struct PointCloudBuilder { pub(crate) batches: Vec, pub(crate) radius_boost_in_ui_points_for_outlines: f32, + + max_num_points: usize, } impl PointCloudBuilder { - pub fn new(ctx: &RenderContext) -> Self { - const RESERVE_SIZE: usize = 512; + pub fn new(ctx: &RenderContext, max_num_points: u32) -> Self { + let num_buffer_elements = + PointCloudDrawData::padded_buffer_element_count(ctx, max_num_points); - // TODO(andreas): Be more resourceful about the size allocated here. Typically we know in advance! let color_buffer = ctx .cpu_write_gpu_read_belt .lock() - .allocate::( - &ctx.device, - &ctx.gpu_resources.buffers, - PointCloudDrawData::MAX_NUM_POINTS, - ) + .allocate::(&ctx.device, &ctx.gpu_resources.buffers, num_buffer_elements) .expect("Failed to allocate color buffer"); // TODO(#3408): Should never happen but should propagate error anyways let picking_instance_ids_buffer = ctx .cpu_write_gpu_read_belt @@ -46,16 +44,17 @@ impl PointCloudBuilder { .allocate::( &ctx.device, &ctx.gpu_resources.buffers, - PointCloudDrawData::MAX_NUM_POINTS, + num_buffer_elements, ) .expect("Failed to allocate picking layer buffer"); // TODO(#3408): Should never happen but should propagate error anyways Self { - vertices: Vec::with_capacity(RESERVE_SIZE), + vertices: Vec::with_capacity(max_num_points as usize), color_buffer, picking_instance_ids_buffer, batches: Vec::with_capacity(16), radius_boost_in_ui_points_for_outlines: 0.0, + max_num_points: max_num_points as usize, } } @@ -182,13 +181,13 @@ impl<'a> PointCloudBatchBuilder<'a> { self.0.picking_instance_ids_buffer.num_written() ); - if num_points + self.0.vertices.len() > PointCloudDrawData::MAX_NUM_POINTS { + if num_points + self.0.vertices.len() > self.0.max_num_points { re_log::error_once!( - "Reached maximum number of supported points of {}. \ - See also https://github.com/rerun-io/rerun/issues/957", - PointCloudDrawData::MAX_NUM_POINTS + "Reserved space for {} points, but reached {}. Clamping to previously set maximum", + self.0.max_num_points, + num_points + self.0.vertices.len() ); - num_points = PointCloudDrawData::MAX_NUM_POINTS - self.0.vertices.len(); + num_points = self.0.max_num_points - self.0.vertices.len(); } if num_points == 0 { return self; diff --git a/crates/re_renderer/src/renderer/lines.rs b/crates/re_renderer/src/renderer/lines.rs index 8772e9f0ee81..79046c064298 100644 --- a/crates/re_renderer/src/renderer/lines.rs +++ b/crates/re_renderer/src/renderer/lines.rs @@ -586,7 +586,7 @@ impl LineDrawData { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - glam::uvec2(strip_texture_extent.width, strip_texture_extent.height), + strip_texture_extent, )?; } diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs index 27a1ddf73c9e..830a50b68536 100644 --- a/crates/re_renderer/src/renderer/point_cloud.rs +++ b/crates/re_renderer/src/renderer/point_cloud.rs @@ -160,17 +160,7 @@ pub enum PointCloudDrawDataError { FailedTransferringDataToGpu(#[from] crate::allocator::CpuWriteGpuReadError), } -/// Textures are 2D since 1D textures are very limited in size (8k typically). -/// Need to keep this value in sync with `point_cloud.wgsl`! -/// We store `vec4 + [u8;4]` = 20 bytes per texel. -const DATA_TEXTURE_SIZE: u32 = 2048; // 2ki x 2ki = 4 Mi = 80 MiB - impl PointCloudDrawData { - /// Maximum number of vertices per [`PointCloudDrawData`]. - /// - /// TODO(#3076): Get rid of this limit!. - pub const MAX_NUM_POINTS: usize = (DATA_TEXTURE_SIZE * DATA_TEXTURE_SIZE) as usize; - /// Transforms and uploads point cloud data to be consumed by gpu. /// /// Try to bundle all points into a single draw data instance whenever possible. @@ -212,39 +202,31 @@ impl PointCloudDrawData { batches }; - // Make sure the size of a row is a multiple of the row byte alignment to make buffer copies easier. - static_assertions::const_assert_eq!( - DATA_TEXTURE_SIZE * std::mem::size_of::() as u32 - % wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, - 0 - ); - static_assertions::const_assert_eq!( - DATA_TEXTURE_SIZE * std::mem::size_of::<[u8; 4]>() as u32 - % wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, - 0 - ); - - let vertices = if vertices.len() > Self::MAX_NUM_POINTS { + // Points are stored on a 2d texture (we can't use buffers due to WebGL compatibility). + // 2D texture size limits on desktop is at least 8192x8192. + // Android WebGL is known to only support 4096x4096, see https://web3dsurvey.com/webgl2/parameters/MAX_TEXTURE_SIZE + // Android WebGPU, however almost always supports 8192x8192 and most of the time 16384x16384, see https://web3dsurvey.com/webgpu/limits/maxTextureDimension2D + // => Even with a conservative 4096x4096, we can store 16 million points on a single texture, so we're very unlikely to ever hit this. + // (and the typ typical 16384x16384 gives us a max 268 million points, far more than we can realistically render with this renderer) + let max_texture_dimension_2d = ctx.device.limits().max_texture_dimension_2d; + let max_num_points = max_texture_dimension_2d as usize * max_texture_dimension_2d as usize; + let vertices = if vertices.len() > max_num_points { re_log::error_once!( - "Reached maximum number of supported points. Clamping down to {}, passed were {}. \ - See also https://github.com/rerun-io/rerun/issues/957", - Self::MAX_NUM_POINTS, + "Reached maximum number of supported points. Clamping down to {}, passed were {}.", + max_num_points, vertices.len() ); - &vertices[..Self::MAX_NUM_POINTS] + &vertices[..max_num_points] } else { vertices }; - // TODO(andreas): We want a "stack allocation" here that lives for one frame. - // Note also that this doesn't protect against sharing the same texture with several PointDrawData! + let data_texture_size = + Self::point_cloud_data_texture_size(vertices.len() as u32, max_texture_dimension_2d); + let position_data_texture_desc = TextureDesc { label: "PointCloudDrawData::position_data_texture".into(), - size: wgpu::Extent3d { - width: DATA_TEXTURE_SIZE, - height: DATA_TEXTURE_SIZE, - depth_or_array_layers: 1, - }, + size: data_texture_size, mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, @@ -273,22 +255,16 @@ impl PointCloudDrawData { }, ); - // To make the data upload simpler (and have it be done in copy-operation), we always update full rows of each of our textures - let num_points_written = - wgpu::util::align_to(vertices.len() as u32, DATA_TEXTURE_SIZE) as usize; - let num_elements_padding = num_points_written - vertices.len(); - let texture_copy_extent = glam::uvec2( - DATA_TEXTURE_SIZE, - num_points_written as u32 / DATA_TEXTURE_SIZE, - ); - + let data_texture_element_count = + data_texture_size.width as usize * data_texture_size.height as usize; + let num_elements_padding = data_texture_element_count - vertices.len(); { re_tracing::profile_scope!("write_pos_size_texture"); let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, - num_points_written, + data_texture_element_count, )?; staging_buffer.extend_from_slice(vertices)?; staging_buffer.fill_n(gpu_data::PositionRadius::zeroed(), num_elements_padding)?; @@ -300,7 +276,7 @@ impl PointCloudDrawData { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - texture_copy_extent, + data_texture_size, )?; } @@ -315,7 +291,7 @@ impl PointCloudDrawData { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - texture_copy_extent, + data_texture_size, )?; builder @@ -329,7 +305,7 @@ impl PointCloudDrawData { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - texture_copy_extent, + data_texture_size, )?; let draw_data_uniform_buffer_bindings = create_and_fill_uniform_buffer_batch( @@ -438,7 +414,7 @@ impl PointCloudDrawData { batches.iter().zip(uniform_buffer_bindings.into_iter()) { let point_vertex_range_end = (start_point_for_next_batch + batch_info.point_count) - .min(Self::MAX_NUM_POINTS as u32); + .min(max_num_points as u32); let mut active_phases = enum_set![DrawPhase::Opaque | DrawPhase::PickingLayer]; // Does the entire batch participate in the outline mask phase? if batch_info.overall_outline_mask_ids.is_some() { @@ -480,6 +456,30 @@ impl PointCloudDrawData { batches: batches_internal, }) } + + /// Returns size in number of elements for buffers that store point cloud data. + /// + /// This padding is required since we can only copy full rows of a texture. + pub(crate) fn padded_buffer_element_count(ctx: &RenderContext, max_num_points: u32) -> usize { + let max_texture_dimension_2d = ctx.device.limits().max_texture_dimension_2d; + let data_texture_size = + Self::point_cloud_data_texture_size(max_num_points, max_texture_dimension_2d); + data_texture_size.width as usize * data_texture_size.height as usize + } + + /// Size used for all data textures for a given number of points. + fn point_cloud_data_texture_size( + num_elements: u32, + max_texture_dimension_2d: u32, + ) -> wgpu::Extent3d { + // Not all data textures have an element size of 4. + // For instance, the picking texture has an element size of 16. + // + // However, we'd like to use the same coordinates on all textures, so we're using the same size for all textures. + // By specifying the smallest element size, we might over-pad the textures with bigger elements. + let element_size = 4; + data_texture_size(num_elements, element_size, max_texture_dimension_2d) + } } pub struct PointCloudRenderer { @@ -725,3 +725,46 @@ impl Renderer for PointCloudRenderer { Ok(()) } } + +/// Determines the size for a "data texture", i.e. a texture that we use to store data +/// that we'd otherwise store inside a buffer, but can't due to WebGL compatibility. +/// +/// `max_texture_size` must be a power of two. +/// +/// For convenience, the returned texture size has a width that is also a multiple of `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT` +/// for the given element size. +/// This makes it a lot easier to copy data from a continuous buffer to the texture. +/// If we wouldn't do that, we'd need to do a copy per row in some cases. +fn data_texture_size( + num_elements: u32, + element_size_in_bytes: u32, + max_texture_size: u32, +) -> wgpu::Extent3d { + assert!(max_texture_size.is_power_of_two()); + + // Our data textures are usually accessed in a linear fashion, + // so ideally we'd be using a 1D texture. + // However, 1D textures are very limited in size on many platforms, we we have to use 2D textures instead. + // 2D textures perform a lot better when their dimensions are powers of two, so we'll strictly stick to that even + // when it seems to cause memory overhead. + + // Our current strategy is to fill row by row. + // Note that using shorter rows may be more efficient in some cases. + + let width = if num_elements < max_texture_size { + num_elements + .next_power_of_two() + // Keeping at a multiple of the alignment requirement makes copy operations a lot simpler. + .next_multiple_of(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / element_size_in_bytes) + } else { + max_texture_size + }; + + let height = num_elements.div_ceil(width); + + wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + } +} diff --git a/crates/re_renderer/src/texture_info.rs b/crates/re_renderer/src/texture_info.rs index d22e50d75fe4..b07fc2b6bf56 100644 --- a/crates/re_renderer/src/texture_info.rs +++ b/crates/re_renderer/src/texture_info.rs @@ -23,10 +23,10 @@ impl Texture2DBufferInfo { /// /// If a single buffer is not possible for all aspects of the texture format, all sizes will be zero. #[inline] - pub fn new(format: wgpu::TextureFormat, extent: glam::UVec2) -> Self { + pub fn new(format: wgpu::TextureFormat, extent: wgpu::Extent3d) -> Self { let block_dimensions = format.block_dimensions(); - let width_blocks = extent.x / block_dimensions.0; - let height_blocks = extent.y / block_dimensions.1; + let width_blocks = extent.width / block_dimensions.0; + let height_blocks = extent.height / block_dimensions.1; let block_size = format .block_copy_size(Some(wgpu::TextureAspect::All)) @@ -43,6 +43,10 @@ impl Texture2DBufferInfo { } } + pub fn from_texture(texture: &wgpu::Texture) -> Self { + Self::new(texture.format(), texture.size()) + } + #[inline] pub fn num_rows(&self) -> u32 { self.buffer_size_padded as u32 / self.bytes_per_row_padded diff --git a/crates/re_renderer_examples/2d.rs b/crates/re_renderer_examples/2d.rs index 972ff4aabe38..d6ebc97de7b1 100644 --- a/crates/re_renderer_examples/2d.rs +++ b/crates/re_renderer_examples/2d.rs @@ -173,7 +173,7 @@ impl framework::Example for Render2D { // Moving the windows to a high dpi screen makes the second one bigger. // Also, it looks different under perspective projection. // The third point is automatic thickness which is determined by the point renderer implementation. - let mut point_cloud_builder = PointCloudBuilder::new(re_ctx); + let mut point_cloud_builder = PointCloudBuilder::new(re_ctx, 128); point_cloud_builder.batch("points").add_points_2d( &[ glam::vec3(500.0, 120.0, 0.0), diff --git a/crates/re_renderer_examples/depth_cloud.rs b/crates/re_renderer_examples/depth_cloud.rs index 1a470561a7f6..7bbab2fd6a4b 100644 --- a/crates/re_renderer_examples/depth_cloud.rs +++ b/crates/re_renderer_examples/depth_cloud.rs @@ -99,7 +99,7 @@ impl RenderDepthClouds { }) .multiunzip(); - let mut builder = PointCloudBuilder::new(re_ctx); + let mut builder = PointCloudBuilder::new(re_ctx, points.len() as u32); builder .batch("backprojected point cloud") .add_points(&points, &radii, &colors, &[]); diff --git a/crates/re_renderer_examples/multiview.rs b/crates/re_renderer_examples/multiview.rs index cce45e948412..e6a6f656466e 100644 --- a/crates/re_renderer_examples/multiview.rs +++ b/crates/re_renderer_examples/multiview.rs @@ -323,7 +323,7 @@ impl Example for Multiview { let skybox = GenericSkyboxDrawData::new(re_ctx); let lines = build_lines(re_ctx, seconds_since_startup); - let mut builder = PointCloudBuilder::new(re_ctx); + let mut builder = PointCloudBuilder::new(re_ctx, self.random_points_positions.len() as u32); builder .batch("Random Points") .world_from_obj(glam::Affine3A::from_rotation_x(seconds_since_startup)) diff --git a/crates/re_renderer_examples/picking.rs b/crates/re_renderer_examples/picking.rs index a04f1d39ed56..275759408788 100644 --- a/crates/re_renderer_examples/picking.rs +++ b/crates/re_renderer_examples/picking.rs @@ -156,7 +156,13 @@ impl framework::Example for Picking { .schedule_picking_rect(re_ctx, picking_rect, READBACK_IDENTIFIER, (), false) .unwrap(); - let mut point_builder = PointCloudBuilder::new(re_ctx); + let mut point_builder = PointCloudBuilder::new( + re_ctx, + self.point_sets + .iter() + .map(|set| set.positions.len() as u32) + .sum(), + ); for (i, point_set) in self.point_sets.iter().enumerate() { point_builder .batch(format!("Random Points {i}")) diff --git a/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs b/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs index b7e2016b89a1..064a4cbe7f73 100644 --- a/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs +++ b/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs @@ -78,10 +78,13 @@ impl ViewContextSystem for SharedRenderBuilders { LineStripSeriesBuilder::new(ctx.render_ctx) .radius_boost_in_ui_points_for_outlines(SIZE_BOOST_IN_POINTS_FOR_LINE_OUTLINES), )); - self.points = Mutex::new(Some( - PointCloudBuilder::new(ctx.render_ctx) - .radius_boost_in_ui_points_for_outlines(SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES), - )); + self.points = + Mutex::new(Some( + PointCloudBuilder::new(ctx.render_ctx, 4096) // TODO: + .radius_boost_in_ui_points_for_outlines( + SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES, + ), + )); } fn as_any(&self) -> &dyn std::any::Any { From 639c2b320054b7a2d406aca6ae67e01de76b8231 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 13 Feb 2024 19:24:30 +0100 Subject: [PATCH 3/8] note on how data_texture_size is optimal --- crates/re_renderer/src/renderer/point_cloud.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs index 830a50b68536..7021f06c2d80 100644 --- a/crates/re_renderer/src/renderer/point_cloud.rs +++ b/crates/re_renderer/src/renderer/point_cloud.rs @@ -742,14 +742,15 @@ fn data_texture_size( ) -> wgpu::Extent3d { assert!(max_texture_size.is_power_of_two()); - // Our data textures are usually accessed in a linear fashion, - // so ideally we'd be using a 1D texture. + // Our data textures are usually accessed in a linear fashion, so ideally we'd be using a 1D texture. // However, 1D textures are very limited in size on many platforms, we we have to use 2D textures instead. // 2D textures perform a lot better when their dimensions are powers of two, so we'll strictly stick to that even // when it seems to cause memory overhead. - // Our current strategy is to fill row by row. - // Note that using shorter rows may be more efficient in some cases. + // We fill row by row. With the power-of-two requirement, this is the optimal strategy: + // if there were a texture with less padding that uses half the width, + // then we'd need to increase the height. We can't increase without doubling it, thus creating a texture + // with the exact same mount of padding as before ▮. let width = if num_elements < max_texture_size { num_elements From 5cf8516e7a5c5291ff37d0d83d1a795442f3513b Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 14 Feb 2024 11:27:30 +0100 Subject: [PATCH 4/8] remove point cloud builder from shared_render_builders --- .../src/contexts/shared_render_builders.rs | 32 ++------------ .../src/visualizers/entity_iterator.rs | 42 +++++++++++++++++++ .../src/visualizers/points2d.rs | 29 ++++++++++--- .../src/visualizers/points3d.rs | 31 +++++++++++--- .../re_viewer_context/src/space_view/mod.rs | 3 ++ 5 files changed, 97 insertions(+), 40 deletions(-) diff --git a/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs b/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs index 064a4cbe7f73..8e19f706d56d 100644 --- a/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs +++ b/crates/re_space_view_spatial/src/contexts/shared_render_builders.rs @@ -1,18 +1,15 @@ use parking_lot::{MappedMutexGuard, Mutex, MutexGuard}; -use re_renderer::{LineStripSeriesBuilder, PointCloudBuilder, RenderContext}; +use re_renderer::{LineStripSeriesBuilder, RenderContext}; use re_types::ComponentNameSet; use re_viewer_context::{IdentifiedViewSystem, ViewContextSystem}; -use crate::visualizers::{ - SIZE_BOOST_IN_POINTS_FOR_LINE_OUTLINES, SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES, -}; +use crate::visualizers::SIZE_BOOST_IN_POINTS_FOR_LINE_OUTLINES; -// TODO(wumpf): Workaround for Point & Line builder taking up too much memory to emit them on every scene element that as points/lines. +// TODO(wumpf): Workaround for Point & Line builder taking up too much memory to emit them on every scene element that as lines. // If these builders/draw-data would allocate space more dynamically, this would not be necessary! #[derive(Default)] pub struct SharedRenderBuilders { pub lines: Mutex>, - pub points: Mutex>, } impl IdentifiedViewSystem for SharedRenderBuilders { @@ -26,10 +23,6 @@ impl SharedRenderBuilders { MutexGuard::map(self.lines.lock(), |l| l.as_mut().unwrap()) } - pub fn points(&self) -> MappedMutexGuard<'_, PointCloudBuilder> { - MutexGuard::map(self.points.lock(), |l| l.as_mut().unwrap()) - } - pub fn queuable_draw_data( &self, render_ctx: &RenderContext, @@ -47,18 +40,6 @@ impl SharedRenderBuilders { } }), ); - result.extend( - self.points - .lock() - .take() - .and_then(|l| match l.into_draw_data(render_ctx) { - Ok(d) => Some(d.into()), - Err(err) => { - re_log::error_once!("Failed to build point draw data: {err}"); - None - } - }), - ); result } } @@ -78,13 +59,6 @@ impl ViewContextSystem for SharedRenderBuilders { LineStripSeriesBuilder::new(ctx.render_ctx) .radius_boost_in_ui_points_for_outlines(SIZE_BOOST_IN_POINTS_FOR_LINE_OUTLINES), )); - self.points = - Mutex::new(Some( - PointCloudBuilder::new(ctx.render_ctx, 4096) // TODO: - .radius_boost_in_ui_points_for_outlines( - SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES, - ), - )); } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs index 665e4145e2d2..f49b862f2d3f 100644 --- a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs +++ b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs @@ -237,3 +237,45 @@ macro_rules! impl_process_archetype { seq!(NUM_COMP in 0..10 { impl_process_archetype!(for N=1, M=NUM_COMP); }); + +/// Count the number of primary instances for a given archetype query that should be displayed. +/// +/// Returned value might be conservative and some of the instances may not be displayable after all, +/// e.g. due to invalid transformation etc. +pub fn count_instances_in_archetype_views( + ctx: &ViewerContext<'_>, + query: &ViewQuery<'_>, +) -> usize { + // TODO(andreas/cmc): Use cached code path for this. + // This is right now a bit harder to do and requires knowing all queried components. + // The only thing we really want to pass here are the POV components. + + re_tracing::profile_function!(); + + let mut num_instances = 0; + + for data_result in query.iter_visible_data_results(System::identifier()) { + match query_archetype_with_history::( + ctx.entity_db.store(), + &query.timeline, + &query.latest_at, + &data_result.accumulated_properties().visible_history, + &data_result.entity_path, + ) + .map(|arch_views| { + for arch_view in arch_views { + num_instances += arch_view.num_instances(); + } + }) { + Ok(_) | Err(QueryError::PrimaryNotFound(_)) => {} + Err(err) => { + re_log::error_once!( + "Unexpected error querying {:?}: {err}", + &data_result.entity_path + ); + } + } + } + + num_instances +} diff --git a/crates/re_space_view_spatial/src/visualizers/points2d.rs b/crates/re_space_view_spatial/src/visualizers/points2d.rs index 3c2f297681a3..dcc43f4b3225 100644 --- a/crates/re_space_view_spatial/src/visualizers/points2d.rs +++ b/crates/re_space_view_spatial/src/visualizers/points2d.rs @@ -1,5 +1,5 @@ use re_entity_db::{EntityPath, InstancePathHash}; -use re_renderer::PickingLayerInstanceId; +use re_renderer::{PickingLayerInstanceId, PointCloudBuilder}; use re_types::{ archetypes::Points2D, components::{ClassId, Color, InstanceKey, KeypointId, Position2D, Radius, Text}, @@ -19,7 +19,10 @@ use crate::{ }, }; -use super::{filter_visualizable_2d_entities, SpatialViewVisualizerData}; +use super::{ + filter_visualizable_2d_entities, SpatialViewVisualizerData, + SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES, +}; // --- @@ -71,6 +74,7 @@ impl Points2DVisualizer { fn process_data( &mut self, + point_builder: &mut PointCloudBuilder, query: &ViewQuery<'_>, data: &Points2DComponentData<'_>, ent_path: &EntityPath, @@ -95,7 +99,6 @@ impl Points2DVisualizer { { re_tracing::profile_scope!("to_gpu"); - let mut point_builder = ent_context.shared_render_builders.points(); let point_batch = point_builder .batch("2d points") .depth_offset(ent_context.depth_offset) @@ -243,6 +246,18 @@ impl VisualizerSystem for Points2DVisualizer { query: &ViewQuery<'_>, view_ctx: &ViewContextCollection, ) -> Result, SpaceViewSystemExecutionError> { + let num_points = super::entity_iterator::count_instances_in_archetype_views::< + Points2DVisualizer, + Points2D, + >(ctx, query); + + if num_points == 0 { + return Ok(Vec::new()); + } + + let mut point_builder = PointCloudBuilder::new(ctx.render_ctx, num_points as u32) + .radius_boost_in_ui_points_for_outlines(SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES); + super::entity_iterator::process_archetype_pov1_comp5::< Points2DVisualizer, Points2D, @@ -279,12 +294,16 @@ impl VisualizerSystem for Points2DVisualizer { keypoint_ids, class_ids, }; - self.process_data(query, &data, ent_path, ent_context); + self.process_data(&mut point_builder, query, &data, ent_path, ent_context); Ok(()) }, )?; - Ok(Vec::new()) // TODO(andreas): Optionally return point & line draw data once SharedRenderBuilders is gone. + let draw_data = point_builder + .into_draw_data(ctx.render_ctx) + .map_err(|err| SpaceViewSystemExecutionError::DrawDataCreationError(Box::new(err)))?; + + Ok(vec![draw_data.into()]) } fn data(&self) -> Option<&dyn std::any::Any> { diff --git a/crates/re_space_view_spatial/src/visualizers/points3d.rs b/crates/re_space_view_spatial/src/visualizers/points3d.rs index 2c202ad7a672..9fa6b137c43e 100644 --- a/crates/re_space_view_spatial/src/visualizers/points3d.rs +++ b/crates/re_space_view_spatial/src/visualizers/points3d.rs @@ -1,6 +1,6 @@ use re_entity_db::{EntityPath, InstancePathHash}; use re_log_types::TimeInt; -use re_renderer::PickingLayerInstanceId; +use re_renderer::{PickingLayerInstanceId, PointCloudBuilder}; use re_types::{ archetypes::Points3D, components::{ClassId, Color, InstanceKey, KeypointId, Position3D, Radius, Text}, @@ -20,7 +20,10 @@ use crate::{ }, }; -use super::{filter_visualizable_3d_entities, Keypoints, SpatialViewVisualizerData}; +use super::{ + filter_visualizable_3d_entities, Keypoints, SpatialViewVisualizerData, + SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES, +}; pub struct Points3DVisualizer { /// If the number of points in the batch is > max_labels, don't render point labels. @@ -72,6 +75,7 @@ impl Points3DVisualizer { fn process_data( &mut self, + point_builder: &mut PointCloudBuilder, query: &ViewQuery<'_>, ent_path: &EntityPath, ent_context: &SpatialSceneEntityContext<'_>, @@ -91,9 +95,8 @@ impl Points3DVisualizer { { re_tracing::profile_scope!("to_gpu"); - let mut point_builder = ent_context.shared_render_builders.points(); let point_batch = point_builder - .batch("3d points") + .batch(ent_path.to_string()) .world_from_obj(ent_context.world_from_entity) .outline_mask_ids(ent_context.highlight.overall) .picking_object_id(re_renderer::PickingLayerObjectId(ent_path.hash64())); @@ -184,6 +187,18 @@ impl VisualizerSystem for Points3DVisualizer { query: &ViewQuery<'_>, view_ctx: &ViewContextCollection, ) -> Result, SpaceViewSystemExecutionError> { + let num_points = super::entity_iterator::count_instances_in_archetype_views::< + Points3DVisualizer, + Points3D, + >(ctx, query); + + if num_points == 0 { + return Ok(Vec::new()); + } + + let mut point_builder = PointCloudBuilder::new(ctx.render_ctx, num_points as u32) + .radius_boost_in_ui_points_for_outlines(SIZE_BOOST_IN_POINTS_FOR_POINT_OUTLINES); + super::entity_iterator::process_archetype_pov1_comp5::< Points3DVisualizer, Points3D, @@ -220,12 +235,16 @@ impl VisualizerSystem for Points3DVisualizer { keypoint_ids, class_ids, }; - self.process_data(query, ent_path, ent_context, &data); + self.process_data(&mut point_builder, query, ent_path, ent_context, &data); Ok(()) }, )?; - Ok(Vec::new()) // TODO(andreas): Optionally return point & line draw data once SharedRenderBuilders is gone. + let draw_data = point_builder + .into_draw_data(ctx.render_ctx) + .map_err(|err| SpaceViewSystemExecutionError::DrawDataCreationError(Box::new(err)))?; + + Ok(vec![draw_data.into()]) } fn data(&self) -> Option<&dyn std::any::Any> { diff --git a/crates/re_viewer_context/src/space_view/mod.rs b/crates/re_viewer_context/src/space_view/mod.rs index 01c2d2f35114..1c28c1a44b80 100644 --- a/crates/re_viewer_context/src/space_view/mod.rs +++ b/crates/re_viewer_context/src/space_view/mod.rs @@ -51,4 +51,7 @@ pub enum SpaceViewSystemExecutionError { #[error(transparent)] DeserializationError(#[from] re_types::DeserializationError), + + #[error("Failed to create draw data: {0}")] + DrawDataCreationError(Box), } From 14c8cf9a061b3143030404a29cb7957ae0721528 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 14 Feb 2024 12:08:48 +0100 Subject: [PATCH 5/8] fix crash due to history query inflexibility in number of queried components --- .../src/visualizers/entity_iterator.rs | 10 ++++++++-- .../re_space_view_spatial/src/visualizers/points2d.rs | 1 + .../re_space_view_spatial/src/visualizers/points3d.rs | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs index f49b862f2d3f..1159d03efafc 100644 --- a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs +++ b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs @@ -242,10 +242,16 @@ seq!(NUM_COMP in 0..10 { /// /// Returned value might be conservative and some of the instances may not be displayable after all, /// e.g. due to invalid transformation etc. -pub fn count_instances_in_archetype_views( +pub fn count_instances_in_archetype_views< + System: IdentifiedViewSystem, + A: Archetype, + const N: usize, +>( ctx: &ViewerContext<'_>, query: &ViewQuery<'_>, ) -> usize { + assert_eq!(A::all_components().len(), N); + // TODO(andreas/cmc): Use cached code path for this. // This is right now a bit harder to do and requires knowing all queried components. // The only thing we really want to pass here are the POV components. @@ -255,7 +261,7 @@ pub fn count_instances_in_archetype_views( + match query_archetype_with_history::( ctx.entity_db.store(), &query.timeline, &query.latest_at, diff --git a/crates/re_space_view_spatial/src/visualizers/points2d.rs b/crates/re_space_view_spatial/src/visualizers/points2d.rs index dcc43f4b3225..df69f7577ec0 100644 --- a/crates/re_space_view_spatial/src/visualizers/points2d.rs +++ b/crates/re_space_view_spatial/src/visualizers/points2d.rs @@ -249,6 +249,7 @@ impl VisualizerSystem for Points2DVisualizer { let num_points = super::entity_iterator::count_instances_in_archetype_views::< Points2DVisualizer, Points2D, + 9, >(ctx, query); if num_points == 0 { diff --git a/crates/re_space_view_spatial/src/visualizers/points3d.rs b/crates/re_space_view_spatial/src/visualizers/points3d.rs index 9fa6b137c43e..3dfdc7ec96a5 100644 --- a/crates/re_space_view_spatial/src/visualizers/points3d.rs +++ b/crates/re_space_view_spatial/src/visualizers/points3d.rs @@ -190,6 +190,7 @@ impl VisualizerSystem for Points3DVisualizer { let num_points = super::entity_iterator::count_instances_in_archetype_views::< Points3DVisualizer, Points3D, + 8, >(ctx, query); if num_points == 0 { From 4cef233a9cb5b52aad5e7bab71f2d31f0e0ef936 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 14 Feb 2024 16:28:53 +0100 Subject: [PATCH 6/8] cleanup data texture handling, improve docs, allow independent data texture sizes, mostly for clarity --- crates/re_renderer/shader/point_cloud.wgsl | 15 +- .../src/allocator/cpu_write_gpu_read_belt.rs | 25 ++- crates/re_renderer/src/point_cloud_builder.rs | 24 ++- crates/re_renderer/src/renderer/mod.rs | 97 ++++++++- .../re_renderer/src/renderer/point_cloud.rs | 191 ++++++------------ 5 files changed, 211 insertions(+), 141 deletions(-) diff --git a/crates/re_renderer/shader/point_cloud.wgsl b/crates/re_renderer/shader/point_cloud.wgsl index f6aebb5ec9a6..b512f822e40c 100644 --- a/crates/re_renderer/shader/point_cloud.wgsl +++ b/crates/re_renderer/shader/point_cloud.wgsl @@ -75,12 +75,17 @@ struct PointData { // Read and unpack data at a given location fn read_data(idx: u32) -> PointData { - let texture_size = textureDimensions(position_data_texture); - let coord = vec2u(idx % texture_size.x, idx / texture_size.x); + let position_data_texture_size = textureDimensions(position_data_texture); + let position_data = textureLoad(position_data_texture, + vec2u(idx % position_data_texture_size.x, idx / position_data_texture_size.x), 0); - let position_data = textureLoad(position_data_texture, coord, 0); - let color = textureLoad(color_texture, coord, 0); - let picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg; + let color_texture_size = textureDimensions(color_texture); + let color = textureLoad(color_texture, + vec2u(idx % color_texture_size.x, idx / color_texture_size.x), 0); + + let picking_instance_id_texture_size = textureDimensions(picking_instance_id_texture); + let picking_instance_id = textureLoad(picking_instance_id_texture, + vec2u(idx % picking_instance_id_texture_size.x, idx / picking_instance_id_texture_size.x), 0).xy; var data: PointData; let pos_4d = batch.world_from_obj * vec4f(position_data.xyz, 1.0); diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index c27be67c1126..c457c277b42e 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -2,7 +2,7 @@ use std::sync::mpsc; use crate::{ texture_info::Texture2DBufferInfo, - wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool}, + wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, GpuTexture}, }; #[derive(thiserror::Error, Debug, PartialEq, Eq)] @@ -224,6 +224,29 @@ where self.unwritten_element_range.end } + /// Copies all all so far written data to the first layer of a 2d texture. + /// + /// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data. + /// (taking into account required padding as specified by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`]) + /// + /// Fails if the buffer size is not sufficient to fill the entire texture. + pub fn copy_to_texture2d_entire_first_layer( + self, + encoder: &mut wgpu::CommandEncoder, + destination: &GpuTexture, + ) -> Result<(), CpuWriteGpuReadError> { + self.copy_to_texture2d( + encoder, + wgpu::ImageCopyTexture { + texture: &destination.texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + destination.texture.size(), + ) + } + /// Copies all so far written data to a rectangle on a single 2d texture layer. /// /// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data. diff --git a/crates/re_renderer/src/point_cloud_builder.rs b/crates/re_renderer/src/point_cloud_builder.rs index 1eba385e9dba..87f4fd489d86 100644 --- a/crates/re_renderer/src/point_cloud_builder.rs +++ b/crates/re_renderer/src/point_cloud_builder.rs @@ -6,8 +6,8 @@ use crate::{ allocator::CpuWriteGpuReadBuffer, draw_phases::PickingLayerObjectId, renderer::{ - PointCloudBatchFlags, PointCloudBatchInfo, PointCloudDrawData, PointCloudDrawDataError, - PositionRadius, + data_texture_source_buffer_element_count, PointCloudBatchFlags, PointCloudBatchInfo, + PointCloudDrawData, PointCloudDrawDataError, PositionRadius, }, Color32, DebugLabel, DepthOffset, OutlineMaskPreference, PickingLayerInstanceId, RenderContext, Size, @@ -30,21 +30,33 @@ pub struct PointCloudBuilder { impl PointCloudBuilder { pub fn new(ctx: &RenderContext, max_num_points: u32) -> Self { - let num_buffer_elements = - PointCloudDrawData::padded_buffer_element_count(ctx, max_num_points); + let max_texture_dimension_2d = ctx.device.limits().max_texture_dimension_2d; let color_buffer = ctx .cpu_write_gpu_read_belt .lock() - .allocate::(&ctx.device, &ctx.gpu_resources.buffers, num_buffer_elements) + .allocate::( + &ctx.device, + &ctx.gpu_resources.buffers, + data_texture_source_buffer_element_count( + PointCloudDrawData::COLOR_TEXTURE_FORMAT, + max_num_points, + max_texture_dimension_2d, + ), + ) .expect("Failed to allocate color buffer"); // TODO(#3408): Should never happen but should propagate error anyways + let picking_instance_ids_buffer = ctx .cpu_write_gpu_read_belt .lock() .allocate::( &ctx.device, &ctx.gpu_resources.buffers, - num_buffer_elements, + data_texture_source_buffer_element_count( + PointCloudDrawData::PICKING_INSTANCE_ID_TEXTURE_FORMAT, + max_num_points, + max_texture_dimension_2d, + ), ) .expect("Failed to allocate picking layer buffer"); // TODO(#3408): Should never happen but should propagate error anyways diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index 76003439db40..5fd6b3c17309 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -39,7 +39,8 @@ use crate::{ context::RenderContext, draw_phases::DrawPhase, include_shader_module, - wgpu_resources::{GpuRenderPipelinePoolAccessor, PoolError}, + wgpu_resources::{self, GpuRenderPipelinePoolAccessor, PoolError}, + DebugLabel, }; /// GPU sided data used by a [`Renderer`] to draw things to the screen. @@ -89,3 +90,97 @@ pub fn screen_triangle_vertex_shader( &include_shader_module!("../../shader/screen_triangle.wgsl"), ) } + +/// Texture size for storing a given amount of data. +/// +/// For WebGL compatibility we sometimes have to use textures instead of buffers. +/// We call these textures "data textures". +/// This method determines the size of a data texture holding a given number of used texels. +/// Each texel is typically a single data entry (think `struct`). +/// +/// `max_texture_dimension_2d` must be a power of two and is the maximum supported size of 2D textures. +/// +/// For convenience, the returned texture size has a width such that its +/// row size in bytes is a multiple of `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`. +/// This makes it a lot easier to copy data from a continuous buffer to the texture. +/// If we wouldn't do that, we'd need to do a copy for each row in some cases. +pub fn data_texture_size( + format: wgpu::TextureFormat, + num_texels_written: u32, + max_texture_dimension_2d: u32, +) -> wgpu::Extent3d { + debug_assert!(max_texture_dimension_2d.is_power_of_two()); + debug_assert!(!format.has_depth_aspect()); + debug_assert!(!format.has_stencil_aspect()); + debug_assert!(!format.is_compressed()); + + let texel_size_in_bytes = format + .block_copy_size(None) + .expect("Depth/stencil formats are not supported as data textures"); + + // Our data textures are usually accessed in a linear fashion, so ideally we'd be using a 1D texture. + // However, 1D textures are very limited in size on many platforms, we we have to use 2D textures instead. + // 2D textures perform a lot better when their dimensions are powers of two, so we'll strictly stick to that even + // when it seems to cause memory overhead. + + // We fill row by row. With the power-of-two requirement, this is the optimal strategy: + // if there were a texture with less padding that uses half the width, + // then we'd need to increase the height. We can't increase without doubling it, thus creating a texture + // with the exact same mount of padding as before. + + let width = if num_texels_written < max_texture_dimension_2d { + num_texels_written + .next_power_of_two() + // For too few number of written texels, or too small texels we might need to increase the size to stay + // above a row **byte** size of `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`. + // Note that this implies that for very large texels, we need less wide textures to stay above this limit. + // (width is in number of texels, but alignment cares about bytes!) + .next_multiple_of(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / texel_size_in_bytes) + } else { + max_texture_dimension_2d + }; + + let height = num_texels_written.div_ceil(width); + + wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + } +} + +/// Texture descriptor for data storage. +/// +/// See [`data_texture_size`] +pub fn data_texture_desc( + label: impl Into, + format: wgpu::TextureFormat, + num_texels_written: u32, + max_texture_dimension_2d: u32, +) -> wgpu_resources::TextureDesc { + wgpu_resources::TextureDesc { + label: label.into(), + size: data_texture_size(format, num_texels_written, max_texture_dimension_2d), + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + } +} + +/// Pendent to [`data_texture_size`] for determining the element size (==texels on data texture) +/// need to be in a buffer that fills an entire data texture. +pub fn data_texture_source_buffer_element_count( + texture_format: wgpu::TextureFormat, + num_texels_written: u32, + max_texture_dimension_2d: u32, +) -> usize { + let data_texture_size = + data_texture_size(texture_format, num_texels_written, max_texture_dimension_2d); + let element_count = data_texture_size.width as usize * data_texture_size.height as usize; + + debug_assert!(element_count >= num_texels_written as usize); + + element_count +} diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs index 7021f06c2d80..32309abcc750 100644 --- a/crates/re_renderer/src/renderer/point_cloud.rs +++ b/crates/re_renderer/src/renderer/point_cloud.rs @@ -19,6 +19,7 @@ use crate::{ allocator::create_and_fill_uniform_buffer_batch, draw_phases::{DrawPhase, OutlineMaskProcessor, PickingLayerObjectId, PickingLayerProcessor}, include_shader_module, + renderer::data_texture_desc, wgpu_resources::GpuRenderPipelinePoolAccessor, DebugLabel, DepthOffset, OutlineMaskPreference, PointCloudBuilder, }; @@ -32,7 +33,7 @@ use crate::{ view_builder::ViewBuilder, wgpu_resources::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, - GpuRenderPipelineHandle, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc, + GpuRenderPipelineHandle, PipelineLayoutDesc, RenderPipelineDesc, }, }; @@ -161,6 +162,11 @@ pub enum PointCloudDrawDataError { } impl PointCloudDrawData { + pub const COLOR_TEXTURE_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8UnormSrgb; + pub const POSITION_DATA_TEXTURE_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float; + pub const PICKING_INSTANCE_ID_TEXTURE_FORMAT: wgpu::TextureFormat = + wgpu::TextureFormat::Rg32Uint; + /// Transforms and uploads point cloud data to be consumed by gpu. /// /// Try to bundle all points into a single draw data instance whenever possible. @@ -221,50 +227,46 @@ impl PointCloudDrawData { vertices }; - let data_texture_size = - Self::point_cloud_data_texture_size(vertices.len() as u32, max_texture_dimension_2d); - - let position_data_texture_desc = TextureDesc { - label: "PointCloudDrawData::position_data_texture".into(), - size: data_texture_size, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba32Float, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, - }; - - let position_data_texture = ctx - .gpu_resources - .textures - .alloc(&ctx.device, &position_data_texture_desc); + let position_data_texture = ctx.gpu_resources.textures.alloc( + &ctx.device, + &data_texture_desc( + "PointCloudDrawData::position_data_texture", + Self::POSITION_DATA_TEXTURE_FORMAT, + vertices.len() as u32, + max_texture_dimension_2d, + ), + ); let color_texture = ctx.gpu_resources.textures.alloc( &ctx.device, - &TextureDesc { - label: "PointCloudDrawData::color_texture".into(), - format: wgpu::TextureFormat::Rgba8UnormSrgb, // Declaring this as srgb here saves us manual conversion in the shader! - ..position_data_texture_desc - }, + &data_texture_desc( + "PointCloudDrawData::color_texture", + Self::COLOR_TEXTURE_FORMAT, + vertices.len() as u32, + max_texture_dimension_2d, + ), ); + let picking_instance_id_texture = ctx.gpu_resources.textures.alloc( &ctx.device, - &TextureDesc { - label: "PointCloudDrawData::picking_instance_id_texture".into(), - format: wgpu::TextureFormat::Rg32Uint, - ..position_data_texture_desc - }, + &data_texture_desc( + "PointCloudDrawData::picking_instance_id_texture", + Self::PICKING_INSTANCE_ID_TEXTURE_FORMAT, + vertices.len() as u32, + max_texture_dimension_2d, + ), ); - let data_texture_element_count = - data_texture_size.width as usize * data_texture_size.height as usize; - let num_elements_padding = data_texture_element_count - vertices.len(); { re_tracing::profile_scope!("write_pos_size_texture"); + let textures_size = position_data_texture.texture.size(); + let texel_count = (textures_size.width * textures_size.height) as usize; + let num_elements_padding = texel_count - vertices.len(); + let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, - data_texture_element_count, + texel_count, )?; staging_buffer.extend_from_slice(vertices)?; staging_buffer.fill_n(gpu_data::PositionRadius::zeroed(), num_elements_padding)?; @@ -276,37 +278,38 @@ impl PointCloudDrawData { origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - data_texture_size, + position_data_texture.texture.size(), + )?; + } + { + let textures_size = color_texture.texture.size(); + let texel_count = (textures_size.width * textures_size.height) as usize; + let num_elements_padding = texel_count - vertices.len(); + + builder + .color_buffer + .fill_n(ecolor::Color32::TRANSPARENT, num_elements_padding)?; + builder.color_buffer.copy_to_texture2d_entire_first_layer( + ctx.active_frame.before_view_builder_encoder.lock().get(), + &color_texture, )?; } - builder - .color_buffer - .fill_n(ecolor::Color32::TRANSPARENT, num_elements_padding)?; - builder.color_buffer.copy_to_texture2d( - ctx.active_frame.before_view_builder_encoder.lock().get(), - wgpu::ImageCopyTexture { - texture: &color_texture.texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - data_texture_size, - )?; - - builder - .picking_instance_ids_buffer - .fill_n(Default::default(), num_elements_padding)?; - builder.picking_instance_ids_buffer.copy_to_texture2d( - ctx.active_frame.before_view_builder_encoder.lock().get(), - wgpu::ImageCopyTexture { - texture: &picking_instance_id_texture.texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - data_texture_size, - )?; + { + let textures_size = picking_instance_id_texture.texture.size(); + let texel_count = (textures_size.width * textures_size.height) as usize; + let num_elements_padding = texel_count - vertices.len(); + + builder + .picking_instance_ids_buffer + .fill_n(Default::default(), num_elements_padding)?; + builder + .picking_instance_ids_buffer + .copy_to_texture2d_entire_first_layer( + ctx.active_frame.before_view_builder_encoder.lock().get(), + &picking_instance_id_texture, + )?; + } let draw_data_uniform_buffer_bindings = create_and_fill_uniform_buffer_batch( ctx, @@ -456,30 +459,6 @@ impl PointCloudDrawData { batches: batches_internal, }) } - - /// Returns size in number of elements for buffers that store point cloud data. - /// - /// This padding is required since we can only copy full rows of a texture. - pub(crate) fn padded_buffer_element_count(ctx: &RenderContext, max_num_points: u32) -> usize { - let max_texture_dimension_2d = ctx.device.limits().max_texture_dimension_2d; - let data_texture_size = - Self::point_cloud_data_texture_size(max_num_points, max_texture_dimension_2d); - data_texture_size.width as usize * data_texture_size.height as usize - } - - /// Size used for all data textures for a given number of points. - fn point_cloud_data_texture_size( - num_elements: u32, - max_texture_dimension_2d: u32, - ) -> wgpu::Extent3d { - // Not all data textures have an element size of 4. - // For instance, the picking texture has an element size of 16. - // - // However, we'd like to use the same coordinates on all textures, so we're using the same size for all textures. - // By specifying the smallest element size, we might over-pad the textures with bigger elements. - let element_size = 4; - data_texture_size(num_elements, element_size, max_texture_dimension_2d) - } } pub struct PointCloudRenderer { @@ -725,47 +704,3 @@ impl Renderer for PointCloudRenderer { Ok(()) } } - -/// Determines the size for a "data texture", i.e. a texture that we use to store data -/// that we'd otherwise store inside a buffer, but can't due to WebGL compatibility. -/// -/// `max_texture_size` must be a power of two. -/// -/// For convenience, the returned texture size has a width that is also a multiple of `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT` -/// for the given element size. -/// This makes it a lot easier to copy data from a continuous buffer to the texture. -/// If we wouldn't do that, we'd need to do a copy per row in some cases. -fn data_texture_size( - num_elements: u32, - element_size_in_bytes: u32, - max_texture_size: u32, -) -> wgpu::Extent3d { - assert!(max_texture_size.is_power_of_two()); - - // Our data textures are usually accessed in a linear fashion, so ideally we'd be using a 1D texture. - // However, 1D textures are very limited in size on many platforms, we we have to use 2D textures instead. - // 2D textures perform a lot better when their dimensions are powers of two, so we'll strictly stick to that even - // when it seems to cause memory overhead. - - // We fill row by row. With the power-of-two requirement, this is the optimal strategy: - // if there were a texture with less padding that uses half the width, - // then we'd need to increase the height. We can't increase without doubling it, thus creating a texture - // with the exact same mount of padding as before ▮. - - let width = if num_elements < max_texture_size { - num_elements - .next_power_of_two() - // Keeping at a multiple of the alignment requirement makes copy operations a lot simpler. - .next_multiple_of(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT / element_size_in_bytes) - } else { - max_texture_size - }; - - let height = num_elements.div_ceil(width); - - wgpu::Extent3d { - width, - height, - depth_or_array_layers: 1, - } -} From 94120642b2e954321f80e452cea47618a095f699 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 14 Feb 2024 17:10:01 +0100 Subject: [PATCH 7/8] fix plural typo --- crates/re_renderer/src/renderer/point_cloud.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs index 32309abcc750..623b9ff3e11d 100644 --- a/crates/re_renderer/src/renderer/point_cloud.rs +++ b/crates/re_renderer/src/renderer/point_cloud.rs @@ -259,8 +259,8 @@ impl PointCloudDrawData { { re_tracing::profile_scope!("write_pos_size_texture"); - let textures_size = position_data_texture.texture.size(); - let texel_count = (textures_size.width * textures_size.height) as usize; + let texture_size = position_data_texture.texture.size(); + let texel_count = (texture_size.width * texture_size.height) as usize; let num_elements_padding = texel_count - vertices.len(); let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate( @@ -282,8 +282,8 @@ impl PointCloudDrawData { )?; } { - let textures_size = color_texture.texture.size(); - let texel_count = (textures_size.width * textures_size.height) as usize; + let texture_size = color_texture.texture.size(); + let texel_count = (texture_size.width * texture_size.height) as usize; let num_elements_padding = texel_count - vertices.len(); builder @@ -296,8 +296,8 @@ impl PointCloudDrawData { } { - let textures_size = picking_instance_id_texture.texture.size(); - let texel_count = (textures_size.width * textures_size.height) as usize; + let texture_size = picking_instance_id_texture.texture.size(); + let texel_count = (texture_size.width * texture_size.height) as usize; let num_elements_padding = texel_count - vertices.len(); builder From d390ba812bf94dec252899b499ec68cfe2708268 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 14 Feb 2024 17:43:16 +0100 Subject: [PATCH 8/8] lints --- crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs | 2 +- crates/re_renderer/src/renderer/mod.rs | 2 +- crates/re_space_view_spatial/src/visualizers/entity_iterator.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index c457c277b42e..619020a8eb66 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -224,7 +224,7 @@ where self.unwritten_element_range.end } - /// Copies all all so far written data to the first layer of a 2d texture. + /// Copies all so far written data to the first layer of a 2d texture. /// /// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data. /// (taking into account required padding as specified by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`]) diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index 5fd6b3c17309..721df798a9e5 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -119,7 +119,7 @@ pub fn data_texture_size( .expect("Depth/stencil formats are not supported as data textures"); // Our data textures are usually accessed in a linear fashion, so ideally we'd be using a 1D texture. - // However, 1D textures are very limited in size on many platforms, we we have to use 2D textures instead. + // However, 1D textures are very limited in size on many platforms, we have to use 2D textures instead. // 2D textures perform a lot better when their dimensions are powers of two, so we'll strictly stick to that even // when it seems to cause memory overhead. diff --git a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs index 1159d03efafc..0847361bfe4e 100644 --- a/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs +++ b/crates/re_space_view_spatial/src/visualizers/entity_iterator.rs @@ -252,7 +252,7 @@ pub fn count_instances_in_archetype_views< ) -> usize { assert_eq!(A::all_components().len(), N); - // TODO(andreas/cmc): Use cached code path for this. + // TODO(andreas): Use cached code path for this. // This is right now a bit harder to do and requires knowing all queried components. // The only thing we really want to pass here are the POV components.