diff --git a/crates/re_data_ui/src/image.rs b/crates/re_data_ui/src/image.rs index dbffece6e09d..4dd25e351a89 100644 --- a/crates/re_data_ui/src/image.rs +++ b/crates/re_data_ui/src/image.rs @@ -138,10 +138,18 @@ fn tensor_ui( ); } + let shape = match tensor.image_height_width_channels() { + Some([h, w, c]) => vec![ + TensorDimension::height(h), + TensorDimension::width(w), + TensorDimension::depth(c), + ], + None => tensor.shape.clone(), + }; ui.label(format!( "{} x {}{}", tensor.dtype(), - format_tensor_shape_single_line(tensor.shape()), + format_tensor_shape_single_line(shape.as_slice()), if original_tensor.buffer.is_compressed_image() { " (compressed)" } else { @@ -216,6 +224,9 @@ fn tensor_ui( } if let Some([_h, _w, channels]) = tensor.image_height_width_channels() { + if let TensorBuffer::Nv12(_) = &tensor.buffer { + return; + } if channels == 3 { if let TensorBuffer::U8(data) = &tensor.buffer { ui.collapsing("Histogram", |ui| { @@ -231,7 +242,7 @@ fn tensor_ui( } fn texture_size(colormapped_texture: &ColormappedTexture) -> Vec2 { - let [w, h] = colormapped_texture.texture.width_height(); + let [w, h] = colormapped_texture.width_height(); egui::vec2(w as f32, h as f32) } @@ -360,6 +371,11 @@ pub fn tensor_summary_ui_grid_contents( )); ui.end_row(); } + TensorBuffer::Nv12(_) => { + re_ui.grid_left_hand_label(ui, "Encoding"); + ui.label("NV12"); + ui.end_row(); + } } let TensorStats { @@ -379,8 +395,9 @@ pub fn tensor_summary_ui_grid_contents( } // Show finite range only if it is different from the actual range. if let (true, Some((min, max))) = (range != finite_range, finite_range) { - ui.label("Finite data range") - .on_hover_text("The finite values (ignoring all NaN & -Inf/+Inf) of the tensor range within these bounds"); + ui.label("Finite data range").on_hover_text( + "The finite values (ignoring all NaN & -Inf/+Inf) of the tensor range within these bounds" + ); ui.monospace(format!( "[{} - {}]", re_format::format_f64(*min), @@ -439,8 +456,8 @@ fn show_zoomed_image_region_tooltip( use egui::remap_clamp; let center_texel = [ - (remap_clamp(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize), - (remap_clamp(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize), + remap_clamp(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize, + remap_clamp(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize, ]; show_zoomed_image_region_area_outline( parent_ui.ctx(), @@ -562,7 +579,7 @@ fn try_show_zoomed_image_region( )?; const POINTS_PER_TEXEL: f32 = 5.0; - let size = Vec2::splat((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32 * POINTS_PER_TEXEL); + let size = Vec2::splat(((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32) * POINTS_PER_TEXEL); let (_id, zoom_rect) = ui.allocate_space(size); let painter = ui.painter(); @@ -574,7 +591,10 @@ fn try_show_zoomed_image_region( let image_rect_on_screen = egui::Rect::from_min_size( zoom_rect.center() - POINTS_PER_TEXEL - * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + * egui::vec2( + (center_texel[0] as f32) + 0.5, + (center_texel[1] as f32) + 0.5, + ), POINTS_PER_TEXEL * egui::vec2(width as f32, height as f32), ); @@ -610,7 +630,11 @@ fn try_show_zoomed_image_region( let zoom = rect.width(); let image_rect_on_screen = egui::Rect::from_min_size( rect.center() - - zoom * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + - zoom + * egui::vec2( + (center_texel[0] as f32) + 0.5, + (center_texel[1] as f32) + 0.5, + ), zoom * egui::vec2(width as f32, height as f32), ); 
gpu_bridge::render_image( @@ -661,7 +685,7 @@ fn tensor_pixel_value_ui( // This is a depth map if let Some(raw_value) = tensor.get(&[y, x]) { let raw_value = raw_value.as_f64(); - let meters = raw_value / meter as f64; + let meters = raw_value / (meter as f64); ui.label("Depth:"); if meters < 1.0 { ui.monospace(format!("{:.1} mm", meters * 1e3)); @@ -679,11 +703,20 @@ .map(|v| format!("Val: {v}")), 3 => { // TODO(jleibs): Track RGB ordering somehow -- don't just assume it - if let (Some(r), Some(g), Some(b)) = ( - tensor.get_with_image_coords(x, y, 0), - tensor.get_with_image_coords(x, y, 1), - tensor.get_with_image_coords(x, y, 2), - ) { + if let Some([r, g, b]) = match &tensor.buffer { + TensorBuffer::Nv12(_) => tensor.get_nv12_pixel(x, y), + _ => { + if let [Some(r), Some(g), Some(b)] = [ + tensor.get_with_image_coords(x, y, 0), + tensor.get_with_image_coords(x, y, 1), + tensor.get_with_image_coords(x, y, 2), + ] { + Some([r, g, b]) + } else { + None + } + } + } { match (r, g, b) { (TensorElement::U8(r), TensorElement::U8(g), TensorElement::U8(b)) => { Some(format!("R: {r}, G: {g}, B: {b}, #{r:02X}{g:02X}{b:02X}")) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl new file mode 100644 index 000000000000..ef35725b1f29 --- /dev/null +++ b/crates/re_renderer/shader/decodings.wgsl @@ -0,0 +1,25 @@ +#import <./types.wgsl> + + +/// Loads an RGBA texel from a texture holding an NV12 encoded image at the given texel coordinates. +fn decode_nv12(texture: texture_2d<u32>, coords: IVec2) -> Vec4 { + let texture_dim = Vec2(textureDimensions(texture).xy); + let uv_offset = u32(floor(texture_dim.y / 1.5)); + let uv_row = u32(coords.y / 2); + var uv_col = u32(coords.x / 2) * 2u; + + let y = max(0.0, (f32(textureLoad(texture, UVec2(coords), 0).r) - 16.0)) / 219.0; + let u = (f32(textureLoad(texture, UVec2(uv_col, uv_offset + uv_row), 0).r) - 128.0) / 224.0; + let v = (f32(textureLoad(texture, UVec2(uv_col + 1u, uv_offset + uv_row), 0).r) - 128.0) / 224.0; + + // The color standard in use (BT.601 vs. BT.709) should be exposed as an option in the future (https://github.com/rerun-io/rerun/pull/3541). + // BT.601 (aka. SDTV, aka. Rec.601). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion + let r = clamp(y + 1.402 * v, 0.0, 1.0); + let g = clamp(y - (0.344 * u + 0.714 * v), 0.0, 1.0); + let b = clamp(y + 1.772 * u, 0.0, 1.0); + // BT.709 (aka. HDTV, aka. Rec.709). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion + // let r = clamp(y + 1.5748 * v, 0.0, 1.0); + // let g = clamp(y + u * -0.1873 + v * -0.4681, 0.0, 1.0); + // let b = clamp(y + u * 1.8556, 0.0, 1.0); + return Vec4(r, g, b, 1.0); +} diff --git a/crates/re_renderer/shader/rectangle.wgsl b/crates/re_renderer/shader/rectangle.wgsl index da481a48e338..0f546f9a6045 100644 --- a/crates/re_renderer/shader/rectangle.wgsl +++ b/crates/re_renderer/shader/rectangle.wgsl @@ -6,6 +6,7 @@ const SAMPLE_TYPE_FLOAT = 1u; const SAMPLE_TYPE_SINT = 2u; const SAMPLE_TYPE_UINT = 3u; +const SAMPLE_TYPE_NV12 = 4u; // How do we do colormapping?
const COLOR_MAPPER_OFF = 1u; diff --git a/crates/re_renderer/shader/rectangle_fs.wgsl b/crates/re_renderer/shader/rectangle_fs.wgsl index 28a2801d39d5..c3cdf538d504 100644 --- a/crates/re_renderer/shader/rectangle_fs.wgsl +++ b/crates/re_renderer/shader/rectangle_fs.wgsl @@ -1,6 +1,7 @@ #import <./colormap.wgsl> #import <./rectangle.wgsl> #import <./utils/srgb.wgsl> +#import <./decodings.wgsl> fn is_magnifying(pixel_coord: Vec2) -> bool { return fwidth(pixel_coord.x) < 1.0; @@ -101,7 +102,24 @@ fn fs_main(in: VertexOut) -> @location(0) Vec4 { let v11 = decode_color(Vec4(textureLoad(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions), 0))); normalized_value = filter_bilinear(coord, v00, v01, v10, v11); } - } else { + } else if rect_info.sample_type == SAMPLE_TYPE_NV12 { + let texture_dimensions = Vec2(textureDimensions(texture_uint).xy); + let coord = in.texcoord * texture_dimensions; + if tex_filter(coord) == FILTER_NEAREST { + // nearest + normalized_value = decode_color(Vec4(decode_nv12(texture_uint, + clamp_to_edge_nearest_neighbor(coord, texture_dimensions)))); + } else { + // bilinear + let v00 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, -0.5), texture_dimensions)))); + let v01 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, 0.5), texture_dimensions)))); + let v10 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, -0.5), texture_dimensions)))); + let v11 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions)))); + normalized_value = filter_bilinear(coord, v00, v01, v10, v11); + } + } else { return ERROR_RGBA; // unknown sample type } diff --git a/crates/re_renderer/shader/rectangle_vs.wgsl b/crates/re_renderer/shader/rectangle_vs.wgsl index dddbed08cce2..c0222b35dc6f 100644 --- a/crates/re_renderer/shader/rectangle_vs.wgsl +++ b/crates/re_renderer/shader/rectangle_vs.wgsl @@ -10,6 +10,9 @@ fn vs_main(@builtin(vertex_index) v_idx: u32) -> VertexOut { var out: VertexOut; out.position = apply_depth_offset(frame.projection_from_world * Vec4(pos, 1.0), rect_info.depth_offset); out.texcoord = texcoord; + if rect_info.sample_type == SAMPLE_TYPE_NV12 { + out.texcoord.y /= 1.5; + } return out; } diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index cec92bb2a568..d86a59d90ffe 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -21,8 +21,8 @@ pub use test_triangle::TestTriangleDrawData; mod rectangles; pub use rectangles::{ - ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, TextureFilterMag, - TextureFilterMin, TexturedRect, + ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, ShaderDecoding, + TextureFilterMag, TextureFilterMin, TexturedRect, }; mod mesh_renderer; diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index f3241ad7d239..3fea5f64ae92 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -48,6 +48,12 @@ pub enum TextureFilterMin { // TODO(andreas): Offer mipmapping here? } +/// Describes how the color information is encoded in the texture. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ShaderDecoding { + Nv12, +} + /// Describes a texture and how to map it to a color.
#[derive(Clone)] pub struct ColormappedTexture { @@ -83,6 +89,9 @@ pub struct ColormappedTexture { /// Setting a color mapper for a four-component texture is an error. /// Failure to set a color mapper for a one-component texture is an error. pub color_mapper: Option<ColorMapper>, + + /// For textures that need decoding in the shader, for example NV12 encoded images. + pub shader_decoding: Option<ShaderDecoding>, } /// How to map the normalized `.r` component to a color. @@ -113,6 +122,17 @@ impl ColormappedTexture { gamma: 1.0, multiply_rgb_with_alpha: true, color_mapper: None, + shader_decoding: None, + } + } + + pub fn width_height(&self) -> [u32; 2] { + match self.shader_decoding { + Some(ShaderDecoding::Nv12) => { + let [width, height] = self.texture.width_height(); + [width, height * 2 / 3] + } + _ => self.texture.width_height(), } } } @@ -198,6 +218,7 @@ mod gpu_data { const SAMPLE_TYPE_FLOAT: u32 = 1; const SAMPLE_TYPE_SINT: u32 = 2; const SAMPLE_TYPE_UINT: u32 = 3; + const SAMPLE_TYPE_NV12: u32 = 4; // How do we do colormapping? const COLOR_MAPPER_OFF: u32 = 1; @@ -261,6 +282,7 @@ mod gpu_data { gamma, color_mapper, multiply_rgb_with_alpha, + shader_decoding, } = colormapped_texture; let super::RectangleOptions { @@ -274,7 +296,13 @@ let sample_type = match texture_format.sample_type(None) { Some(wgpu::TextureSampleType::Float { .. }) => SAMPLE_TYPE_FLOAT, Some(wgpu::TextureSampleType::Sint) => SAMPLE_TYPE_SINT, - Some(wgpu::TextureSampleType::Uint) => SAMPLE_TYPE_UINT, + Some(wgpu::TextureSampleType::Uint) => { + if shader_decoding == &Some(super::ShaderDecoding::Nv12) { + SAMPLE_TYPE_NV12 + } else { + SAMPLE_TYPE_UINT + } + } _ => { return Err(RectangleError::TextureFormatNotSupported(texture_format)); } @@ -292,9 +320,10 @@ Some(ColorMapper::Texture(_)) => { color_mapper_int = COLOR_MAPPER_TEXTURE; } - None => { - return Err(RectangleError::MissingColorMapper); - } + None => match shader_decoding { + Some(super::ShaderDecoding::Nv12) => color_mapper_int = COLOR_MAPPER_OFF, + _ => return Err(RectangleError::MissingColorMapper), + }, }, 4 => { if color_mapper.is_some() { @@ -304,7 +333,7 @@ } } num_components => { - return Err(RectangleError::UnsupportedComponentCount(num_components)) + return Err(RectangleError::UnsupportedComponentCount(num_components)); } } @@ -442,7 +471,7 @@ impl RectangleDrawData { BindGroupEntry::DefaultTextureView(texture_float), BindGroupEntry::DefaultTextureView(texture_sint), BindGroupEntry::DefaultTextureView(texture_uint), - BindGroupEntry::DefaultTextureView(colormap_texture), + BindGroupEntry::DefaultTextureView(colormap_texture) ], layout: rectangle_renderer.bind_group_layout, }, @@ -475,7 +504,7 @@ impl Renderer for RectangleRenderer { let bind_group_layout = pools.bind_group_layouts.get_or_create( device, - &BindGroupLayoutDesc { + &(BindGroupLayoutDesc { label: "RectangleRenderer::bind_group_layout".into(), entries: vec![ wgpu::BindGroupLayoutEntry { @@ -538,15 +567,15 @@ count: None, }, ], - }, + }), ); let pipeline_layout = pools.pipeline_layouts.get_or_create( device, - &PipelineLayoutDesc { + &(PipelineLayoutDesc { label: "RectangleRenderer::pipeline_layout".into(), entries: vec![shared_data.global_bindings.layout, bind_group_layout], - }, + }), &pools.bind_group_layouts, ); @@ -591,20 +620,20 @@ ); let render_pipeline_picking_layer = pools.render_pipelines.get_or_create( device, - &RenderPipelineDesc { + &(RenderPipelineDesc { label:
"RectangleRenderer::render_pipeline_picking_layer".into(), fragment_entrypoint: "fs_main_picking_layer".into(), render_targets: smallvec![Some(PickingLayerProcessor::PICKING_LAYER_FORMAT.into())], depth_stencil: PickingLayerProcessor::PICKING_LAYER_DEPTH_STATE, multisample: PickingLayerProcessor::PICKING_LAYER_MSAA_STATE, ..render_pipeline_desc_color.clone() - }, + }), &pools.pipeline_layouts, &pools.shader_modules, ); let render_pipeline_outline_mask = pools.render_pipelines.get_or_create( device, - &RenderPipelineDesc { + &(RenderPipelineDesc { label: "RectangleRenderer::render_pipeline_outline_mask".into(), fragment_entrypoint: "fs_main_outline_mask".into(), render_targets: smallvec![Some(OutlineMaskProcessor::MASK_FORMAT.into())], @@ -613,7 +642,7 @@ impl Renderer for RectangleRenderer { &shared_data.config.device_caps, ), ..render_pipeline_desc_color - }, + }), &pools.pipeline_layouts, &pools.shader_modules, ); diff --git a/crates/re_renderer/src/workspace_shaders.rs b/crates/re_renderer/src/workspace_shaders.rs index 7d999da73645..845b90623493 100644 --- a/crates/re_renderer/src/workspace_shaders.rs +++ b/crates/re_renderer/src/workspace_shaders.rs @@ -37,6 +37,12 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/decodings.wgsl"); + let content = include_str!("../shader/decodings.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = Path::new("shader/depth_cloud.wgsl"); let content = include_str!("../shader/depth_cloud.wgsl").into(); diff --git a/crates/re_space_view_bar_chart/src/space_view_class.rs b/crates/re_space_view_bar_chart/src/space_view_class.rs index c99c4ecaa3a3..e3285e850b95 100644 --- a/crates/re_space_view_bar_chart/src/space_view_class.rs +++ b/crates/re_space_view_bar_chart/src/space_view_class.rs @@ -163,6 +163,13 @@ impl SpaceViewClass for BarChartSpaceView { ); continue; } + TensorBuffer::Nv12(_) => { + re_log::warn_once!( + "trying to display NV12 data as a bar chart ({:?})", + ent_path + ); + continue; + } }; plot_ui.bar_chart(chart); diff --git a/crates/re_space_view_spatial/src/picking.rs b/crates/re_space_view_spatial/src/picking.rs index ec84f04ce58a..74c752addc50 100644 --- a/crates/re_space_view_spatial/src/picking.rs +++ b/crates/re_space_view_spatial/src/picking.rs @@ -264,15 +264,13 @@ fn picking_textured_rects(context: &PickingContext, images: &[ViewerImage]) -> V let v = dir_from_rect_top_left.dot(rect.extent_v) / rect.extent_v.length_squared(); if (0.0..=1.0).contains(&u) && (0.0..=1.0).contains(&v) { + let [width, height] = rect.colormapped_texture.width_height(); hits.push(PickingRayHit { instance_path_hash: InstancePathHash { entity_path_hash: image.ent_path.hash(), instance_key: InstanceKey::from_2d_image_coordinate( - [ - (u * rect.colormapped_texture.texture.width() as f32) as u32, - (v * rect.colormapped_texture.texture.height() as f32) as u32, - ], - rect.colormapped_texture.texture.width() as u64, + [(u * width as f32) as u32, (v * height as f32) as u32], + width as u64, ), }, space_position: intersection_world, diff --git a/crates/re_space_view_tensor/src/space_view_class.rs b/crates/re_space_view_tensor/src/space_view_class.rs index 9dfb97be53c6..14342de2b04a 100644 --- a/crates/re_space_view_tensor/src/space_view_class.rs +++ b/crates/re_space_view_tensor/src/space_view_class.rs @@ -327,7 +327,7 @@ fn paint_tensor_slice( &tensor_stats, state, )?; - let [width, height] = colormapped_texture.texture.width_height(); + let [width, height] = 
colormapped_texture.width_height(); let img_size = egui::vec2(width as _, height as _); let img_size = Vec2::max(Vec2::splat(1.0), img_size); // better safe than sorry @@ -735,7 +735,7 @@ fn selectors_ui(ui: &mut egui::Ui, state: &mut PerTensorState, tensor: &TensorDa // Make the slider as big as needed: const MIN_SLIDER_WIDTH: f32 = 64.0; if ui.available_width() >= MIN_SLIDER_WIDTH { - ui.spacing_mut().slider_width = (size as f32 * 4.0) + ui.spacing_mut().slider_width = ((size as f32) * 4.0) .at_least(MIN_SLIDER_WIDTH) .at_most(ui.available_width()); ui.add(egui::Slider::new(selector_value, 0..=size - 1).show_value(false)) diff --git a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs index 1ab674e5a73f..90616620b339 100644 --- a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs +++ b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs @@ -1,9 +1,12 @@ use re_log_types::RowId; use re_renderer::{ - renderer::ColormappedTexture, + renderer::{ColormappedTexture, ShaderDecoding}, resource_managers::{GpuTexture2D, Texture2DCreationDesc, TextureManager2DError}, }; -use re_types::tensor_data::{DecodedTensor, TensorCastError, TensorDataType}; +use re_types::{ + datatypes::TensorBuffer, + tensor_data::{DecodedTensor, TensorCastError, TensorDataType}, +}; use re_viewer_context::{ gpu_bridge::{self, tensor_data_range_heuristic, RangeError}, TensorStats, @@ -48,6 +51,10 @@ pub fn colormapped_texture( color_mapper: Some(re_renderer::renderer::ColorMapper::Function( color_mapping.map, )), + shader_decoding: match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(ShaderDecoding::Nv12), + _ => None, + }, }) } diff --git a/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs b/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs index 20cb387f0f44..d78289e2fbae 100644 --- a/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs +++ b/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs @@ -52,6 +52,9 @@ table F64Buffer(order: 100, transparent) { table JPEGBuffer(order: 100, transparent) { data: [ubyte] (order: 100); } +table NV12Buffer(order: 100, transparent) { + data: [ubyte] (order: 100); +} /// The underlying storage for a `Tensor`. @@ -72,4 +75,5 @@ union TensorBuffer ( F32: F32Buffer (transparent, order:1000), F64: F64Buffer (transparent, order:1200), JPEG: JPEGBuffer (transparent, order:1300), + NV12: NV12Buffer (transparent, order:1400), } diff --git a/crates/re_types/src/datatypes/tensor_buffer.rs b/crates/re_types/src/datatypes/tensor_buffer.rs index 571fbe529f39..0d9d6ff85e71 100644 --- a/crates/re_types/src/datatypes/tensor_buffer.rs +++ b/crates/re_types/src/datatypes/tensor_buffer.rs @@ -19,18 +19,19 @@ /// Tensor elements are stored in a contiguous buffer of a single type. 
#[derive(Clone, PartialEq)] pub enum TensorBuffer { - U8(::re_types_core::ArrowBuffer<u8>), - U16(::re_types_core::ArrowBuffer<u16>), - U32(::re_types_core::ArrowBuffer<u32>), - U64(::re_types_core::ArrowBuffer<u64>), - I8(::re_types_core::ArrowBuffer<i8>), - I16(::re_types_core::ArrowBuffer<i16>), - I32(::re_types_core::ArrowBuffer<i32>), - I64(::re_types_core::ArrowBuffer<i64>), - F16(::re_types_core::ArrowBuffer<arrow2::types::f16>), - F32(::re_types_core::ArrowBuffer<f32>), - F64(::re_types_core::ArrowBuffer<f64>), - Jpeg(::re_types_core::ArrowBuffer<u8>), + U8(crate::ArrowBuffer<u8>), + U16(crate::ArrowBuffer<u16>), + U32(crate::ArrowBuffer<u32>), + U64(crate::ArrowBuffer<u64>), + I8(crate::ArrowBuffer<i8>), + I16(crate::ArrowBuffer<i16>), + I32(crate::ArrowBuffer<i32>), + I64(crate::ArrowBuffer<i64>), + F16(crate::ArrowBuffer<arrow2::types::f16>), + F32(crate::ArrowBuffer<f32>), + F64(crate::ArrowBuffer<f64>), + Jpeg(crate::ArrowBuffer<u8>), + Nv12(crate::ArrowBuffer<u8>), } impl<'a> From<TensorBuffer> for ::std::borrow::Cow<'a, TensorBuffer> { @@ -199,9 +200,21 @@ is_nullable: false, metadata: [].into(), }, + Field { + name: "NV12".to_owned(), + data_type: DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + is_nullable: false, + metadata: [].into(), + }, ], Some(vec![ 0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 8i32, 9i32, 10i32, 11i32, 12i32, + 13i32, ]), UnionMode::Dense, ) @@ -242,6 +255,7 @@ Some(TensorBuffer::F32(_)) => 10i8, Some(TensorBuffer::F64(_)) => 11i8, Some(TensorBuffer::Jpeg(_)) => 12i8, + Some(TensorBuffer::Nv12(_)) => 13i8, }) .collect(), vec![ @@ -891,6 +905,60 @@ .boxed() } }, + { + let (somes, nv12): (Vec<_>, Vec<_>) = data + .iter() + .filter(|datum| matches!(datum.as_deref(), Some(TensorBuffer::Nv12(_)))) + .map(|datum| { + let datum = match datum.as_deref() { + Some(TensorBuffer::Nv12(v)) => Some(v.clone()), + _ => None, + }; + (datum.is_some(), datum) + }) + .unzip(); + let nv12_bitmap: Option<::arrow2::bitmap::Bitmap> = { + let any_nones = somes.iter().any(|some| !*some); + any_nones.then(|| somes.into()) + }; + { + use arrow2::{buffer::Buffer, offset::OffsetsBuffer}; + let nv12_inner_data: Buffer<_> = nv12 + .iter() + .flatten() + .map(|b| b.as_slice()) + .collect::<Vec<_>>() + .concat() + .into(); + let nv12_inner_bitmap: Option<::arrow2::bitmap::Bitmap> = None; + let offsets = ::arrow2::offset::Offsets::<i32>::try_from_lengths( + nv12.iter().map(|opt| { + opt.as_ref() + .map(|datum| datum.num_instances()) + .unwrap_or_default() + }), + ) + .unwrap() + .into(); + ListArray::new( + DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + offsets, + PrimitiveArray::new( + DataType::UInt8, + nv12_inner_data, + nv12_inner_bitmap, + ) + .boxed(), + nv12_bitmap, + ) + .boxed() + } + }, ], Some({ let mut u8_offset = 0; @@ -905,6 +973,7 @@ let mut f32_offset = 0; let mut f64_offset = 0; let mut jpeg_offset = 0; + let mut nv12_offset = 0; let mut nulls_offset = 0; data.iter() .map(|v| match v.as_deref() { @@ -973,6 +1042,11 @@ jpeg_offset += 1; offset } + Some(TensorBuffer::Nv12(_)) => { + let offset = nv12_offset; + nv12_offset += 1; + offset + } }) .collect() }), @@ -1137,10 +1211,21 @@ is_nullable: false, metadata: [].into(), }, + Field { + name:
"NV12".to_owned(), + data_type: DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + is_nullable: false, + metadata: [].into(), + }, ], Some(vec![ 0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 8i32, 9i32, 10i32, - 11i32, 12i32, + 11i32, 12i32, 13i32, ]), UnionMode::Dense, ), @@ -2045,6 +2130,77 @@ impl ::re_types_core::Loggable for TensorBuffer { } .collect::>() }; + let nv12 = { + if 13usize >= arrow_data_arrays.len() { + return Ok(Vec::new()); + } + let arrow_data = &*arrow_data_arrays[13usize]; + { + let arrow_data = arrow_data + .as_any() + .downcast_ref::<::arrow2::array::ListArray>() + .ok_or_else(|| { + crate::DeserializationError::datatype_mismatch( + DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + arrow_data.data_type().clone(), + ) + }) + .with_context("rerun.datatypes.TensorBuffer#NV12")?; + if arrow_data.is_empty() { + Vec::new() + } else { + let arrow_data_inner = { + let arrow_data_inner = &**arrow_data.values(); + arrow_data_inner + .as_any() + .downcast_ref::() + .ok_or_else(|| { + crate::DeserializationError::datatype_mismatch( + DataType::UInt8, + arrow_data_inner.data_type().clone(), + ) + }) + .with_context("rerun.datatypes.TensorBuffer#NV12")? + .values() + }; + let offsets = arrow_data.offsets(); + arrow2::bitmap::utils::ZipValidity::new_with_validity( + offsets.iter().zip(offsets.lengths()), + arrow_data.validity(), + ) + .map(|elem| { + elem.map(|(start, len)| { + let start = *start as usize; + let end = start + len; + if end as usize > arrow_data_inner.len() { + return Err(crate::DeserializationError::offset_slice_oob( + (start, end), + arrow_data_inner.len(), + )); + } + + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + let data = unsafe { + arrow_data_inner + .clone() + .sliced_unchecked(start as usize, end - start as usize) + }; + let data = crate::ArrowBuffer::from(data); + Ok(data) + }) + .transpose() + }) + .collect::>>>()? + } + .into_iter() + } + .collect::>() + }; arrow_data_types .iter() .enumerate() @@ -2282,6 +2438,21 @@ impl ::re_types_core::Loggable for TensorBuffer { ) .with_context("rerun.datatypes.TensorBuffer#JPEG")? }), + 13i8 => TensorBuffer::Nv12({ + if offset as usize >= nv12.len() { + return Err(crate::DeserializationError::offset_oob( + offset as _, + nv12.len(), + )) + .with_context("rerun.datatypes.TensorBuffer#NV12"); + } + + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + unsafe { nv12.get_unchecked(offset as usize) } + .clone() + .ok_or_else(crate::DeserializationError::missing_data) + .with_context("rerun.datatypes.TensorBuffer#NV12")? 
+ }), _ => { return Err( ::re_types_core::DeserializationError::missing_union_arm( diff --git a/crates/re_types/src/datatypes/tensor_buffer_ext.rs b/crates/re_types/src/datatypes/tensor_buffer_ext.rs index 68c7b57f035b..ad563838c678 100644 --- a/crates/re_types/src/datatypes/tensor_buffer_ext.rs +++ b/crates/re_types/src/datatypes/tensor_buffer_ext.rs @@ -18,6 +18,7 @@ impl TensorBuffer { Self::F32(_) => TensorDataType::F32, Self::F64(_) => TensorDataType::F64, Self::Jpeg(_) => TensorDataType::U8, + Self::Nv12(_) => TensorDataType::U8, } } @@ -36,6 +37,7 @@ impl TensorBuffer { Self::F32(buf) => buf.size_in_bytes(), Self::F64(buf) => buf.size_in_bytes(), Self::Jpeg(buf) => buf.size_in_bytes(), + Self::Nv12(buf) => buf.size_in_bytes(), } } @@ -57,7 +59,7 @@ impl TensorBuffer { | Self::F32(_) | Self::F64(_) => false, - Self::Jpeg(_) => true, + Self::Jpeg(_) | Self::Nv12(_) => true, } } } @@ -77,6 +79,7 @@ impl std::fmt::Debug for TensorBuffer { Self::F32(_) => write!(f, "F32({} bytes)", self.size_in_bytes()), Self::F64(_) => write!(f, "F64({} bytes)", self.size_in_bytes()), Self::Jpeg(_) => write!(f, "JPEG({} bytes)", self.size_in_bytes()), + Self::Nv12(_) => write!(f, "NV12({} bytes)", self.size_in_bytes()), } } } diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 9f38c50ded02..046033c3c08a 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -46,28 +46,50 @@ impl TensorData { /// If the tensor can be interpreted as an image, return the height, width, and channels/depth of it. pub fn image_height_width_channels(&self) -> Option<[u64; 3]> { let shape_short = self.shape_short(); - - match shape_short.len() { - 1 => { - // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. - // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. - if self.shape.len() >= 2 { - Some([shape_short[0].size, 1, 1]) - } else { - None + match &self.buffer { + // In the case of NV12, return the shape of the RGB image, not the tensor size. + TensorBuffer::Nv12(_) => { + // NV12 encodes a color image in 1.5 "channels" -> 1 luma (per pixel) + (1U+1V) / 4 pixels. + match shape_short { + [h, w] => Some([h.size * 2 / 3, w.size, 3]), + _ => None, } } - 2 => Some([shape_short[0].size, shape_short[1].size, 1]), - 3 => { - let channels = shape_short[2].size; - if matches!(channels, 3 | 4) { - // rgb, rgba - Some([shape_short[0].size, shape_short[1].size, channels]) - } else { - None + TensorBuffer::U8(_) + | TensorBuffer::U16(_) + | TensorBuffer::U32(_) + | TensorBuffer::U64(_) + | TensorBuffer::I8(_) + | TensorBuffer::I16(_) + | TensorBuffer::I32(_) + | TensorBuffer::I64(_) + | TensorBuffer::F16(_) + | TensorBuffer::F32(_) + | TensorBuffer::F64(_) => { + match shape_short.len() { + 1 => { + // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. + // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. 
+ if self.shape.len() >= 2 { + Some([shape_short[0].size, 1, 1]) + } else { + None + } + } + 2 => Some([shape_short[0].size, shape_short[1].size, 1]), + 3 => { + let channels = shape_short[2].size; + if matches!(channels, 3 | 4) { + // rgb, rgba + Some([shape_short[0].size, shape_short[1].size, channels]) + } else { + None + } + } + _ => None, } } - _ => None, + TensorBuffer::Jpeg(_) => None, } } @@ -154,6 +176,53 @@ TensorBuffer::F32(buf) => Some(TensorElement::F32(buf[offset])), TensorBuffer::F64(buf) => Some(TensorElement::F64(buf[offset])), TensorBuffer::Jpeg(_) => None, // Too expensive to unpack here. + TensorBuffer::Nv12(_) => { + { + // Returns the U32 packed RGBA value of the pixel at index [y, x] if it is valid. + let [y, x] = index else { + return None; + }; + if let Some( + [TensorElement::U8(r), TensorElement::U8(g), TensorElement::U8(b)], + ) = self.get_nv12_pixel(*x, *y) + { + let mut rgba = 0; + rgba |= (r as u32) << 24; + rgba |= (g as u32) << 16; + rgba |= (b as u32) << 8; + rgba |= 0xff; + Some(TensorElement::U32(rgba)) + } else { + None + } + } + } } } + + /// Returns the decoded RGB8 value at the given image coordinates if this tensor is a valid NV12 image. + pub fn get_nv12_pixel(&self, x: u64, y: u64) -> Option<[TensorElement; 3]> { + let TensorBuffer::Nv12(buf) = &self.buffer else { + return None; + }; + match self.image_height_width_channels() { + Some([h, w, _]) => { + let uv_offset = w * h; + // BT.601 limited range, matching decodings.wgsl: Y is offset by 16 and scaled by 219, U/V are offset by 128 and scaled by 224. + let luma = ((buf[(y * w + x) as usize] as f64) - 16.0) / 219.0; + // U and V are stored as interleaved pairs; snap x to the even column of its pair. + let uv_col = (x / 2) * 2; + let u = ((buf[(uv_offset + (y / 2) * w + uv_col) as usize] as f64) - 128.0) / 224.0; + let v = + ((buf[((uv_offset + (y / 2) * w + uv_col) as usize) + 1] as f64) - 128.0) / 224.0; + let r = luma + 1.402 * v; + let g = luma - (0.344 * u + 0.714 * v); + let b = luma + 1.772 * u; + + Some([ + TensorElement::U8(f64::clamp(r * 255.0, 0.0, 255.0) as u8), + TensorElement::U8(f64::clamp(g * 255.0, 0.0, 255.0) as u8), + TensorElement::U8(f64::clamp(b * 255.0, 0.0, 255.0) as u8), + ]) + } + _ => None, } } @@ -170,7 +239,7 @@ // ---------------------------------------------------------------------------- -macro_rules! tensor_type { +macro_rules! ndarray_from_tensor { ($type:ty, $variant:ident) => { impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, $type> { type Error = TensorCastError; @@ -186,7 +255,11 @@ } } } + }; +} +macro_rules! tensor_from_ndarray { + ($type:ty, $variant:ident) => { impl<'a, D: ::ndarray::Dimension> TryFrom<::ndarray::ArrayView<'a, $type, D>> for TensorData { @@ -260,7 +333,13 @@ }; } -tensor_type!(u8, U8); +macro_rules! tensor_type { + ($type:ty, $variant:ident) => { + ndarray_from_tensor!($type, $variant); + tensor_from_ndarray!($type, $variant); + }; +} + tensor_type!(u16, U16); tensor_type!(u32, U32); tensor_type!(u64, U64); @@ -275,6 +354,24 @@ tensor_type!(arrow2::types::f16, F16); tensor_type!(f32, F32); tensor_type!(f64, F64); +tensor_from_ndarray!(u8, U8); + +// Manual expansion of ndarray_from_tensor! macro for `u8` types.
We need to do this because u8 can also hold encoded data +impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> { + type Error = TensorCastError; + + fn try_from(value: &'a TensorData) -> Result<Self, Self::Error> { + match &value.buffer { + TensorBuffer::U8(data) | TensorBuffer::Nv12(data) => { + let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); + ndarray::ArrayViewD::from_shape(shape, bytemuck::cast_slice(data.as_slice())) + .map_err(|err| TensorCastError::BadTensorShape { source: err }) + } + _ => Err(TensorCastError::TypeMismatch), + } + } +} + // Manual expansion of tensor_type! macro for `half::f16` types. We need to do this // because arrow uses its own half type. The two use the same underlying representation // but are still distinct types. `half::f16`, however, is more full-featured and diff --git a/crates/re_types/src/tensor_data.rs b/crates/re_types/src/tensor_data.rs index 8d049ecee3b6..36fca8537d04 100644 --- a/crates/re_types/src/tensor_data.rs +++ b/crates/re_types/src/tensor_data.rs @@ -434,7 +434,7 @@ impl TryFrom<TensorData> for DecodedTensor { | TensorBuffer::F16(_) | TensorBuffer::F32(_) | TensorBuffer::F64(_) => Ok(Self(tensor)), - TensorBuffer::Jpeg(_) => Err(tensor), + TensorBuffer::Jpeg(_) | TensorBuffer::Nv12(_) => Err(tensor), } } } @@ -530,7 +530,8 @@ impl DecodedTensor { | TensorBuffer::I64(_) | TensorBuffer::F16(_) | TensorBuffer::F32(_) - | TensorBuffer::F64(_) => Ok(Self(maybe_encoded_tensor)), + | TensorBuffer::F64(_) + | TensorBuffer::Nv12(_) => Ok(Self(maybe_encoded_tensor)), // Decoding happens on the GPU TensorBuffer::Jpeg(jpeg_bytes) => { let [h, w, c] = maybe_encoded_tensor diff --git a/crates/re_viewer_context/src/gpu_bridge/colormap.rs b/crates/re_viewer_context/src/gpu_bridge/colormap.rs index 0d777ae33db7..c1569ce45c22 100644 --- a/crates/re_viewer_context/src/gpu_bridge/colormap.rs +++ b/crates/re_viewer_context/src/gpu_bridge/colormap.rs @@ -42,6 +42,7 @@ fn colormap_preview_ui( decode_srgb: false, multiply_rgb_with_alpha: false, gamma: 1.0, + shader_decoding: None, color_mapper: Some(re_renderer::renderer::ColorMapper::Function(colormap)), }; diff --git a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs index 03c605316ff1..d3cf2a205ce1 100644 --- a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs +++ b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs @@ -10,7 +10,7 @@ use wgpu::TextureFormat; use re_log_types::RowId; use re_renderer::{ pad_rgb_to_rgba, - renderer::{ColorMapper, ColormappedTexture}, + renderer::{ColorMapper, ColormappedTexture, ShaderDecoding}, resource_managers::Texture2DCreationDesc, RenderContext, }; @@ -87,10 +87,13 @@ pub fn color_tensor_to_gpu( ) -> anyhow::Result<ColormappedTexture> { re_tracing::profile_function!(); let texture_key = hash(tensor_data_row_id); - let [height, width, depth] = height_width_depth(tensor)?; + let [height, width, depth] = texture_height_width_channels(tensor)?; let texture_handle = try_get_or_create_texture(render_ctx, texture_key, || { let (data, format) = match (depth, &tensor.buffer) { + (3, TensorBuffer::Nv12(buf)) => { + (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Uint) + } // Normalize sRGB(A) textures to 0-1 range, and let the GPU premultiply alpha. // Why? Because premul must happen _before_ sRGB decode, so we can't // use a "Srgb-aware" texture like `Rgba8UnormSrgb` for RGBA.
@@ -117,10 +120,18 @@ pub fn color_tensor_to_gpu( .map_err(|err| anyhow::anyhow!("{err}"))?; let texture_format = texture_handle.format(); - + let shader_decoding = match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(ShaderDecoding::Nv12), + _ => None, + }; // TODO(emilk): let the user specify the color space. - let decode_srgb = texture_format == TextureFormat::Rgba8Unorm - || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)?; + let decode_srgb = match shader_decoding { + Some(ShaderDecoding::Nv12) => true, + None => { + texture_format == TextureFormat::Rgba8Unorm + || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)? + } + }; // Special casing for normalized textures used above: let range = if matches!( @@ -130,12 +141,14 @@ pub fn color_tensor_to_gpu( [0.0, 1.0] } else if texture_format == TextureFormat::R8Snorm { [-1.0, 1.0] + } else if shader_decoding == Some(ShaderDecoding::Nv12) { + [0.0, 1.0] } else { // TODO(#2341): The range should be determined by a `DataRange` component. In absence this, heuristics apply. super::tensor_data_range_heuristic(tensor_stats, tensor.dtype())? }; - let color_mapper = if texture_format.components() == 1 { + let color_mapper = if shader_decoding.is_none() && texture_format.components() == 1 { // Single-channel images = luminance = grayscale Some(ColorMapper::Function(re_renderer::Colormap::Grayscale)) } else { @@ -145,14 +158,16 @@ pub fn color_tensor_to_gpu( // TODO(wumpf): There should be a way to specify whether a texture uses pre-multiplied alpha or not. // Assume that the texture is not pre-multiplied if it has an alpha channel. let multiply_rgb_with_alpha = depth == 4; + let gamma = 1.0; Ok(ColormappedTexture { texture: texture_handle, range, decode_srgb, multiply_rgb_with_alpha, - gamma: 1.0, + gamma, color_mapper, + shader_decoding, }) } @@ -170,7 +185,7 @@ pub fn class_id_tensor_to_gpu( re_tracing::profile_function!(); let texture_key = hash(tensor_data_row_id); - let [_height, _width, depth] = height_width_depth(tensor)?; + let [_height, _width, depth] = texture_height_width_channels(tensor)?; anyhow::ensure!( depth == 1, "Cannot apply annotations to tensor of shape {:?}", @@ -228,6 +243,7 @@ pub fn class_id_tensor_to_gpu( multiply_rgb_with_alpha: false, // already premultiplied! 
gamma: 1.0, color_mapper: Some(ColorMapper::Texture(colormap_texture_handle)), + shader_decoding: None, }) } @@ -244,7 +260,7 @@ pub fn depth_tensor_to_gpu( re_tracing::profile_function!(); let texture_key = hash(tensor_data_row_id); - let [_height, _width, depth] = height_width_depth(tensor)?; + let [_height, _width, depth] = texture_height_width_channels(tensor)?; anyhow::ensure!( depth == 1, "Depth tensor of weird shape: {:?}", @@ -264,6 +280,7 @@ multiply_rgb_with_alpha: false, gamma: 1.0, color_mapper: Some(ColorMapper::Function(re_renderer::Colormap::Turbo)), + shader_decoding: None, }) } @@ -304,7 +321,7 @@ fn general_texture_creation_desc_from_tensor<'a>( debug_name: &str, tensor: &'a DecodedTensor, ) -> anyhow::Result<Texture2DCreationDesc<'a>> { - let [height, width, depth] = height_width_depth(tensor)?; + let [height, width, depth] = texture_height_width_channels(tensor)?; let (data, format) = match depth { 1 => { @@ -326,6 +343,10 @@ TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + + TensorBuffer::Nv12(_) => { + unreachable!("An NV12 tensor can only contain a 3-channel image.") + } } } 2 => { @@ -348,6 +369,9 @@ TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorBuffer::Nv12(_) => { + unreachable!("An NV12 tensor can only contain a 3-channel image.") + } } } 3 => { @@ -391,6 +415,9 @@ TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorBuffer::Nv12(buf) => { + (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Unorm) + } } } 4 => { @@ -413,6 +440,9 @@ TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorBuffer::Nv12(_) => { + unreachable!("An NV12 tensor can only contain a 3-channel image.") + } } } depth => { @@ -487,12 +517,17 @@ fn pad_and_narrow_and_cast( // ---------------------------------------------------------------------------- -fn height_width_depth(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { +fn texture_height_width_channels(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { use anyhow::Context as _; - let Some([height, width, channel]) = tensor.image_height_width_channels() else { + let Some([mut height, width, channel]) = tensor.image_height_width_channels() else { anyhow::bail!("Tensor is not an image"); }; + height = match tensor.buffer { + // Correct the texture height for NV12: `tensor.image_height_width_channels` returns the RGB image size, but the actual texture has dimensions (h * 3 / 2, w, 1). + TensorBuffer::Nv12(_) => height * 3 / 2, + _ => height, + }; let [height, width] = [ u32::try_from(height).context("Image height is too large")?, diff --git a/crates/re_viewer_context/src/tensor/tensor_stats.rs b/crates/re_viewer_context/src/tensor/tensor_stats.rs index 04b467630db9..b7edf2ed99b2 100644 --- a/crates/re_viewer_context/src/tensor/tensor_stats.rs +++ b/crates/re_viewer_context/src/tensor/tensor_stats.rs @@ -18,7 +18,7 @@ impl TensorStats { use re_types::tensor_data::TensorDataType; macro_rules! declare_tensor_range_int { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor .fold(...)... } } @@ -31,7 +31,7 @@ } macro_rules!
declare_tensor_range_float { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor.fold( @@ -69,7 +69,7 @@ } macro_rules! declare_tensor_finite_range_float { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor.fold( diff --git a/examples/python/nv12/README.md b/examples/python/nv12/README.md new file mode 100644 index 000000000000..8c3dd22c9e8f --- /dev/null +++ b/examples/python/nv12/README.md @@ -0,0 +1,17 @@ +--- +title: NV12 +tags: [2d, image-encoding, yuv] +description: "Visualize an NV12-encoded video stream from a webcam." +--- + + + +This example displays an NV12-encoded video stream from a webcam in Rerun. + +```bash +pip install -r examples/python/nv12/requirements.txt +python examples/python/nv12/main.py +``` diff --git a/examples/python/nv12/main.py b/examples/python/nv12/main.py new file mode 100755 index 000000000000..a1b5dbf8f272 --- /dev/null +++ b/examples/python/nv12/main.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +""" +Stream NV12 images from a webcam. + +Run: +```sh +pip install -r examples/python/nv12/requirements.txt +python examples/python/nv12/main.py +``` +""" +from __future__ import annotations + +import argparse +import time + +import cv2 +import numpy as np +import numpy.typing as npt +import rerun as rr # pip install rerun-sdk + + +def bgr2nv12(bgr: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]: + # OpenCV frames are BGR, so convert with the BGR variant to avoid swapping channels. + yuv: npt.NDArray[np.uint8] = cv2.cvtColor(bgr, cv2.COLOR_BGR2YUV_I420) + uv_row_cnt = yuv.shape[0] // 3 + uv_plane = np.transpose(yuv[uv_row_cnt * 2 :].reshape(2, -1), [1, 0]) + yuv[uv_row_cnt * 2 :] = uv_plane.reshape(uv_row_cnt, -1) + return yuv + + +def main() -> None: + parser = argparse.ArgumentParser(description="Example of using the Rerun visualizer to display NV12 images.") + rr.script_add_args(parser) + parser.add_argument( + "-t", + "--timeout", + type=float, + default=5, + help="Timeout in seconds, after which the script will stop streaming frames.", + ) + args = parser.parse_args() + + rr.script_setup(args, "NV12 image example") + + cap = cv2.VideoCapture(0) + if not cap.isOpened(): + raise RuntimeError("This example requires a webcam.") + start_time = time.time() + print(f"Started streaming NV12 images for {args.timeout} seconds.") + while start_time + args.timeout > time.time(): + ret, frame = cap.read() + if not ret: + time.sleep(0.01) + continue + rr.log( + "NV12", + rr.ImageEncoded( + contents=bytes(bgr2nv12(frame)), + format=rr.ImageFormat.NV12((frame.shape[0], frame.shape[1])), + ), + ) + time.sleep(0.01) + rr.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/examples/python/nv12/requirements.txt b/examples/python/nv12/requirements.txt new file mode 100644 index 000000000000..4aa030be3b15 --- /dev/null +++ b/examples/python/nv12/requirements.txt @@ -0,0 +1,3 @@ +rerun-sdk>=0.10 +opencv-python +numpy diff --git a/examples/python/requirements.txt b/examples/python/requirements.txt index 8dbf6d9c98d1..8db00a0eb0ea 100644 --- a/examples/python/requirements.txt +++ b/examples/python/requirements.txt @@ -16,6 +16,7 @@ -r multiprocessing/requirements.txt -r multithreading/requirements.txt -r notebook/requirements.txt +-r nv12/requirements.txt -r objectron/requirements.txt -r open_photogrammetry_format/requirements.txt -r plots/requirements.txt diff --git
a/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp b/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp index 950ec0e844a2..4cd3094b3d7a 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp @@ -63,6 +63,11 @@ namespace rerun { arrow::list(arrow::field("item", arrow::uint8(), false)), false ), + arrow::field( + "NV12", + arrow::list(arrow::field("item", arrow::uint8(), false)), + false + ), }); return datatype; } @@ -126,6 +131,10 @@ namespace rerun { memory_pool, std::make_shared<arrow::UInt8Builder>(memory_pool) ), + std::make_shared<arrow::ListBuilder>( + memory_pool, + std::make_shared<arrow::UInt8Builder>(memory_pool) + ), }), arrow_datatype() )); @@ -289,6 +298,17 @@ namespace rerun { ); break; } + case detail::TensorBufferTag::NV12: { + auto variant_builder = + static_cast<arrow::ListBuilder *>(variant_builder_untyped); + (void)variant_builder; + return Error( + ErrorCode::NotImplemented, + "Failed to serialize TensorBuffer: list types in unions not yet " + "implemented" + ); + break; + } } } diff --git a/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp b/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp index c99d492c15e0..b147b3e2c0cc 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp @@ -38,6 +38,7 @@ namespace rerun { F32, F64, JPEG, + NV12, }; union TensorBufferData { @@ -65,6 +66,8 @@ std::vector<uint8_t> jpeg; + + std::vector<uint8_t> nv12; + TensorBufferData() {} ~TensorBufferData() {} @@ -137,6 +140,10 @@ _data.jpeg = other._data.jpeg; break; } + case detail::TensorBufferTag::NV12: { + _data.nv12 = other._data.nv12; + break; + } case detail::TensorBufferTag::NONE: const void *otherbytes = reinterpret_cast<const void *>(&other._data); void *thisbytes = reinterpret_cast<void *>(&this->_data); @@ -225,6 +232,11 @@ _data.jpeg.~TypeAlias(); break; } + case detail::TensorBufferTag::NV12: { + typedef std::vector<uint8_t> TypeAlias; + _data.nv12.~TypeAlias(); + break; + } } } @@ -372,6 +384,14 @@ return self; } + static TensorBuffer nv12(std::vector<uint8_t> nv12) { + typedef std::vector<uint8_t> TypeAlias; + TensorBuffer self; + self._tag = detail::TensorBufferTag::NV12; + new (&self._data.nv12) TypeAlias(std::move(nv12)); + return self; + } + /// Returns the arrow data type this type corresponds to.
static const std::shared_ptr<arrow::DataType> &arrow_datatype(); diff --git a/rerun_cpp/src/rerun/datatypes/tensor_buffer_ext.cpp b/rerun_cpp/src/rerun/datatypes/tensor_buffer_ext.cpp index f26d4f698b49..e6b70d799afa 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_buffer_ext.cpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_buffer_ext.cpp @@ -102,6 +102,11 @@ namespace rerun { case detail::TensorBufferTag::F64: { return _data.f64.size(); } + case detail::TensorBufferTag::NV12: { + assert( + false && "Can't ask for the number of elements in an NV12 encoded image" + ); + } case detail::TensorBufferTag::JPEG: { assert(false && "Can't ask for the number of elements in a JPEG"); } diff --git a/rerun_py/rerun_sdk/rerun/_image.py b/rerun_py/rerun_sdk/rerun/_image.py index 65ca974eb043..3bc106c0db4d 100644 --- a/rerun_py/rerun_sdk/rerun/_image.py +++ b/rerun_py/rerun_sdk/rerun/_image.py @@ -2,7 +2,6 @@ import io import pathlib -from enum import Enum from typing import IO, Iterable import numpy as np @@ -16,28 +15,55 @@ __all__ = ["ImageFormat", "ImageEncoded"] -class ImageFormat(Enum): +class ImageFormat: """Image file format.""" - BMP = "BMP" - """BMP format.""" + name: str - GIF = "GIF" - """GIF format.""" + BMP: ImageFormat + GIF: ImageFormat + JPEG: ImageFormat + PNG: ImageFormat + TIFF: ImageFormat + NV12: type[NV12] - JPEG = "JPEG" - """JPEG format.""" - - PNG = "PNG" - """PNG format.""" - - TIFF = "TIFF" - """TIFF format.""" + def __init__(self, name: str): + self.name = name def __str__(self) -> str: return self.name +class NV12(ImageFormat): + """NV12 format.""" + + name = "NV12" + size_hint: tuple[int, int] + + def __init__(self, size_hint: tuple[int, int]) -> None: + """ + An NV12-encoded image. + + Parameters + ---------- + size_hint: + A tuple of (height, width), specifying the RGB size of the image. + """ + self.size_hint = size_hint + + +# Assign the variants. +# This allows for Rust-like enums, for example: +# ImageFormat.NV12((1080, 1920)) +# where isinstance(ImageFormat.NV12((1080, 1920)), ImageFormat) and isinstance(ImageFormat.NV12((1080, 1920)), NV12) are both True. +ImageFormat.BMP = ImageFormat("BMP") +ImageFormat.GIF = ImageFormat("GIF") +ImageFormat.JPEG = ImageFormat("JPEG") +ImageFormat.PNG = ImageFormat("PNG") +ImageFormat.TIFF = ImageFormat("TIFF") +ImageFormat.NV12 = NV12 + + class ImageEncoded(AsComponents): """ A monochrome or color image encoded with a common format (PNG, JPEG, etc.). @@ -87,11 +113,24 @@ def __init__( if buffer is None: raise ValueError("Input data could not be coerced to IO[bytes]") + formats = None if format is not None: + if isinstance(format, NV12): + np_buf = np.frombuffer(buffer.read(), dtype=np.uint8) + np_buf = np_buf.reshape(int(format.size_hint[0] * 1.5), format.size_hint[1]) + tensor_buffer = TensorBuffer(np_buf) + tensor_buffer.kind = "nv12" + self.data = TensorData( + buffer=tensor_buffer, + shape=[ + TensorDimension(np_buf.shape[0], "height"), + TensorDimension(np_buf.shape[1], "width"), + TensorDimension(1, "depth"), + ], + ) + self.draw_order = draw_order + return formats = (str(format),) - else: - formats = None - # Note that PIL loading is lazy. This will only identify the type of file # and not decode the whole jpeg.
img_data = PILImage.open(buffer, formats=formats) diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py index e19ada6a0750..2746bc468324 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py @@ -53,9 +53,11 @@ class TensorBuffer(TensorBufferExt): F64 (npt.NDArray[np.float64]): JPEG (npt.NDArray[np.uint8]): + + NV12 (npt.NDArray[np.uint8]): """ - kind: Literal["u8", "u16", "u32", "u64", "i8", "i16", "i32", "i64", "f16", "f32", "f64", "jpeg"] = field( + kind: Literal["u8", "u16", "u32", "u64", "i8", "i16", "i32", "i64", "f16", "f32", "f64", "jpeg", "nv12"] = field( default="u8" ) @@ -176,6 +178,12 @@ def __init__(self) -> None: nullable=False, metadata={}, ), + pa.field( + "NV12", + pa.list_(pa.field("item", pa.uint8(), nullable=False, metadata={})), + nullable=False, + metadata={}, + ), ] ), self._TYPE_NAME, diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py index ceb708255bb7..dae3c8e326a9 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py @@ -155,6 +155,12 @@ def __init__(self) -> None: nullable=False, metadata={}, ), + pa.field( + "NV12", + pa.list_(pa.field("item", pa.uint8(), nullable=False, metadata={})), + nullable=False, + metadata={}, + ), ] ), nullable=False, diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py index e0d8224e969c..7dc237206dfc 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py @@ -137,7 +137,7 @@ def __init__( elif array is not None: self.buffer = TensorBuffer(array.flatten()) - if self.buffer.kind != "jpeg": + if self.buffer.kind != "jpeg" and self.buffer.kind != "nv12": expected_buffer_size = prod(d.size for d in self.shape) if len(self.buffer.inner) != expected_buffer_size: @@ -233,9 +233,10 @@ def _build_buffer_array(buffer: TensorBufferLike) -> pa.Array: buffer = buffer.flatten() data_inner = pa.ListArray.from_arrays(pa.array([0, len(buffer)]), buffer) - if kind == "jpeg": discriminant = "JPEG" + elif kind == "nv12": + discriminant = "NV12" else: assert buffer.dtype.type in DTYPE_MAP, f"Failed to find {buffer.dtype.type} in f{DTYPE_MAP}" discriminant = DTYPE_MAP[buffer.dtype.type] diff --git a/tests/python/nv12image/main.py b/tests/python/nv12image/main.py new file mode 100755 index 000000000000..20da9d24294c --- /dev/null +++ b/tests/python/nv12image/main.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Testing NV12 image encoding.""" +from __future__ import annotations + +import argparse +import os +from typing import Any + +import cv2 +import numpy as np +import rerun + + +def bgra2nv12(bgra: Any) -> np.ndarray: + yuv = cv2.cvtColor(bgra, cv2.COLOR_BGRA2YUV_I420) + uv_row_cnt = yuv.shape[0] // 3 + uv_plane = np.transpose(yuv[uv_row_cnt * 2 :].reshape(2, -1), [1, 0]) + yuv[uv_row_cnt * 2 :] = uv_plane.reshape(uv_row_cnt, -1) + return yuv + + +def main() -> None: + parser = argparse.ArgumentParser(description="Displaying NV12 encoded images.") + rerun.script_add_args(parser) + args = parser.parse_args() + + rerun.script_setup(args, "rerun_test_nv12image") + + # Make sure you use a colorful image! 
+ dir_path = os.path.dirname(os.path.realpath(__file__)) + img_path = f"{dir_path}/../../../crates/re_ui/data/logo_dark_mode.png" + img_bgra = cv2.imread(img_path, cv2.IMREAD_UNCHANGED) + + img_rgb = cv2.cvtColor(img_bgra, cv2.COLOR_BGRA2RGB) + rerun.log("img_reference", rerun.Image(img_rgb)) + + rerun.log( + "img_nv12", + rerun.ImageEncoded( + contents=bytes(bgra2nv12(img_bgra)), + format=rerun.ImageFormat.NV12((img_bgra.shape[0], img_bgra.shape[1])), + ), + ) + + rerun.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/tests/python/nv12image/requirements.txt b/tests/python/nv12image/requirements.txt new file mode 100644 index 000000000000..4364766d7369 --- /dev/null +++ b/tests/python/nv12image/requirements.txt @@ -0,0 +1,3 @@ +numpy +opencv-python +rerun-sdk diff --git a/tests/python/requirements.txt b/tests/python/requirements.txt index 2da31733cced..a9fce2d3701c 100644 --- a/tests/python/requirements.txt +++ b/tests/python/requirements.txt @@ -1 +1,2 @@ -r test_api/requirements.txt +-r nv12image/requirements.txt
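A note for reviewers on the NV12 layout this PR assumes: the raw buffer is a full-resolution luma plane (w × h bytes) followed by a half-resolution plane of interleaved UV pairs (w × h/2 bytes). That is why the buffer is uploaded as an R8 texture of height h * 3 / 2, and why `image_height_width_channels` recovers the RGB height with `* 2 / 3`. The sketch below mirrors the BT.601 limited-range decode from `decodings.wgsl` and `TensorData::get_nv12_pixel` in plain NumPy; the function name and the even-dimension assumption are illustrative, not part of the PR.

```python
import numpy as np


def decode_nv12_to_rgb(nv12: np.ndarray, width: int, height: int) -> np.ndarray:
    """Decode an NV12 buffer of shape (height * 3 // 2, width) into an RGB8 image.

    Assumes even width/height. BT.601 limited range: Y in [16, 235] (scale 219),
    U/V in [16, 240] (scale 224), matching decodings.wgsl.
    """
    assert nv12.shape == (height * 3 // 2, width)
    y_plane = nv12[:height, :].astype(np.float32)
    # The chroma plane holds interleaved (U, V) pairs at quarter resolution.
    uv = nv12[height:, :].reshape(height // 2, width // 2, 2).astype(np.float32)

    y = np.clip(y_plane - 16.0, 0.0, None) / 219.0
    # Nearest-neighbor chroma upsampling, like the shader's texel snapping.
    u = (np.repeat(np.repeat(uv[:, :, 0], 2, axis=0), 2, axis=1) - 128.0) / 224.0
    v = (np.repeat(np.repeat(uv[:, :, 1], 2, axis=0), 2, axis=1) - 128.0) / 224.0

    r = y + 1.402 * v
    g = y - (0.344 * u + 0.714 * v)
    b = y + 1.772 * u
    rgb = np.stack([r, g, b], axis=-1)
    return (np.clip(rgb, 0.0, 1.0) * 255.0).astype(np.uint8)
```

A production decoder might interpolate chroma instead of snapping to the nearest pair; nearest-neighbor is used here to stay faithful to what the shader does at `FILTER_NEAREST`.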
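The `bgr2nv12`/`bgra2nv12` helpers in `examples/python/nv12/main.py` and `tests/python/nv12image/main.py` both rely on the same reshuffle: OpenCV's `*2YUV_I420` conversions emit planar I420 (all U samples, then all V samples), while NV12 wants the two chroma planes interleaved. Here is that step in isolation, assuming the (h * 3 / 2, w) layout cv2 produces; the helper name is illustrative.

```python
import numpy as np


def i420_to_nv12(i420: np.ndarray) -> np.ndarray:
    """Interleave the chroma planes of an I420 buffer (shape (h * 3 // 2, w)) into NV12."""
    luma_rows = i420.shape[0] * 2 // 3
    nv12 = i420.copy()
    # The bottom third of the rows is the U plane followed by the V plane.
    # Reshape to (2, n) so row 0 is every U sample and row 1 is every V sample,
    # then transpose so that flattening row-major yields U0 V0 U1 V1 ... as NV12 expects.
    chroma = i420[luma_rows:].reshape(2, -1)
    nv12[luma_rows:] = chroma.transpose(1, 0).reshape(i420.shape[0] - luma_rows, -1)
    return nv12
```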
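Finally, the height bookkeeping that appears in `ColormappedTexture::width_height`, `texture_height_width_channels`, and the vertex shader's `texcoord.y /= 1.5` all derives from the same 3:2 ratio between texture rows and image rows. A tiny self-check (even image heights only, since NV12 requires even dimensions):

```python
# The GPU texture holds the raw NV12 bytes as an R8 image of shape (h * 3 / 2, w),
# while the logical RGB image is (h, w, 3). These helpers mirror
# texture_height_width_channels and image_height_width_channels respectively.
def texture_height(image_height: int) -> int:
    return image_height * 3 // 2


def image_height(tex_height: int) -> int:
    return tex_height * 2 // 3


# The round trip is exact for even heights:
for h in (480, 720, 1080):
    assert image_height(texture_height(h)) == h
```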