From ed71fe1e0b235e8493f5cddcdccc2e9e44143ca2 Mon Sep 17 00:00:00 2001 From: zrezke Date: Thu, 28 Sep 2023 14:02:06 +0200 Subject: [PATCH 01/20] Merge latest main and old nv12 support --- crates/re_data_ui/src/image.rs | 63 +++++-- crates/re_renderer/shader/decodings.wgsl | 25 +++ crates/re_renderer/shader/rectangle.wgsl | 1 + crates/re_renderer/shader/rectangle_fs.wgsl | 20 ++- crates/re_renderer/src/renderer/mod.rs | 4 +- crates/re_renderer/src/renderer/rectangles.rs | 84 ++++++++-- crates/re_renderer/src/workspace_shaders.rs | 6 + .../src/space_view_class.rs | 7 + .../re_space_view_spatial/src/parts/images.rs | 4 +- crates/re_space_view_spatial/src/ui_2d.rs | 1 + .../src/space_view_class.rs | 8 +- .../src/tensor_slice_to_gpu.rs | 7 +- .../rerun/datatypes/tensor_buffer.fbs | 4 + .../re_types/src/datatypes/tensor_buffer.rs | 158 +++++++++++++++++- .../re_types/src/datatypes/tensor_data_ext.rs | 109 +++++++++--- .../src/gpu_bridge/tensor_to_gpu.rs | 44 ++++- .../src/tensor/tensor_stats.rs | 43 ++--- .../rerun/datatypes/tensor_data_ext.py | 84 +++++++--- .../rerun_sdk/rerun/log_deprecated/image.py | 4 +- 19 files changed, 562 insertions(+), 114 deletions(-) create mode 100644 crates/re_renderer/shader/decodings.wgsl diff --git a/crates/re_data_ui/src/image.rs b/crates/re_data_ui/src/image.rs index 06f25e56080c..3dbd56826fb2 100644 --- a/crates/re_data_ui/src/image.rs +++ b/crates/re_data_ui/src/image.rs @@ -122,6 +122,7 @@ fn tensor_ui( .on_hover_ui(|ui| { // Show larger image on hover let max_size = Vec2::splat(400.0); + println!("THIS IS HOVER:"); show_image_at_max_size( ctx.render_ctx, ctx.re_ui, @@ -133,6 +134,14 @@ fn tensor_ui( }); } + let shape = match tensor.image_height_width_channels() { + Some([h, w, c]) => vec![ + TensorDimension::height(h), + TensorDimension::width(w), + TensorDimension::depth(c), + ], + None => tensor.shape.clone(), + }; ui.label(format!( "{} x {}{}", tensor.dtype(), @@ -211,6 +220,9 @@ fn tensor_ui( } if let Some([_h, _w, channels]) = tensor.image_height_width_channels() { + if let TensorBuffer::Nv12(_) = &tensor.buffer { + return; + } if channels == 3 { if let TensorBuffer::U8(data) = &tensor.buffer { ui.collapsing("Histogram", |ui| { @@ -226,7 +238,7 @@ fn tensor_ui( } fn texture_size(colormapped_texture: &ColormappedTexture) -> Vec2 { - let [w, h] = colormapped_texture.texture.width_height(); + let [w, h] = colormapped_texture.image_width_height(); egui::vec2(w as f32, h as f32) } @@ -244,6 +256,7 @@ fn show_image_at_max_size( desired_size *= (max_size.y / desired_size.y).min(1.0); desired_size }; + println!("Desired size: {:?}", desired_size); let (response, painter) = ui.allocate_painter(desired_size, egui::Sense::hover()); if let Err(err) = gpu_bridge::render_image( @@ -333,6 +346,11 @@ pub fn tensor_summary_ui_grid_contents( )); ui.end_row(); } + TensorBuffer::Nv12(_) => { + re_ui.grid_left_hand_label(ui, "Encoding"); + ui.label("NV12"); + ui.end_row(); + } } let TensorStats { @@ -352,8 +370,9 @@ pub fn tensor_summary_ui_grid_contents( } // Show finite range only if it is different from the actual range. if let (true, Some((min, max))) = (range != finite_range, finite_range) { - ui.label("Finite data range") - .on_hover_text("The finite values (ignoring all NaN & -Inf/+Inf) of the tensor range within these bounds"); + ui.label("Finite data range").on_hover_text( + "The finite values (ignoring all NaN & -Inf/+Inf) of the tensor range within these bounds" + ); ui.monospace(format!( "[{} - {}]", re_format::format_f64(*min), @@ -412,8 +431,8 @@ fn show_zoomed_image_region_tooltip( use egui::remap_clamp; let center_texel = [ - (remap_clamp(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize), - (remap_clamp(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize), + remap_clamp(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize, + remap_clamp(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize, ]; show_zoomed_image_region_area_outline( parent_ui.ctx(), @@ -535,7 +554,7 @@ fn try_show_zoomed_image_region( )?; const POINTS_PER_TEXEL: f32 = 5.0; - let size = Vec2::splat((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32 * POINTS_PER_TEXEL); + let size = Vec2::splat(((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32) * POINTS_PER_TEXEL); let (_id, zoom_rect) = ui.allocate_space(size); let painter = ui.painter(); @@ -547,7 +566,10 @@ fn try_show_zoomed_image_region( let image_rect_on_screen = egui::Rect::from_min_size( zoom_rect.center() - POINTS_PER_TEXEL - * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + * egui::vec2( + (center_texel[0] as f32) + 0.5, + (center_texel[1] as f32) + 0.5, + ), POINTS_PER_TEXEL * egui::vec2(width as f32, height as f32), ); @@ -583,7 +605,11 @@ fn try_show_zoomed_image_region( let zoom = rect.width(); let image_rect_on_screen = egui::Rect::from_min_size( rect.center() - - zoom * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + - zoom + * egui::vec2( + (center_texel[0] as f32) + 0.5, + (center_texel[1] as f32) + 0.5, + ), zoom * egui::vec2(width as f32, height as f32), ); gpu_bridge::render_image( @@ -634,7 +660,7 @@ fn tensor_pixel_value_ui( // This is a depth map if let Some(raw_value) = tensor.get(&[y, x]) { let raw_value = raw_value.as_f64(); - let meters = raw_value / meter as f64; + let meters = raw_value / (meter as f64); ui.label("Depth:"); if meters < 1.0 { ui.monospace(format!("{:.1} mm", meters * 1e3)); @@ -652,11 +678,20 @@ fn tensor_pixel_value_ui( .map(|v| format!("Val: {v}")), 3 => { // TODO(jleibs): Track RGB ordering somehow -- don't just assume it - if let (Some(r), Some(g), Some(b)) = ( - tensor.get_with_image_coords(x, y, 0), - tensor.get_with_image_coords(x, y, 1), - tensor.get_with_image_coords(x, y, 2), - ) { + if let Some([r, g, b]) = match &tensor.buffer { + TensorBuffer::Nv12(_) => tensor.get_nv12_pixel(x, y), + _ => { + if let (Some(r), Some(g), Some(b)) = ( + tensor.get_with_image_coords(x, y, 0), + tensor.get_with_image_coords(x, y, 1), + tensor.get_with_image_coords(x, y, 2), + ) { + Some([r, g, b]) + } else { + None + } + } + } { match (r, g, b) { (TensorElement::U8(r), TensorElement::U8(g), TensorElement::U8(b)) => { Some(format!("R: {r}, G: {g}, B: {b}, #{r:02X}{g:02X}{b:02X}")) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl new file mode 100644 index 000000000000..ae1f716c04c9 --- /dev/null +++ b/crates/re_renderer/shader/decodings.wgsl @@ -0,0 +1,25 @@ +#import <./types.wgsl> + + +/// Loads an RGBA texel from a texture holding an NV12 encoded image at the given screen space coordinates. +fn decode_nv12(texture: texture_2d, coords: IVec2) -> Vec4 { + let texture_dim = Vec2(textureDimensions(texture).xy); + let uv_offset = u32(floor(texture_dim.y / 1.5)); + let uv_row = u32(coords.y / 2); + var uv_col = u32(coords.x / 2) * 2u; + + let c = UVec2(coords); + let y = (f32(textureLoad(texture, c, 0).r) - 16.0) / 219.0; + let u = (f32(textureLoad(texture, UVec2(u32(uv_col), uv_offset + uv_row), 0).r) - 128.0) / 224.0; + let v = (f32(textureLoad(texture, UVec2((u32(uv_col) + 1u), uv_offset + uv_row), 0).r) - 128.0) / 224.0; + + // Get RGB values and apply reverse gamma correction since we are rendering to sRGB framebuffer + // let r = pow(y + 1.402 * v, 2.2); + // let g = pow(y - (0.344 * u + 0.714 * v), 2.2); + // let b = pow(y + 1.772 * u, 2.2); + + let r = y + 1.402 * v; + let g = y - (0.344 * u + 0.714 * v); + let b = y + 1.772 * u; + return Vec4(r, g, b, 1.0); +} diff --git a/crates/re_renderer/shader/rectangle.wgsl b/crates/re_renderer/shader/rectangle.wgsl index da481a48e338..0f546f9a6045 100644 --- a/crates/re_renderer/shader/rectangle.wgsl +++ b/crates/re_renderer/shader/rectangle.wgsl @@ -6,6 +6,7 @@ const SAMPLE_TYPE_FLOAT = 1u; const SAMPLE_TYPE_SINT = 2u; const SAMPLE_TYPE_UINT = 3u; +const SAMPLE_TYPE_NV12 = 4u; // How do we do colormapping? const COLOR_MAPPER_OFF = 1u; diff --git a/crates/re_renderer/shader/rectangle_fs.wgsl b/crates/re_renderer/shader/rectangle_fs.wgsl index 28a2801d39d5..59e43bb2c102 100644 --- a/crates/re_renderer/shader/rectangle_fs.wgsl +++ b/crates/re_renderer/shader/rectangle_fs.wgsl @@ -1,6 +1,7 @@ #import <./colormap.wgsl> #import <./rectangle.wgsl> #import <./utils/srgb.wgsl> +#import <./decodings.wgsl> fn is_magnifying(pixel_coord: Vec2) -> bool { return fwidth(pixel_coord.x) < 1.0; @@ -101,7 +102,24 @@ fn fs_main(in: VertexOut) -> @location(0) Vec4 { let v11 = decode_color(Vec4(textureLoad(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions), 0))); normalized_value = filter_bilinear(coord, v00, v01, v10, v11); } - } else { + } else if rect_info.sample_type == SAMPLE_TYPE_NV12 { + let texture_dimensions = Vec2(textureDimensions(texture_uint).xy); + let coord = in.texcoord * texture_dimensions; + if tex_filter(coord) == FILTER_NEAREST { + // nearest + normalized_value = (Vec4(decode_nv12(texture_uint, + clamp_to_edge_nearest_neighbor(coord, texture_dimensions)))); + } else { + // bilinear + let v00 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, -0.5), texture_dimensions)))); + let v01 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, 0.5), texture_dimensions)))); + let v10 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, -0.5), texture_dimensions)))); + let v11 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions)))); + normalized_value = filter_bilinear(coord, v00, v01, v10, v11); + } + } + + else { return ERROR_RGBA; // unknown sample type } diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index d44ccf85a0d8..80e28f0ca4de 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -21,8 +21,8 @@ pub use test_triangle::TestTriangleDrawData; mod rectangles; pub use rectangles::{ - ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, TextureFilterMag, - TextureFilterMin, TexturedRect, + ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, TextureEncoding, + TextureFilterMag, TextureFilterMin, TexturedRect, }; mod mesh_renderer; diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index a1fc42d25a39..38fba7fa4f2d 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -48,6 +48,12 @@ pub enum TextureFilterMin { // TODO(andreas): Offer mipmapping here? } +/// Describes how the color information is encoded in the texture. +#[derive(Clone, Debug, PartialEq)] +pub enum TextureEncoding { + Nv12, +} + /// Describes a texture and how to map it to a color. #[derive(Clone)] pub struct ColormappedTexture { @@ -83,6 +89,9 @@ pub struct ColormappedTexture { /// Setting a color mapper for a four-component texture is an error. /// Failure to set a color mapper for a one-component texture is an error. pub color_mapper: Option, + + /// For textures that don't store color information in conventional RGB color space, you need to supply a TextureEncoding. + pub encoding: Option, } /// How to map the normalized `.r` component to a color. @@ -113,6 +122,17 @@ impl ColormappedTexture { gamma: 1.0, multiply_rgb_with_alpha: true, color_mapper: None, + encoding: None, + } + } + + pub fn image_width_height(&self) -> [u32; 2] { + match self.encoding { + Some(TextureEncoding::Nv12) => { + let [width, height] = self.texture.width_height(); + [width, (height as f64 / 1.5) as u32] + } + _ => self.texture.width_height(), } } } @@ -134,6 +154,16 @@ pub struct TexturedRect { pub options: RectangleOptions, } +impl TexturedRect { + /// The uv extent of the image, taking into account the texture encoding. + pub fn image_extent_uv(&self) -> [glam::Vec3; 2] { + match self.colormapped_texture.encoding { + Some(TextureEncoding::Nv12) => [self.extent_u, self.extent_v / 1.5], + _ => [self.extent_u, self.extent_v], + } + } +} + #[derive(Clone)] pub struct RectangleOptions { pub texture_filter_magnification: TextureFilterMag, @@ -188,7 +218,9 @@ pub enum RectangleError { } mod gpu_data { - use crate::wgpu_buffer_types; + use gltf::json::extensions::texture; + + use crate::{texture_info, wgpu_buffer_types}; use super::{ColorMapper, RectangleError, TexturedRect}; @@ -198,6 +230,7 @@ mod gpu_data { const SAMPLE_TYPE_FLOAT: u32 = 1; const SAMPLE_TYPE_SINT: u32 = 2; const SAMPLE_TYPE_UINT: u32 = 3; + const SAMPLE_TYPE_NV12: u32 = 4; // How do we do colormapping? const COLOR_MAPPER_OFF: u32 = 1; @@ -261,6 +294,7 @@ mod gpu_data { gamma, color_mapper, multiply_rgb_with_alpha, + encoding: texture_encoding, } = colormapped_texture; let super::RectangleOptions { @@ -274,7 +308,13 @@ mod gpu_data { let sample_type = match texture_format.sample_type(None) { Some(wgpu::TextureSampleType::Float { .. }) => SAMPLE_TYPE_FLOAT, Some(wgpu::TextureSampleType::Sint) => SAMPLE_TYPE_SINT, - Some(wgpu::TextureSampleType::Uint) => SAMPLE_TYPE_UINT, + Some(wgpu::TextureSampleType::Uint) => { + if texture_encoding == &Some(super::TextureEncoding::Nv12) { + SAMPLE_TYPE_NV12 + } else { + SAMPLE_TYPE_UINT + } + } _ => { return Err(RectangleError::TextureFormatNotSupported(texture_format)); } @@ -292,9 +332,10 @@ mod gpu_data { Some(ColorMapper::Texture(_)) => { color_mapper_int = COLOR_MAPPER_TEXTURE; } - None => { - return Err(RectangleError::MissingColorMapper); - } + None => match texture_encoding { + Some(super::TextureEncoding::Nv12) => color_mapper_int = COLOR_MAPPER_OFF, + _ => return Err(RectangleError::MissingColorMapper), + }, }, 4 => { if color_mapper.is_some() { @@ -304,7 +345,7 @@ mod gpu_data { } } num_components => { - return Err(RectangleError::UnsupportedComponentCount(num_components)) + return Err(RectangleError::UnsupportedComponentCount(num_components)); } } @@ -317,6 +358,15 @@ mod gpu_data { super::TextureFilterMag::Nearest => FILTER_NEAREST, }; + println!( + "ENCODING: {:?} MIN: {:?} MAG: {:?} GAMMA: {:?}, color mapper: {:?}", + rectangle.colormapped_texture.encoding, + minification_filter, + magnification_filter, + gamma, + color_mapper_int + ); + Ok(Self { top_left_corner_position: (*top_left_corner_position).into(), colormap_function, @@ -435,17 +485,17 @@ impl RectangleDrawData { bind_group: ctx.gpu_resources.bind_groups.alloc( &ctx.device, &ctx.gpu_resources, - &BindGroupDesc { + &(BindGroupDesc { label: "RectangleInstance::bind_group".into(), entries: smallvec![ uniform_buffer, BindGroupEntry::DefaultTextureView(texture_float), BindGroupEntry::DefaultTextureView(texture_sint), BindGroupEntry::DefaultTextureView(texture_uint), - BindGroupEntry::DefaultTextureView(colormap_texture), + BindGroupEntry::DefaultTextureView(colormap_texture) ], layout: rectangle_renderer.bind_group_layout, - }, + }), ), draw_outline_mask: rectangle.options.outline_mask.is_some(), }); @@ -475,7 +525,7 @@ impl Renderer for RectangleRenderer { let bind_group_layout = pools.bind_group_layouts.get_or_create( device, - &BindGroupLayoutDesc { + &(BindGroupLayoutDesc { label: "RectangleRenderer::bind_group_layout".into(), entries: vec![ wgpu::BindGroupLayoutEntry { @@ -538,15 +588,15 @@ impl Renderer for RectangleRenderer { count: None, }, ], - }, + }), ); let pipeline_layout = pools.pipeline_layouts.get_or_create( device, - &PipelineLayoutDesc { + &(PipelineLayoutDesc { label: "RectangleRenderer::pipeline_layout".into(), entries: vec![shared_data.global_bindings.layout, bind_group_layout], - }, + }), &pools.bind_group_layouts, ); @@ -591,20 +641,20 @@ impl Renderer for RectangleRenderer { ); let render_pipeline_picking_layer = pools.render_pipelines.get_or_create( device, - &RenderPipelineDesc { + &(RenderPipelineDesc { label: "RectangleRenderer::render_pipeline_picking_layer".into(), fragment_entrypoint: "fs_main_picking_layer".into(), render_targets: smallvec![Some(PickingLayerProcessor::PICKING_LAYER_FORMAT.into())], depth_stencil: PickingLayerProcessor::PICKING_LAYER_DEPTH_STATE, multisample: PickingLayerProcessor::PICKING_LAYER_MSAA_STATE, ..render_pipeline_desc_color.clone() - }, + }), &pools.pipeline_layouts, &pools.shader_modules, ); let render_pipeline_outline_mask = pools.render_pipelines.get_or_create( device, - &RenderPipelineDesc { + &(RenderPipelineDesc { label: "RectangleRenderer::render_pipeline_outline_mask".into(), fragment_entrypoint: "fs_main_outline_mask".into(), render_targets: smallvec![Some(OutlineMaskProcessor::MASK_FORMAT.into())], @@ -613,7 +663,7 @@ impl Renderer for RectangleRenderer { &shared_data.config.device_caps, ), ..render_pipeline_desc_color - }, + }), &pools.pipeline_layouts, &pools.shader_modules, ); diff --git a/crates/re_renderer/src/workspace_shaders.rs b/crates/re_renderer/src/workspace_shaders.rs index 7d999da73645..845b90623493 100644 --- a/crates/re_renderer/src/workspace_shaders.rs +++ b/crates/re_renderer/src/workspace_shaders.rs @@ -37,6 +37,12 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/decodings.wgsl"); + let content = include_str!("../shader/decodings.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = Path::new("shader/depth_cloud.wgsl"); let content = include_str!("../shader/depth_cloud.wgsl").into(); diff --git a/crates/re_space_view_bar_chart/src/space_view_class.rs b/crates/re_space_view_bar_chart/src/space_view_class.rs index c99c4ecaa3a3..2912ea8e8328 100644 --- a/crates/re_space_view_bar_chart/src/space_view_class.rs +++ b/crates/re_space_view_bar_chart/src/space_view_class.rs @@ -163,6 +163,13 @@ impl SpaceViewClass for BarChartSpaceView { ); continue; } + TensorData::NV12(_) => { + re_log::warn_once!( + "trying to display NV12 data as a bar chart ({:?})", + ent_path + ); + continue; + } }; plot_ui.bar_chart(chart); diff --git a/crates/re_space_view_spatial/src/parts/images.rs b/crates/re_space_view_spatial/src/parts/images.rs index 8369c3356365..aac3bb7e72b4 100644 --- a/crates/re_space_view_spatial/src/parts/images.rs +++ b/crates/re_space_view_spatial/src/parts/images.rs @@ -108,8 +108,8 @@ fn to_textured_rect( .world_from_entity .transform_vector3(glam::Vec3::X * width as f32), extent_v: ent_context - .world_from_entity - .transform_vector3(glam::Vec3::Y * height as f32), + .world_from_obj + .transform_vector3(glam::Vec3::Y * (height as f32) * 1.5), // TODO(zrezke): Make this based on encoding colormapped_texture, options: RectangleOptions { texture_filter_magnification, diff --git a/crates/re_space_view_spatial/src/ui_2d.rs b/crates/re_space_view_spatial/src/ui_2d.rs index eb1db9636d7f..5f75b51ee3dc 100644 --- a/crates/re_space_view_spatial/src/ui_2d.rs +++ b/crates/re_space_view_spatial/src/ui_2d.rs @@ -258,6 +258,7 @@ pub fn view_2d( .map_or(scene_rect_accum, |res| { Rect::from_min_max(egui::Pos2::ZERO, egui::pos2(res.x, res.y)) }); + println!("Pinhole resolution: {:?}", pinhole); let (desired_size, offset) = state .state_2d diff --git a/crates/re_space_view_tensor/src/space_view_class.rs b/crates/re_space_view_tensor/src/space_view_class.rs index 1f26ded0d93b..2815809c0201 100644 --- a/crates/re_space_view_tensor/src/space_view_class.rs +++ b/crates/re_space_view_tensor/src/space_view_class.rs @@ -264,10 +264,10 @@ fn view_tensor( .iter() .any(|selector| selector.visible) { - egui::Frame { + (egui::Frame { inner_margin: egui::Margin::symmetric(16.0, 8.0), ..Default::default() - } + }) .show(ui, |ui| { ui.spacing_mut().item_spacing = default_item_spacing; // keep the default spacing between sliders selectors_ui(ui, state, tensor); @@ -335,7 +335,7 @@ fn paint_tensor_slice( &tensor_stats, state, )?; - let [width, height] = colormapped_texture.texture.width_height(); + let [width, height] = colormapped_texture.image_width_height(); let img_size = egui::vec2(width as _, height as _); let img_size = Vec2::max(Vec2::splat(1.0), img_size); // better safe than sorry @@ -743,7 +743,7 @@ fn selectors_ui(ui: &mut egui::Ui, state: &mut PerTensorState, tensor: &TensorDa // Make the slider as big as needed: const MIN_SLIDER_WIDTH: f32 = 64.0; if ui.available_width() >= MIN_SLIDER_WIDTH { - ui.spacing_mut().slider_width = (size as f32 * 4.0) + ui.spacing_mut().slider_width = ((size as f32) * 4.0) .at_least(MIN_SLIDER_WIDTH) .at_most(ui.available_width()); ui.add(egui::Slider::new(selector_value, 0..=size - 1).show_value(false)) diff --git a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs index 1d79f722133a..a62b494a1cd7 100644 --- a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs +++ b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs @@ -1,6 +1,7 @@ +use re_components::{DecodedTensor, TensorCastError, TensorData, TensorDataType}; use re_data_store::VersionedInstancePathHash; use re_renderer::{ - renderer::ColormappedTexture, + renderer::{ColormappedTexture, TextureEncoding}, resource_managers::{GpuTexture2D, Texture2DCreationDesc, TextureManager2DError}, }; use re_types::tensor_data::{DecodedTensor, TensorCastError, TensorDataType}; @@ -47,6 +48,10 @@ pub fn colormapped_texture( color_mapper: Some(re_renderer::renderer::ColorMapper::Function( color_mapping.map, )), + encoding: match &tensor.data { + &TensorData::NV12(_) => Some(TextureEncoding::Nv12), + _ => None, + }, }) } diff --git a/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs b/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs index 0e699f137961..23a3db0c2433 100644 --- a/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs +++ b/crates/re_types/definitions/rerun/datatypes/tensor_buffer.fbs @@ -52,6 +52,9 @@ table F64Buffer(order: 100, transparent) { table JPEGBuffer(order: 100, transparent) { data: [ubyte] (order: 100); } +table NV12Buffer(order: 100, transparent) { + data: [ubyte] (order: 100); +} /// The underlying storage for a `Tensor`. @@ -73,4 +76,5 @@ union TensorBuffer ( F32: F32Buffer (transparent, order:1000), F64: F64Buffer (transparent, order:1200), JPEG: JPEGBuffer (transparent, order:1300), + NV12: NV12Buffer (transparent, order:1400), } diff --git a/crates/re_types/src/datatypes/tensor_buffer.rs b/crates/re_types/src/datatypes/tensor_buffer.rs index 550ac355d1ed..d85dd72d97da 100644 --- a/crates/re_types/src/datatypes/tensor_buffer.rs +++ b/crates/re_types/src/datatypes/tensor_buffer.rs @@ -31,6 +31,7 @@ pub enum TensorBuffer { F32(crate::ArrowBuffer), F64(crate::ArrowBuffer), Jpeg(crate::ArrowBuffer), + Nv12(crate::ArrowBuffer), } impl<'a> From for ::std::borrow::Cow<'a, TensorBuffer> { @@ -199,9 +200,21 @@ impl crate::Loggable for TensorBuffer { is_nullable: false, metadata: [].into(), }, + Field { + name: "NV12".to_owned(), + data_type: DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + is_nullable: false, + metadata: [].into(), + }, ], Some(vec![ 0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 8i32, 9i32, 10i32, 11i32, 12i32, + 13i32, ]), UnionMode::Dense, ) @@ -241,6 +254,7 @@ impl crate::Loggable for TensorBuffer { Some(TensorBuffer::F32(_)) => 10i8, Some(TensorBuffer::F64(_)) => 11i8, Some(TensorBuffer::Jpeg(_)) => 12i8, + Some(TensorBuffer::Nv12(_)) => 13i8, }) .collect(), vec![ @@ -890,6 +904,60 @@ impl crate::Loggable for TensorBuffer { .boxed() } }, + { + let (somes, nv12): (Vec<_>, Vec<_>) = data + .iter() + .filter(|datum| matches!(datum.as_deref(), Some(TensorBuffer::Nv12(_)))) + .map(|datum| { + let datum = match datum.as_deref() { + Some(TensorBuffer::Nv12(v)) => Some(v.clone()), + _ => None, + }; + (datum.is_some(), datum) + }) + .unzip(); + let nv12_bitmap: Option<::arrow2::bitmap::Bitmap> = { + let any_nones = somes.iter().any(|some| !*some); + any_nones.then(|| somes.into()) + }; + { + use arrow2::{buffer::Buffer, offset::OffsetsBuffer}; + let nv12_inner_data: Buffer<_> = nv12 + .iter() + .flatten() + .map(|b| b.as_slice()) + .collect::>() + .concat() + .into(); + let nv12_inner_bitmap: Option<::arrow2::bitmap::Bitmap> = None; + let offsets = ::arrow2::offset::Offsets::::try_from_lengths( + nv12.iter().map(|opt| { + opt.as_ref() + .map(|datum| datum.num_instances()) + .unwrap_or_default() + }), + ) + .unwrap() + .into(); + ListArray::new( + DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + offsets, + PrimitiveArray::new( + DataType::UInt8, + nv12_inner_data, + nv12_inner_bitmap, + ) + .boxed(), + u8_bitmap, + ) + .boxed() + } + }, ], Some({ let mut u8_offset = 0; @@ -904,6 +972,7 @@ impl crate::Loggable for TensorBuffer { let mut f32_offset = 0; let mut f64_offset = 0; let mut jpeg_offset = 0; + let mut nv12_offset = 0; let mut nulls_offset = 0; data.iter() .map(|v| match v.as_deref() { @@ -972,6 +1041,11 @@ impl crate::Loggable for TensorBuffer { jpeg_offset += 1; offset } + Some(TensorBuffer::Nv12(_)) => { + let offset = nv12_offset; + nv12_offset += 1; + offset + } }) .collect() }), @@ -1135,10 +1209,21 @@ impl crate::Loggable for TensorBuffer { is_nullable: false, metadata: [].into(), }, + Filed { + name: "NV12".to_owned(), + data_type: DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + is_nullable: false, + metadata: [].into(), + }, ], Some(vec![ 0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, 7i32, 8i32, 9i32, 10i32, - 11i32, 12i32, + 11i32, 12i32, 13i32, ]), UnionMode::Dense, ), @@ -2019,6 +2104,77 @@ impl crate::Loggable for TensorBuffer { } .collect::>() }; + let nv12 = { + if 1usize >= arrow_data_arrays.len() { + return Ok(Vec::new()); + } + let arrow_data = &*arrow_data_arrays[1usize]; + { + let arrow_data = arrow_data + .as_any() + .downcast_ref::<::arrow2::array::ListArray>() + .ok_or_else(|| { + crate::DeserializationError::datatype_mismatch( + DataType::List(Box::new(Field { + name: "item".to_owned(), + data_type: DataType::UInt8, + is_nullable: false, + metadata: [].into(), + })), + arrow_data.data_type().clone(), + ) + }) + .with_context("rerun.datatypes.TensorBuffer#U8")?; + if arrow_data.is_empty() { + Vec::new() + } else { + let arrow_data_inner = { + let arrow_data_inner = &**arrow_data.values(); + arrow_data_inner + .as_any() + .downcast_ref::() + .ok_or_else(|| { + crate::DeserializationError::datatype_mismatch( + DataType::UInt8, + arrow_data_inner.data_type().clone(), + ) + }) + .with_context("rerun.datatypes.TensorBuffer#U8")? + .values() + }; + let offsets = arrow_data.offsets(); + arrow2::bitmap::utils::ZipValidity::new_with_validity( + offsets.iter().zip(offsets.lengths()), + arrow_data.validity(), + ) + .map(|elem| { + elem.map(|(start, len)| { + let start = *start as usize; + let end = start + len; + if end as usize > arrow_data_inner.len() { + return Err(crate::DeserializationError::offset_slice_oob( + (start, end), + arrow_data_inner.len(), + )); + } + + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + let data = unsafe { + arrow_data_inner + .clone() + .sliced_unchecked(start as usize, end - start as usize) + }; + let data = crate::ArrowBuffer::from(data); + Ok(data) + }) + .transpose() + }) + .collect::>>>()? + } + .into_iter() + } + .collect::>() + }; arrow_data_types .iter() .enumerate() diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 910a4c3914b6..591239810350 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -46,28 +46,36 @@ impl TensorData { /// If the tensor can be interpreted as an image, return the height, width, and channels/depth of it. pub fn image_height_width_channels(&self) -> Option<[u64; 3]> { let shape_short = self.shape_short(); - - match shape_short.len() { - 1 => { - // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. - // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. - if self.shape.len() >= 2 { - Some([shape_short[0].size, 1, 1]) - } else { - None - } + match &self.buffer { + TensorBuffer::Nv12(_) => { + // NV12 encodes a color image in 1.5 "channels" -> 1 luma (per pixel) + (1U+1V) / 4 pixels. + // Return the logical RGB size. + Some([((y.size as f64) / 1.5) as u64, x.size, 3]) } - 2 => Some([shape_short[0].size, shape_short[1].size, 1]), - 3 => { - let channels = shape_short[2].size; - if matches!(channels, 3 | 4) { - // rgb, rgba - Some([shape_short[0].size, shape_short[1].size, channels]) - } else { - None + _ => { + match shape_short.len() { + 1 => { + // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. + // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. + if self.shape.len() >= 2 { + Some([shape_short[0].size, 1, 1]) + } else { + None + } + } + 2 => Some([shape_short[0].size, shape_short[1].size, 1]), + 3 => { + let channels = shape_short[2].size; + if matches!(channels, 3 | 4) { + // rgb, rgba + Some([shape_short[0].size, shape_short[1].size, channels]) + } else { + None + } + } + _ => None, } } - _ => None, } } @@ -154,6 +162,52 @@ impl TensorData { TensorBuffer::F32(buf) => Some(TensorElement::F32(buf[offset])), TensorBuffer::F64(buf) => Some(TensorElement::F64(buf[offset])), TensorBuffer::Jpeg(_) => None, // Too expensive to unpack here. + TensorBuffer::Nv12(_) => { + { + // Returns the U32 packed RGBA value of the pixel at index [y, x] if it is valid. + let [y, x] = index else { + return None; + }; + if let Some( + [TensorElement::U8(r), TensorElement::U8(g), TensorElement::U8(b)], + ) = self.get_nv12_pixel(*x, *y) + { + let mut rgba = 0; + rgba |= (r as u32) << 24; + rgba |= (g as u32) << 16; + rgba |= (b as u32) << 8; + rgba |= 0xff; + Some(TensorElement::U32(rgba)) + } else { + None + } + } + } + } + } + + pub fn get_nv12_pixel(&self, x: u64, y: u64) -> Option<[TensorElement; 3]> { + let TensorBuffer::Nv12(buf) = &self.buffer else { + return None; + }; + match self.image_height_width_channels() { + Some([h, w, _]) => { + let uv_offset = (w * h) as u64; + let luma = ((buf[(y * w + x) as usize] as f64) - 16.0) / 216.0; + let u = ((buf[(uv_offset + (y / 2) * w + x) as usize] as f64) - 128.0) / 224.0; + let v = + ((buf[((uv_offset + (y / 2) * w + x) as usize) + 1] as f64) - 128.0) / 224.0; + let r = luma + 1.402 * v; + let g = luma - 0.344 * u + 0.714 * v; + let b = luma + 1.772 * u; + + Some([ + TensorElement::U8(f64::clamp(r * 255.0, 0.0, 255.0) as u8), + TensorElement::U8(f64::clamp(g * 255.0, 0.0, 255.0) as u8), + TensorElement::U8(f64::clamp(b * 255.0, 0.0, 255.0) as u8), + ]) + } + _ => None, } } @@ -251,7 +305,6 @@ macro_rules! tensor_type { }; } -tensor_type!(u8, U8); tensor_type!(u16, U16); tensor_type!(u32, U32); tensor_type!(u64, U64); @@ -266,6 +319,22 @@ tensor_type!(arrow2::types::f16, F16); tensor_type!(f32, F32); tensor_type!(f64, F64); +// Manual expension of tensor_type! macro for `half::u8` types. We need to do this, because u8 can store bytes that carry encoded data +impl<'a> TryFrom<&'a Tensor> for ::ndarray::ArrayViewD<'a, u8> { + type Error = TensorCastError; + + fn try_from(value: &'a Tensor) -> Result { + match &value.data { + TensorBuffer::U8(data) | TensorBuffer::Nv12(data) => { + let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); + ndarray::ArrayViewD::from_shape(shape, bytemuck::cast_slice(data.as_slice())) + .map_err(|err| TensorCastError::BadTensorShape { source: err }) + } + _ => Err(TensorCastError::TypeMismatch), + } + } +} + // Manual expansion of tensor_type! macro for `half::f16` types. We need to do this // because arrow uses its own half type. The two use the same underlying representation // but are still distinct types. `half::f16`, however, is more full-featured and diff --git a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs index 40532742eb2a..1762b12e9c4f 100644 --- a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs +++ b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs @@ -16,7 +16,7 @@ use wgpu::TextureFormat; use re_renderer::{ pad_rgb_to_rgba, - renderer::{ColorMapper, ColormappedTexture}, + renderer::{ColorMapper, ColormappedTexture, TextureEncoding}, resource_managers::Texture2DCreationDesc, RenderContext, }; @@ -89,7 +89,10 @@ pub fn color_tensor_to_gpu( let [height, width, depth] = height_width_depth(tensor)?; let texture_handle = try_get_or_create_texture(render_ctx, hash(tensor_path_hash), || { - let (data, format) = match (depth, &tensor.buffer) { + let (data, format) = match (depth, &tensor.data) { + (3, TensorData::NV12(buf)) => { + (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Uint) + } // Normalize sRGB(A) textures to 0-1 range, and let the GPU premultiply alpha. // Why? Because premul must happen _before_ sRGB decode, so we can't // use a "Srgb-aware" texture like `Rgba8UnormSrgb` for RGBA. @@ -121,6 +124,10 @@ pub fn color_tensor_to_gpu( let decode_srgb = texture_format == TextureFormat::Rgba8Unorm || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)?; + let encoding = match &tensor.data { + &TensorData::NV12(_) => Some(TextureEncoding::Nv12), + _ => None, + }; // Special casing for normalized textures used above: let range = if matches!( texture_format, @@ -134,7 +141,8 @@ pub fn color_tensor_to_gpu( super::tensor_data_range_heuristic(tensor_stats, tensor.dtype())? }; - let color_mapper = if texture_format.components() == 1 { + println!("texture components: {}", texture_format.components()); + let color_mapper = if encoding.is_none() && texture_format.components() == 1 { // Single-channel images = luminance = grayscale Some(ColorMapper::Function(re_renderer::Colormap::Grayscale)) } else { @@ -145,13 +153,20 @@ pub fn color_tensor_to_gpu( // Assume that the texture is not pre-multiplied if it has an alpha channel. let multiply_rgb_with_alpha = depth == 4; + let gamma = if encoding == Some(TextureEncoding::Nv12) { + 2.2 + } else { + 1.0 + }; + Ok(ColormappedTexture { texture: texture_handle, range, decode_srgb, multiply_rgb_with_alpha, - gamma: 1.0, + gamma, color_mapper, + encoding, }) } @@ -224,6 +239,7 @@ pub fn class_id_tensor_to_gpu( multiply_rgb_with_alpha: false, // already premultiplied! gamma: 1.0, color_mapper: Some(ColorMapper::Texture(colormap_texture_handle)), + encoding: None, }) } @@ -257,6 +273,7 @@ pub fn depth_tensor_to_gpu( multiply_rgb_with_alpha: false, gamma: 1.0, color_mapper: Some(ColorMapper::Function(re_renderer::Colormap::Turbo)), + encoding: None, }) } @@ -319,6 +336,10 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + + TensorData::NV12(_) => { + unreachable!("An NV12 tensor can only contain a 3 channel image.") + } } } 2 => { @@ -341,6 +362,9 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorData::NV12(_) => { + unreachable!("An NV12 tensor can only contain a 3 channel image.") + } } } 3 => { @@ -384,6 +408,9 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorData::NV12(buf) => { + (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Unorm) + } } } 4 => { @@ -406,6 +433,9 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } + TensorData::NV12(_) => { + unreachable!("An NV12 tensor can only contain a 3 channel image.") + } } } depth => { @@ -483,9 +513,13 @@ fn pad_and_narrow_and_cast( fn height_width_depth(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { use anyhow::Context as _; - let Some([height, width, channel]) = tensor.image_height_width_channels() else { + let Some([mut height, width, channel]) = tensor.image_height_width_channels() else { anyhow::bail!("Tensor is not an image"); }; + height = match tensor.data { + TensorData::NV12(_) => height * 3 / 2, + _ => height, + }; let [height, width] = [ u32::try_from(height).context("Image height is too large")?, diff --git a/crates/re_viewer_context/src/tensor/tensor_stats.rs b/crates/re_viewer_context/src/tensor/tensor_stats.rs index 40b677a0082e..3bf63a995391 100644 --- a/crates/re_viewer_context/src/tensor/tensor_stats.rs +++ b/crates/re_viewer_context/src/tensor/tensor_stats.rs @@ -16,7 +16,7 @@ impl TensorStats { use re_types::tensor_data::TensorDataType; macro_rules! declare_tensor_range_int { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor @@ -29,7 +29,7 @@ impl TensorStats { } macro_rules! declare_tensor_range_float { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor.fold( @@ -59,15 +59,14 @@ impl TensorStats { #[allow(clippy::needless_pass_by_value)] fn tensor_range_f16(tensor: ndarray::ArrayViewD<'_, f16>) -> (f64, f64) { re_tracing::profile_function!(); - let (min, max) = tensor - .fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { - (min.min(value), max.max(value)) - }); + let (min, max) = tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { + (min.min(value), max.max(value)) + }); (min.to_f64(), max.to_f64()) } macro_rules! declare_tensor_finite_range_float { - ($name: ident, $typ: ty) => { + ($name:ident, $typ:ty) => { fn $name(tensor: ndarray::ArrayViewD<'_, $typ>) -> (f64, f64) { re_tracing::profile_function!(); let (min, max) = tensor.fold( @@ -93,14 +92,9 @@ impl TensorStats { #[allow(clippy::needless_pass_by_value)] fn tensor_finite_range_f16(tensor: ndarray::ArrayViewD<'_, f16>) -> (f64, f64) { re_tracing::profile_function!(); - let (min, max) = - tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { - if value.is_finite() { - (min.min(value), max.max(value)) - } else { - (min, max) - } - }); + let (min, max) = tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { + if value.is_finite() { (min.min(value), max.max(value)) } else { (min, max) } + }); (min.to_f64(), max.to_f64()) } @@ -119,15 +113,18 @@ impl TensorStats { TensorDataType::F64 => ArrayViewD::::try_from(tensor).map(tensor_range_f64), }; - let finite_range = if range - .as_ref() - .ok() - .map_or(true, |r| r.0.is_finite() && r.1.is_finite()) + println!("range: {:?} for tensor: {:?}", range, tensor.data); + + let finite_range = if + range + .as_ref() + .ok() + .map_or(true, |r| r.0.is_finite() && r.1.is_finite()) { range.clone().ok() } else { let finite_range = match tensor.dtype() { - TensorDataType::U8 + | TensorDataType::U8 | TensorDataType::U16 | TensorDataType::U32 | TensorDataType::U64 @@ -149,11 +146,7 @@ impl TensorStats { // If we didn't find a finite range, set it to None. finite_range.ok().and_then(|r| { - if r.0.is_finite() && r.1.is_finite() { - Some(r) - } else { - None - } + if r.0.is_finite() && r.1.is_finite() { Some(r) } else { None } }) }; diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py index 050a7cc1ae1a..623b01a7d53b 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py @@ -41,6 +41,45 @@ def _to_numpy(tensor: TensorLike) -> npt.NDArray[Any]: return np.array(tensor, copy=False) +class ImageEncoding: + NV12: "NV12" + JPEG: "JPEG" + + @property + def name(self) -> str: + raise NotImplementedError + + +class NV12(ImageEncoding): + width: int | None + height: int | None + + def __init__(self, width: int | None = None, height: int | None = None): + if width is None and height is None: + raise ValueError("ImageEncodingNv12 needs to carry at least one width or height.") + self.width = width + self.height = height + + @property + def name(self) -> str: + return f"NV12" + + +class JPEG(ImageEncoding): + quality: int + + def __init__(self, quality: int): + self.quality = quality + + @property + def name(self) -> str: + return f"JPEG" + + +ImageEncoding.NV12 = NV12 +ImageEncoding.JPEG = JPEG + + class TensorDataExt: # TODO(jleibs): Should also provide custom converters for shape / buffer # assignment that prevent the user from putting the TensorData into an @@ -53,7 +92,7 @@ def __init__( buffer: TensorBufferLike | None = None, array: TensorLike | None = None, dim_names: Sequence[str | None] | None = None, - jpeg_quality: int | None = None, + encoding: ImageEncoding | None = None, ) -> None: """ Construct a `TensorData` object. @@ -141,25 +180,30 @@ def __init__( # This shouldn't be possible but typing can't figure it out raise ValueError("No shape provided.") - if jpeg_quality is not None: - if array is None: - _send_warning("Can only compress JPEG if an array is provided", 2) - else: - if array.dtype not in ["uint8", "sint32", "float32"]: - # Convert to a format supported by Image.fromarray - array = array.astype("float32") - - pil_image = Image.fromarray(array) - output = BytesIO() - pil_image.save(output, format="JPEG", quality=jpeg_quality) - jpeg_bytes = output.getvalue() - output.close() - jpeg_array = np.frombuffer(jpeg_bytes, dtype=np.uint8) - # self.buffer = TensorBuffer(inner=jpeg_array, kind="jpeg") # TODO(emilk): something like this should work? - self.buffer = TensorBuffer(jpeg_array) - self.buffer.kind = "jpeg" - return - + if encoding is not None: + if isinstance(encoding, ImageEncoding.JPEG): + if array is None: + _send_warning("Can only compress JPEG if an array is provided", 2) + else: + if array.dtype not in ["uint8", "sint32", "float32"]: + # Convert to a format supported by Image.fromarray + array = array.astype("float32") + + pil_image = Image.fromarray(array) + output = BytesIO() + pil_image.save(output, format="JPEG", quality=encoding.quality) + jpeg_bytes = output.getvalue() + output.close() + jpeg_array = np.frombuffer(jpeg_bytes, dtype=np.uint8) + # self.buffer = TensorBuffer(inner=jpeg_array, kind="jpeg") # TODO(emilk): something like this should work? + self.buffer = TensorBuffer(jpeg_array) + self.buffer.kind = "jpeg" + return + elif isinstance(encoding, ImageEncoding.NV12): + if array is None: + _send_warning("Can only compress NV12 if an array is provided", 2) + else: + self.buffer = TensorBuffer(array, "nv12") if buffer is not None: self.buffer = _tensor_data__buffer__special_field_converter_override(buffer) elif array is not None: diff --git a/rerun_py/rerun_sdk/rerun/log_deprecated/image.py b/rerun_py/rerun_sdk/rerun/log_deprecated/image.py index 0f1a4008252b..5c72cfd21e9e 100644 --- a/rerun_py/rerun_sdk/rerun/log_deprecated/image.py +++ b/rerun_py/rerun_sdk/rerun/log_deprecated/image.py @@ -28,7 +28,7 @@ def log_image( ext: dict[str, Any] | None = None, timeless: bool = False, recording: RecordingStream | None = None, - jpeg_quality: int | None = None, + encoding: ImageEncoding | None = None, ) -> None: """ Log a gray or color image. @@ -73,7 +73,7 @@ def log_image( """ - tensor_data = TensorData(array=image, jpeg_quality=jpeg_quality) + tensor_data = TensorData(array=image, encoding=encoding) log(entity_path, Image(tensor_data, draw_order=draw_order), ext=ext, timeless=timeless, recording=recording) From a95e1900f0326b1b521b38202ce0f4aacf9bf006 Mon Sep 17 00:00:00 2001 From: zrezke Date: Fri, 29 Sep 2023 13:20:19 +0200 Subject: [PATCH 02/20] Cleanup and fix nv12 image support --- crates/re_data_ui/src/image.rs | 6 +- crates/re_renderer/shader/rectangle_vs.wgsl | 3 + crates/re_renderer/src/renderer/rectangles.rs | 25 +----- .../src/space_view_class.rs | 2 +- .../re_space_view_spatial/src/parts/images.rs | 4 +- crates/re_space_view_spatial/src/picking.rs | 8 +- crates/re_space_view_spatial/src/ui_2d.rs | 1 - .../src/space_view_class.rs | 2 +- .../src/tensor_slice_to_gpu.rs | 10 ++- .../re_types/src/datatypes/tensor_buffer.rs | 27 ++++-- .../src/datatypes/tensor_buffer_ext.rs | 5 +- .../re_types/src/datatypes/tensor_data_ext.rs | 15 ++-- crates/re_types/src/tensor_data.rs | 5 +- .../src/gpu_bridge/colormap.rs | 1 + .../src/gpu_bridge/tensor_to_gpu.rs | 23 ++--- .../src/tensor/tensor_stats.rs | 37 ++++---- .../src/rerun/datatypes/tensor_buffer.cpp | 19 ++++ .../src/rerun/datatypes/tensor_buffer.hpp | 20 +++++ rerun_py/rerun_sdk/rerun/_image.py | 78 ++++++++++++---- .../rerun/datatypes/tensor_buffer.py | 10 ++- .../rerun_sdk/rerun/datatypes/tensor_data.py | 6 ++ .../rerun/datatypes/tensor_data_ext.py | 89 +++++-------------- .../rerun_sdk/rerun/log_deprecated/image.py | 5 +- 23 files changed, 230 insertions(+), 171 deletions(-) diff --git a/crates/re_data_ui/src/image.rs b/crates/re_data_ui/src/image.rs index 3dbd56826fb2..fbf9b0044b1b 100644 --- a/crates/re_data_ui/src/image.rs +++ b/crates/re_data_ui/src/image.rs @@ -122,7 +122,6 @@ fn tensor_ui( .on_hover_ui(|ui| { // Show larger image on hover let max_size = Vec2::splat(400.0); - println!("THIS IS HOVER:"); show_image_at_max_size( ctx.render_ctx, ctx.re_ui, @@ -145,7 +144,7 @@ fn tensor_ui( ui.label(format!( "{} x {}{}", tensor.dtype(), - format_tensor_shape_single_line(tensor.shape()), + format_tensor_shape_single_line(shape.as_slice()), if original_tensor.buffer.is_compressed_image() { " (compressed)" } else { @@ -238,7 +237,7 @@ fn tensor_ui( } fn texture_size(colormapped_texture: &ColormappedTexture) -> Vec2 { - let [w, h] = colormapped_texture.image_width_height(); + let [w, h] = colormapped_texture.width_height(); egui::vec2(w as f32, h as f32) } @@ -256,7 +255,6 @@ fn show_image_at_max_size( desired_size *= (max_size.y / desired_size.y).min(1.0); desired_size }; - println!("Desired size: {:?}", desired_size); let (response, painter) = ui.allocate_painter(desired_size, egui::Sense::hover()); if let Err(err) = gpu_bridge::render_image( diff --git a/crates/re_renderer/shader/rectangle_vs.wgsl b/crates/re_renderer/shader/rectangle_vs.wgsl index dddbed08cce2..c0222b35dc6f 100644 --- a/crates/re_renderer/shader/rectangle_vs.wgsl +++ b/crates/re_renderer/shader/rectangle_vs.wgsl @@ -10,6 +10,9 @@ fn vs_main(@builtin(vertex_index) v_idx: u32) -> VertexOut { var out: VertexOut; out.position = apply_depth_offset(frame.projection_from_world * Vec4(pos, 1.0), rect_info.depth_offset); out.texcoord = texcoord; + if rect_info.sample_type == SAMPLE_TYPE_NV12 { + out.texcoord.y /= 1.5; + } return out; } diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index 38fba7fa4f2d..b4095f94d22e 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -126,7 +126,7 @@ impl ColormappedTexture { } } - pub fn image_width_height(&self) -> [u32; 2] { + pub fn width_height(&self) -> [u32; 2] { match self.encoding { Some(TextureEncoding::Nv12) => { let [width, height] = self.texture.width_height(); @@ -154,16 +154,6 @@ pub struct TexturedRect { pub options: RectangleOptions, } -impl TexturedRect { - /// The uv extent of the image, taking into account the texture encoding. - pub fn image_extent_uv(&self) -> [glam::Vec3; 2] { - match self.colormapped_texture.encoding { - Some(TextureEncoding::Nv12) => [self.extent_u, self.extent_v / 1.5], - _ => [self.extent_u, self.extent_v], - } - } -} - #[derive(Clone)] pub struct RectangleOptions { pub texture_filter_magnification: TextureFilterMag, @@ -218,9 +208,7 @@ pub enum RectangleError { } mod gpu_data { - use gltf::json::extensions::texture; - - use crate::{texture_info, wgpu_buffer_types}; + use crate::wgpu_buffer_types; use super::{ColorMapper, RectangleError, TexturedRect}; @@ -358,15 +346,6 @@ mod gpu_data { super::TextureFilterMag::Nearest => FILTER_NEAREST, }; - println!( - "ENCODING: {:?} MIN: {:?} MAG: {:?} GAMMA: {:?}, color mapper: {:?}", - rectangle.colormapped_texture.encoding, - minification_filter, - magnification_filter, - gamma, - color_mapper_int - ); - Ok(Self { top_left_corner_position: (*top_left_corner_position).into(), colormap_function, diff --git a/crates/re_space_view_bar_chart/src/space_view_class.rs b/crates/re_space_view_bar_chart/src/space_view_class.rs index 2912ea8e8328..e3285e850b95 100644 --- a/crates/re_space_view_bar_chart/src/space_view_class.rs +++ b/crates/re_space_view_bar_chart/src/space_view_class.rs @@ -163,7 +163,7 @@ impl SpaceViewClass for BarChartSpaceView { ); continue; } - TensorData::NV12(_) => { + TensorBuffer::Nv12(_) => { re_log::warn_once!( "trying to display NV12 data as a bar chart ({:?})", ent_path diff --git a/crates/re_space_view_spatial/src/parts/images.rs b/crates/re_space_view_spatial/src/parts/images.rs index aac3bb7e72b4..78858f779d05 100644 --- a/crates/re_space_view_spatial/src/parts/images.rs +++ b/crates/re_space_view_spatial/src/parts/images.rs @@ -108,8 +108,8 @@ fn to_textured_rect( .world_from_entity .transform_vector3(glam::Vec3::X * width as f32), extent_v: ent_context - .world_from_obj - .transform_vector3(glam::Vec3::Y * (height as f32) * 1.5), // TODO(zrezke): Make this based on encoding + .world_from_entity + .transform_vector3(glam::Vec3::Y * (height as f32)), colormapped_texture, options: RectangleOptions { texture_filter_magnification, diff --git a/crates/re_space_view_spatial/src/picking.rs b/crates/re_space_view_spatial/src/picking.rs index 0cc200950c9f..761c5ded5b87 100644 --- a/crates/re_space_view_spatial/src/picking.rs +++ b/crates/re_space_view_spatial/src/picking.rs @@ -264,15 +264,13 @@ fn picking_textured_rects(context: &PickingContext, images: &[ViewerImage]) -> V let v = dir_from_rect_top_left.dot(rect.extent_v) / rect.extent_v.length_squared(); if (0.0..=1.0).contains(&u) && (0.0..=1.0).contains(&v) { + let [width, height] = rect.colormapped_texture.width_height(); hits.push(PickingRayHit { instance_path_hash: InstancePathHash { entity_path_hash: image.ent_path.hash(), instance_key: InstanceKey::from_2d_image_coordinate( - [ - (u * rect.colormapped_texture.texture.width() as f32) as u32, - (v * rect.colormapped_texture.texture.height() as f32) as u32, - ], - rect.colormapped_texture.texture.width() as u64, + [(u * width as f32) as u32, (v * height as f32) as u32], + width as u64, ), }, space_position: intersection_world, diff --git a/crates/re_space_view_spatial/src/ui_2d.rs b/crates/re_space_view_spatial/src/ui_2d.rs index 5f75b51ee3dc..eb1db9636d7f 100644 --- a/crates/re_space_view_spatial/src/ui_2d.rs +++ b/crates/re_space_view_spatial/src/ui_2d.rs @@ -258,7 +258,6 @@ pub fn view_2d( .map_or(scene_rect_accum, |res| { Rect::from_min_max(egui::Pos2::ZERO, egui::pos2(res.x, res.y)) }); - println!("Pinhole resolution: {:?}", pinhole); let (desired_size, offset) = state .state_2d diff --git a/crates/re_space_view_tensor/src/space_view_class.rs b/crates/re_space_view_tensor/src/space_view_class.rs index 2815809c0201..b4ec653ee29b 100644 --- a/crates/re_space_view_tensor/src/space_view_class.rs +++ b/crates/re_space_view_tensor/src/space_view_class.rs @@ -335,7 +335,7 @@ fn paint_tensor_slice( &tensor_stats, state, )?; - let [width, height] = colormapped_texture.image_width_height(); + let [width, height] = colormapped_texture.width_height(); let img_size = egui::vec2(width as _, height as _); let img_size = Vec2::max(Vec2::splat(1.0), img_size); // better safe than sorry diff --git a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs index a62b494a1cd7..4c5c103fd6e6 100644 --- a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs +++ b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs @@ -1,10 +1,12 @@ -use re_components::{DecodedTensor, TensorCastError, TensorData, TensorDataType}; use re_data_store::VersionedInstancePathHash; use re_renderer::{ renderer::{ColormappedTexture, TextureEncoding}, resource_managers::{GpuTexture2D, Texture2DCreationDesc, TextureManager2DError}, }; -use re_types::tensor_data::{DecodedTensor, TensorCastError, TensorDataType}; +use re_types::{ + datatypes::TensorBuffer, + tensor_data::{DecodedTensor, TensorCastError, TensorDataType}, +}; use re_viewer_context::{ gpu_bridge::{self, tensor_data_range_heuristic, RangeError}, TensorStats, @@ -48,8 +50,8 @@ pub fn colormapped_texture( color_mapper: Some(re_renderer::renderer::ColorMapper::Function( color_mapping.map, )), - encoding: match &tensor.data { - &TensorData::NV12(_) => Some(TextureEncoding::Nv12), + encoding: match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(TextureEncoding::Nv12), _ => None, }, }) diff --git a/crates/re_types/src/datatypes/tensor_buffer.rs b/crates/re_types/src/datatypes/tensor_buffer.rs index d85dd72d97da..b135d5524662 100644 --- a/crates/re_types/src/datatypes/tensor_buffer.rs +++ b/crates/re_types/src/datatypes/tensor_buffer.rs @@ -953,7 +953,7 @@ impl crate::Loggable for TensorBuffer { nv12_inner_bitmap, ) .boxed(), - u8_bitmap, + nv12_bitmap, ) .boxed() } @@ -1209,7 +1209,7 @@ impl crate::Loggable for TensorBuffer { is_nullable: false, metadata: [].into(), }, - Filed { + Field { name: "NV12".to_owned(), data_type: DataType::List(Box::new(Field { name: "item".to_owned(), @@ -2105,10 +2105,10 @@ impl crate::Loggable for TensorBuffer { .collect::>() }; let nv12 = { - if 1usize >= arrow_data_arrays.len() { + if 13usize >= arrow_data_arrays.len() { return Ok(Vec::new()); } - let arrow_data = &*arrow_data_arrays[1usize]; + let arrow_data = &*arrow_data_arrays[13usize]; { let arrow_data = arrow_data .as_any() @@ -2124,7 +2124,7 @@ impl crate::Loggable for TensorBuffer { arrow_data.data_type().clone(), ) }) - .with_context("rerun.datatypes.TensorBuffer#U8")?; + .with_context("rerun.datatypes.TensorBuffer#NV12")?; if arrow_data.is_empty() { Vec::new() } else { @@ -2139,7 +2139,7 @@ impl crate::Loggable for TensorBuffer { arrow_data_inner.data_type().clone(), ) }) - .with_context("rerun.datatypes.TensorBuffer#U8")? + .with_context("rerun.datatypes.TensorBuffer#NV12")? .values() }; let offsets = arrow_data.offsets(); @@ -2364,6 +2364,21 @@ impl crate::Loggable for TensorBuffer { .ok_or_else(crate::DeserializationError::missing_data) .with_context("rerun.datatypes.TensorBuffer#JPEG")? }), + 13i8 => TensorBuffer::Nv12({ + if offset as usize >= nv12.len() { + return Err(crate::DeserializationError::offset_oob( + offset as _, + nv12.len(), + )) + .with_context("rerun.datatypes.TensorBuffer#NV12"); + } + + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + unsafe { nv12.get_unchecked(offset as usize) } + .clone() + .ok_or_else(crate::DeserializationError::missing_data) + .with_context("rerun.datatypes.TensorBuffer#NV12")? + }), _ => { return Err(crate::DeserializationError::missing_union_arm( Self::arrow_datatype(), diff --git a/crates/re_types/src/datatypes/tensor_buffer_ext.rs b/crates/re_types/src/datatypes/tensor_buffer_ext.rs index 68c7b57f035b..ad563838c678 100644 --- a/crates/re_types/src/datatypes/tensor_buffer_ext.rs +++ b/crates/re_types/src/datatypes/tensor_buffer_ext.rs @@ -18,6 +18,7 @@ impl TensorBuffer { Self::F32(_) => TensorDataType::F32, Self::F64(_) => TensorDataType::F64, Self::Jpeg(_) => TensorDataType::U8, + Self::Nv12(_) => TensorDataType::U8, } } @@ -36,6 +37,7 @@ impl TensorBuffer { Self::F32(buf) => buf.size_in_bytes(), Self::F64(buf) => buf.size_in_bytes(), Self::Jpeg(buf) => buf.size_in_bytes(), + Self::Nv12(buf) => buf.size_in_bytes(), } } @@ -57,7 +59,7 @@ impl TensorBuffer { | Self::F32(_) | Self::F64(_) => false, - Self::Jpeg(_) => true, + Self::Jpeg(_) | Self::Nv12(_) => true, } } } @@ -77,6 +79,7 @@ impl std::fmt::Debug for TensorBuffer { Self::F32(_) => write!(f, "F32({} bytes)", self.size_in_bytes()), Self::F64(_) => write!(f, "F64({} bytes)", self.size_in_bytes()), Self::Jpeg(_) => write!(f, "JPEG({} bytes)", self.size_in_bytes()), + Self::Nv12(_) => write!(f, "NV12({} bytes)", self.size_in_bytes()), } } } diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 591239810350..87ee693e1366 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -49,8 +49,11 @@ impl TensorData { match &self.buffer { TensorBuffer::Nv12(_) => { // NV12 encodes a color image in 1.5 "channels" -> 1 luma (per pixel) + (1U+1V) / 4 pixels. - // Return the logical RGB size. - Some([((y.size as f64) / 1.5) as u64, x.size, 3]) + // Return the RGB size. + match shape_short { + [h, w] => Some([(h.size as f64 / 1.5) as u64, w.size, 3]), + _ => None, + } } _ => { match shape_short.len() { @@ -319,12 +322,12 @@ tensor_type!(arrow2::types::f16, F16); tensor_type!(f32, F32); tensor_type!(f64, F64); -// Manual expension of tensor_type! macro for `half::u8` types. We need to do this, because u8 can store bytes that carry encoded data -impl<'a> TryFrom<&'a Tensor> for ::ndarray::ArrayViewD<'a, u8> { +// Manual expension of tensor_type! macro for `u8` types. We need to do this, because u8 can store encoded data +impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> { type Error = TensorCastError; - fn try_from(value: &'a Tensor) -> Result { - match &value.data { + fn try_from(value: &'a TensorData) -> Result { + match &value.buffer { TensorBuffer::U8(data) | TensorBuffer::Nv12(data) => { let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); ndarray::ArrayViewD::from_shape(shape, bytemuck::cast_slice(data.as_slice())) diff --git a/crates/re_types/src/tensor_data.rs b/crates/re_types/src/tensor_data.rs index fd0e9e114943..31db020348de 100644 --- a/crates/re_types/src/tensor_data.rs +++ b/crates/re_types/src/tensor_data.rs @@ -434,7 +434,7 @@ impl TryFrom for DecodedTensor { | TensorBuffer::F16(_) | TensorBuffer::F32(_) | TensorBuffer::F64(_) => Ok(Self(tensor)), - TensorBuffer::Jpeg(_) => Err(tensor), + TensorBuffer::Jpeg(_) | TensorBuffer::Nv12(_) => Err(tensor), } } } @@ -530,7 +530,8 @@ impl DecodedTensor { | TensorBuffer::I64(_) | TensorBuffer::F16(_) | TensorBuffer::F32(_) - | TensorBuffer::F64(_) => Ok(Self(maybe_encoded_tensor)), + | TensorBuffer::F64(_) + | TensorBuffer::Nv12(_) => Ok(Self(maybe_encoded_tensor)), // Decoding happens on the GPU TensorBuffer::Jpeg(jpeg_bytes) => { let [h, w, c] = maybe_encoded_tensor diff --git a/crates/re_viewer_context/src/gpu_bridge/colormap.rs b/crates/re_viewer_context/src/gpu_bridge/colormap.rs index 0d777ae33db7..b4b31fa89be6 100644 --- a/crates/re_viewer_context/src/gpu_bridge/colormap.rs +++ b/crates/re_viewer_context/src/gpu_bridge/colormap.rs @@ -42,6 +42,7 @@ fn colormap_preview_ui( decode_srgb: false, multiply_rgb_with_alpha: false, gamma: 1.0, + encoding: None, color_mapper: Some(re_renderer::renderer::ColorMapper::Function(colormap)), }; diff --git a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs index 1762b12e9c4f..6e233ee5c5e8 100644 --- a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs +++ b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs @@ -89,8 +89,8 @@ pub fn color_tensor_to_gpu( let [height, width, depth] = height_width_depth(tensor)?; let texture_handle = try_get_or_create_texture(render_ctx, hash(tensor_path_hash), || { - let (data, format) = match (depth, &tensor.data) { - (3, TensorData::NV12(buf)) => { + let (data, format) = match (depth, &tensor.buffer) { + (3, TensorBuffer::Nv12(buf)) => { (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Uint) } // Normalize sRGB(A) textures to 0-1 range, and let the GPU premultiply alpha. @@ -124,8 +124,8 @@ pub fn color_tensor_to_gpu( let decode_srgb = texture_format == TextureFormat::Rgba8Unorm || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)?; - let encoding = match &tensor.data { - &TensorData::NV12(_) => Some(TextureEncoding::Nv12), + let encoding = match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(TextureEncoding::Nv12), _ => None, }; // Special casing for normalized textures used above: @@ -136,12 +136,13 @@ pub fn color_tensor_to_gpu( [0.0, 1.0] } else if texture_format == TextureFormat::R8Snorm { [-1.0, 1.0] + } else if encoding == Some(TextureEncoding::Nv12) { + [0.0, 1.0] } else { // TODO(#2341): The range should be determined by a `DataRange` component. In absence this, heuristics apply. super::tensor_data_range_heuristic(tensor_stats, tensor.dtype())? }; - println!("texture components: {}", texture_format.components()); let color_mapper = if encoding.is_none() && texture_format.components() == 1 { // Single-channel images = luminance = grayscale Some(ColorMapper::Function(re_renderer::Colormap::Grayscale)) @@ -337,7 +338,7 @@ fn general_texture_creation_desc_from_tensor<'a>( unreachable!("DecodedTensor cannot contain a JPEG") } - TensorData::NV12(_) => { + TensorBuffer::Nv12(_) => { unreachable!("An NV12 tensor can only contain a 3 channel image.") } } @@ -362,7 +363,7 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } - TensorData::NV12(_) => { + TensorBuffer::Nv12(_) => { unreachable!("An NV12 tensor can only contain a 3 channel image.") } } @@ -408,7 +409,7 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } - TensorData::NV12(buf) => { + TensorBuffer::Nv12(buf) => { (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Unorm) } } @@ -433,7 +434,7 @@ fn general_texture_creation_desc_from_tensor<'a>( TensorBuffer::Jpeg(_) => { unreachable!("DecodedTensor cannot contain a JPEG") } - TensorData::NV12(_) => { + TensorBuffer::Nv12(_) => { unreachable!("An NV12 tensor can only contain a 3 channel image.") } } @@ -516,8 +517,8 @@ fn height_width_depth(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { let Some([mut height, width, channel]) = tensor.image_height_width_channels() else { anyhow::bail!("Tensor is not an image"); }; - height = match tensor.data { - TensorData::NV12(_) => height * 3 / 2, + height = match tensor.buffer { + TensorBuffer::Nv12(_) => height * 3 / 2, _ => height, }; diff --git a/crates/re_viewer_context/src/tensor/tensor_stats.rs b/crates/re_viewer_context/src/tensor/tensor_stats.rs index 3bf63a995391..6206ad231af1 100644 --- a/crates/re_viewer_context/src/tensor/tensor_stats.rs +++ b/crates/re_viewer_context/src/tensor/tensor_stats.rs @@ -59,9 +59,10 @@ impl TensorStats { #[allow(clippy::needless_pass_by_value)] fn tensor_range_f16(tensor: ndarray::ArrayViewD<'_, f16>) -> (f64, f64) { re_tracing::profile_function!(); - let (min, max) = tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { - (min.min(value), max.max(value)) - }); + let (min, max) = tensor + .fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { + (min.min(value), max.max(value)) + }); (min.to_f64(), max.to_f64()) } @@ -92,9 +93,14 @@ impl TensorStats { #[allow(clippy::needless_pass_by_value)] fn tensor_finite_range_f16(tensor: ndarray::ArrayViewD<'_, f16>) -> (f64, f64) { re_tracing::profile_function!(); - let (min, max) = tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { - if value.is_finite() { (min.min(value), max.max(value)) } else { (min, max) } - }); + let (min, max) = + tensor.fold((f16::INFINITY, f16::NEG_INFINITY), |(min, max), &value| { + if value.is_finite() { + (min.min(value), max.max(value)) + } else { + (min, max) + } + }); (min.to_f64(), max.to_f64()) } @@ -113,18 +119,15 @@ impl TensorStats { TensorDataType::F64 => ArrayViewD::::try_from(tensor).map(tensor_range_f64), }; - println!("range: {:?} for tensor: {:?}", range, tensor.data); - - let finite_range = if - range - .as_ref() - .ok() - .map_or(true, |r| r.0.is_finite() && r.1.is_finite()) + let finite_range = if range + .as_ref() + .ok() + .map_or(true, |r| r.0.is_finite() && r.1.is_finite()) { range.clone().ok() } else { let finite_range = match tensor.dtype() { - | TensorDataType::U8 + TensorDataType::U8 | TensorDataType::U16 | TensorDataType::U32 | TensorDataType::U64 @@ -146,7 +149,11 @@ impl TensorStats { // If we didn't find a finite range, set it to None. finite_range.ok().and_then(|r| { - if r.0.is_finite() && r.1.is_finite() { Some(r) } else { None } + if r.0.is_finite() && r.1.is_finite() { + Some(r) + } else { + None + } }) }; diff --git a/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp b/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp index 133f551a787f..b95695a5b99a 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_buffer.cpp @@ -63,6 +63,11 @@ namespace rerun { arrow::list(arrow::field("item", arrow::uint8(), false)), false ), + arrow::field( + "NV12", + arrow::list(arrow::field("item", arrow::uint8(), false)), + false + ), }); return datatype; } @@ -126,6 +131,10 @@ namespace rerun { memory_pool, std::make_shared(memory_pool) ), + std::make_shared( + memory_pool, + std::make_shared(memory_pool) + ), }), arrow_datatype() )); @@ -277,6 +286,16 @@ namespace rerun { ); break; } + case detail::TensorBufferTag::NV12: { + auto variant_builder = + static_cast(variant_builder_untyped); + (void)variant_builder; + return Error( + ErrorCode::NotImplemented, + "TODO(andreas): list types in unions are not yet supported" + ); + break; + } } } diff --git a/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp b/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp index 3adbd47b23cc..4f039af42149 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_buffer.hpp @@ -38,6 +38,7 @@ namespace rerun { F32, F64, JPEG, + NV12, }; union TensorBufferData { @@ -65,6 +66,8 @@ namespace rerun { std::vector jpeg; + std::vector nv12; + TensorBufferData() {} ~TensorBufferData() {} @@ -138,6 +141,10 @@ namespace rerun { _data.jpeg = other._data.jpeg; break; } + case detail::TensorBufferTag::NV12: { + _data.nv12 = other._data.nv12; + break; + } case detail::TensorBufferTag::NONE: const void *otherbytes = reinterpret_cast(&other._data); void *thisbytes = reinterpret_cast(&this->_data); @@ -226,6 +233,11 @@ namespace rerun { _data.jpeg.~TypeAlias(); break; } + case detail::TensorBufferTag::NV12: { + typedef std::vector TypeAlias; + _data.nv12.~TypeAlias(); + break; + } } } @@ -332,6 +344,14 @@ namespace rerun { return self; } + static TensorBuffer nv12(std::vector nv12) { + typedef std::vector TypeAlias; + TensorBuffer self; + self._tag = detail::TensorBufferTag::NV12; + new (&self._data.nv12) TypeAlias(std::move(nv12)); + return self; + } + /// Returns the arrow data type this type corresponds to. static const std::shared_ptr &arrow_datatype(); diff --git a/rerun_py/rerun_sdk/rerun/_image.py b/rerun_py/rerun_sdk/rerun/_image.py index 6ce024912d1f..6f06ebb8622b 100644 --- a/rerun_py/rerun_sdk/rerun/_image.py +++ b/rerun_py/rerun_sdk/rerun/_image.py @@ -2,7 +2,6 @@ import io import pathlib -from enum import Enum from typing import IO, Iterable import numpy as np @@ -14,28 +13,49 @@ from .datatypes import TensorBuffer, TensorDimension -class ImageFormat(Enum): +class ImageFormat: """Image file format.""" - BMP = "BMP" - """BMP format.""" + name: str - GIF = "GIF" - """GIF format.""" + BMP: ImageFormat + GIF: ImageFormat + JPEG: ImageFormat + PNG: ImageFormat + TIFF: ImageFormat + NV12: type[NV12] - JPEG = "JPEG" - """JPEG format.""" - - PNG = "PNG" - """PNG format.""" - - TIFF = "TIFF" - """TIFF format.""" + def __init__(self, name: str): + self.name = name def __str__(self) -> str: return self.name +class NV12(ImageFormat): + """NV12 format.""" + + name = "NV12" + + def __init__(self, width: int | None = None, height: int | None = None) -> None: + if width is None and height is None: + raise ValueError("Must provide width or height") + self.width = width + self.height = height + + +# Assign the variants +# This allows for rust like enums, for example: +# ImageFormat.NV12(width=1920, height=1080) +# isinstance(ImageFormat.NV12, ImageFormat) == True and isinstance(ImageFormat.NV12, NV12) == True +ImageFormat.BMP = ImageFormat("BMP") +ImageFormat.GIF = ImageFormat("GIF") +ImageFormat.JPEG = ImageFormat("JPEG") +ImageFormat.PNG = ImageFormat("PNG") +ImageFormat.TIFF = ImageFormat("TIFF") +ImageFormat.NV12 = NV12 + + class ImageEncoded(AsComponents): def __init__( self, @@ -70,11 +90,6 @@ def __init__( if len([x for x in (path, contents) if x is not None]) != 1: raise ValueError("Must provide exactly one of 'path' or 'contents'") - if format is not None: - formats = (str(format),) - else: - formats = None - buffer: IO[bytes] | None = None if path is not None: buffer = open(path, "rb") @@ -86,6 +101,31 @@ def __init__( if buffer is None: raise ValueError("Input data could not be coerced to IO[bytes]") + formats = None + if format is not None: + if isinstance(format, NV12): + np_buf = np.frombuffer(buffer.read(), dtype=np.uint8) + height = format.height + width = format.width + if height is None and width is None: + raise ValueError("Must provide width or height") + elif height is None and width is not None: + height = int(np_buf.size / (width * 1.5)) + elif width is None and height is not None: + width = int(np_buf.size / (height * 1.5)) + assert height is not None and width is not None + np_buf = np_buf.reshape(int(height * 1.5), width, 1) + self.data = TensorData( + buffer=TensorBuffer(np_buf, kind="nv12"), + shape=[ + TensorDimension(np_buf.shape[0], "height"), + TensorDimension(np_buf.shape[1], "width"), + TensorDimension(1, "depth"), + ], + ) + self.draw_order = draw_order + return + formats = (str(format),) # Note that PIL loading is lazy. This will only identify the type of file # and not decode the whole jpeg. img_data = PILImage.open(buffer, formats=formats) diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py index c9f2943ada7b..4b74b1f2b6d3 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_buffer.py @@ -53,9 +53,11 @@ class TensorBuffer(TensorBufferExt): F64 (npt.NDArray[np.float64]): JPEG (npt.NDArray[np.uint8]): + + NV12 (npt.NDArray[np.uint8]): """ - kind: Literal["u8", "u16", "u32", "u64", "i8", "i16", "i32", "i64", "f16", "f32", "f64", "jpeg"] = field( + kind: Literal["u8", "u16", "u32", "u64", "i8", "i16", "i32", "i64", "f16", "f32", "f64", "jpeg", "nv12"] = field( default="u8" ) @@ -176,6 +178,12 @@ def __init__(self) -> None: nullable=False, metadata={}, ), + pa.field( + "NV12", + pa.list_(pa.field("item", pa.uint8(), nullable=False, metadata={})), + nullable=False, + metadata={}, + ), ] ), self._TYPE_NAME, diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py index a0a922d6d875..1acef81f83f9 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py @@ -155,6 +155,12 @@ def __init__(self) -> None: nullable=False, metadata={}, ), + pa.field( + "NV12", + pa.list_(pa.field("item", pa.uint8(), nullable=False, metadata={})), + nullable=False, + metadata={}, + ), ] ), nullable=False, diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py index 623b01a7d53b..e0767f476fa8 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py @@ -41,45 +41,6 @@ def _to_numpy(tensor: TensorLike) -> npt.NDArray[Any]: return np.array(tensor, copy=False) -class ImageEncoding: - NV12: "NV12" - JPEG: "JPEG" - - @property - def name(self) -> str: - raise NotImplementedError - - -class NV12(ImageEncoding): - width: int | None - height: int | None - - def __init__(self, width: int | None = None, height: int | None = None): - if width is None and height is None: - raise ValueError("ImageEncodingNv12 needs to carry at least one width or height.") - self.width = width - self.height = height - - @property - def name(self) -> str: - return f"NV12" - - -class JPEG(ImageEncoding): - quality: int - - def __init__(self, quality: int): - self.quality = quality - - @property - def name(self) -> str: - return f"JPEG" - - -ImageEncoding.NV12 = NV12 -ImageEncoding.JPEG = JPEG - - class TensorDataExt: # TODO(jleibs): Should also provide custom converters for shape / buffer # assignment that prevent the user from putting the TensorData into an @@ -92,7 +53,7 @@ def __init__( buffer: TensorBufferLike | None = None, array: TensorLike | None = None, dim_names: Sequence[str | None] | None = None, - encoding: ImageEncoding | None = None, + jpeg_quality: int | None = None, ) -> None: """ Construct a `TensorData` object. @@ -180,36 +141,31 @@ def __init__( # This shouldn't be possible but typing can't figure it out raise ValueError("No shape provided.") - if encoding is not None: - if isinstance(encoding, ImageEncoding.JPEG): - if array is None: - _send_warning("Can only compress JPEG if an array is provided", 2) - else: - if array.dtype not in ["uint8", "sint32", "float32"]: - # Convert to a format supported by Image.fromarray - array = array.astype("float32") - - pil_image = Image.fromarray(array) - output = BytesIO() - pil_image.save(output, format="JPEG", quality=encoding.quality) - jpeg_bytes = output.getvalue() - output.close() - jpeg_array = np.frombuffer(jpeg_bytes, dtype=np.uint8) - # self.buffer = TensorBuffer(inner=jpeg_array, kind="jpeg") # TODO(emilk): something like this should work? - self.buffer = TensorBuffer(jpeg_array) - self.buffer.kind = "jpeg" - return - elif isinstance(encoding, ImageEncoding.NV12): - if array is None: - _send_warning("Can only compress NV12 if an array is provided", 2) - else: - self.buffer = TensorBuffer(array, "nv12") + if jpeg_quality is not None: + if array is None: + _send_warning("Can only compress JPEG if an array is provided", 2) + else: + if array.dtype not in ["uint8", "sint32", "float32"]: + # Convert to a format supported by Image.fromarray + array = array.astype("float32") + + pil_image = Image.fromarray(array) + output = BytesIO() + pil_image.save(output, format="JPEG", quality=jpeg_quality) + jpeg_bytes = output.getvalue() + output.close() + jpeg_array = np.frombuffer(jpeg_bytes, dtype=np.uint8) + # self.buffer = TensorBuffer(inner=jpeg_array, kind="jpeg") # TODO(emilk): something like this should work? + self.buffer = TensorBuffer(jpeg_array) + self.buffer.kind = "jpeg" + return + if buffer is not None: self.buffer = _tensor_data__buffer__special_field_converter_override(buffer) elif array is not None: self.buffer = TensorBuffer(array.flatten()) - if self.buffer.kind != "jpeg": + if self.buffer.kind != "jpeg" and self.buffer.kind != "nv12": expected_buffer_size = prod(d.size for d in self.shape) if len(self.buffer.inner) != expected_buffer_size: @@ -305,9 +261,10 @@ def _build_buffer_array(buffer: TensorBufferLike) -> pa.Array: buffer = buffer.flatten() data_inner = pa.ListArray.from_arrays(pa.array([0, len(buffer)]), buffer) - if kind == "jpeg": discriminant = "JPEG" + elif kind == "nv12": + discriminant = "NV12" else: assert buffer.dtype.type in DTYPE_MAP, f"Failed to find {buffer.dtype.type} in f{DTYPE_MAP}" discriminant = DTYPE_MAP[buffer.dtype.type] diff --git a/rerun_py/rerun_sdk/rerun/log_deprecated/image.py b/rerun_py/rerun_sdk/rerun/log_deprecated/image.py index 5c72cfd21e9e..bef5980f4d29 100644 --- a/rerun_py/rerun_sdk/rerun/log_deprecated/image.py +++ b/rerun_py/rerun_sdk/rerun/log_deprecated/image.py @@ -28,7 +28,7 @@ def log_image( ext: dict[str, Any] | None = None, timeless: bool = False, recording: RecordingStream | None = None, - encoding: ImageEncoding | None = None, + jpeg_quality: int | None = None, ) -> None: """ Log a gray or color image. @@ -73,8 +73,7 @@ def log_image( """ - tensor_data = TensorData(array=image, encoding=encoding) - + tensor_data = TensorData(array=image, jpeg_quality=jpeg_quality) log(entity_path, Image(tensor_data, draw_order=draw_order), ext=ext, timeless=timeless, recording=recording) From 7f299b30a0530fefcad2c7a13a1341b133c9e5f4 Mon Sep 17 00:00:00 2001 From: zrezke Date: Fri, 29 Sep 2023 13:23:19 +0200 Subject: [PATCH 03/20] clean shader --- crates/re_renderer/shader/decodings.wgsl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl index ae1f716c04c9..6bc13023ed98 100644 --- a/crates/re_renderer/shader/decodings.wgsl +++ b/crates/re_renderer/shader/decodings.wgsl @@ -8,16 +8,10 @@ fn decode_nv12(texture: texture_2d, coords: IVec2) -> Vec4 { let uv_row = u32(coords.y / 2); var uv_col = u32(coords.x / 2) * 2u; - let c = UVec2(coords); - let y = (f32(textureLoad(texture, c, 0).r) - 16.0) / 219.0; + let y = (f32(textureLoad(texture, UVec2(coords), 0).r) - 16.0) / 219.0; let u = (f32(textureLoad(texture, UVec2(u32(uv_col), uv_offset + uv_row), 0).r) - 128.0) / 224.0; let v = (f32(textureLoad(texture, UVec2((u32(uv_col) + 1u), uv_offset + uv_row), 0).r) - 128.0) / 224.0; - // Get RGB values and apply reverse gamma correction since we are rendering to sRGB framebuffer - // let r = pow(y + 1.402 * v, 2.2); - // let g = pow(y - (0.344 * u + 0.714 * v), 2.2); - // let b = pow(y + 1.772 * u, 2.2); - let r = y + 1.402 * v; let g = y - (0.344 * u + 0.714 * v); let b = y + 1.772 * u; From a3495b790723ce24e4232f71097d56fa66783d81 Mon Sep 17 00:00:00 2001 From: Filip Jeretina <59307111+zrezke@users.noreply.github.com> Date: Mon, 2 Oct 2023 09:02:43 +0200 Subject: [PATCH 04/20] Update crates/re_types/src/datatypes/tensor_data_ext.rs Co-authored-by: Andreas Reich --- crates/re_types/src/datatypes/tensor_data_ext.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 87ee693e1366..3b81b89414a8 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -51,7 +51,7 @@ impl TensorData { // NV12 encodes a color image in 1.5 "channels" -> 1 luma (per pixel) + (1U+1V) / 4 pixels. // Return the RGB size. match shape_short { - [h, w] => Some([(h.size as f64 / 1.5) as u64, w.size, 3]), + [h, w] => Some([h.size * 3 / 2, w.size, 3]), _ => None, } } From 54e32c8869827644a9b5d2460663bee9cde8ad63 Mon Sep 17 00:00:00 2001 From: Filip Jeretina <59307111+zrezke@users.noreply.github.com> Date: Mon, 2 Oct 2023 09:03:17 +0200 Subject: [PATCH 05/20] Update crates/re_renderer/src/renderer/rectangles.rs Co-authored-by: Andreas Reich --- crates/re_renderer/src/renderer/rectangles.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index b4095f94d22e..9fa5cb0d3a98 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -130,7 +130,7 @@ impl ColormappedTexture { match self.encoding { Some(TextureEncoding::Nv12) => { let [width, height] = self.texture.width_height(); - [width, (height as f64 / 1.5) as u32] + [width, height * 2 / 3] } _ => self.texture.width_height(), } From ae1da4ecf686fd672e27b4ba1074ad23dbaddf9b Mon Sep 17 00:00:00 2001 From: zrezke Date: Sun, 8 Oct 2023 12:44:05 +0200 Subject: [PATCH 06/20] Rename texture encoding to shader_decoding, fix sizing multipliers, fix gamma correction, fix shader clamping errors. Added a reference for the YUV2sRGB conversion coefficients. Made the image_height_width_channels TensorBuffer matching exhaustive. --- crates/re_renderer/shader/decodings.wgsl | 14 +++++-- crates/re_renderer/shader/rectangle_fs.wgsl | 10 ++--- crates/re_renderer/src/renderer/mod.rs | 2 +- crates/re_renderer/src/renderer/rectangles.rs | 24 ++++++------ .../re_space_view_spatial/src/parts/images.rs | 2 +- .../src/space_view_class.rs | 4 +- .../src/tensor_slice_to_gpu.rs | 6 +-- .../re_types/src/datatypes/tensor_data_ext.rs | 22 +++++++++-- .../src/gpu_bridge/colormap.rs | 2 +- .../src/gpu_bridge/tensor_to_gpu.rs | 37 +++++++++---------- 10 files changed, 71 insertions(+), 52 deletions(-) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl index 6bc13023ed98..8318dbee48ee 100644 --- a/crates/re_renderer/shader/decodings.wgsl +++ b/crates/re_renderer/shader/decodings.wgsl @@ -8,12 +8,18 @@ fn decode_nv12(texture: texture_2d, coords: IVec2) -> Vec4 { let uv_row = u32(coords.y / 2); var uv_col = u32(coords.x / 2) * 2u; - let y = (f32(textureLoad(texture, UVec2(coords), 0).r) - 16.0) / 219.0; + let y = max(0.0, (f32(textureLoad(texture, UVec2(coords), 0).r) - 16.0)) / 219.0; let u = (f32(textureLoad(texture, UVec2(u32(uv_col), uv_offset + uv_row), 0).r) - 128.0) / 224.0; let v = (f32(textureLoad(texture, UVec2((u32(uv_col) + 1u), uv_offset + uv_row), 0).r) - 128.0) / 224.0; - let r = y + 1.402 * v; - let g = y - (0.344 * u + 0.714 * v); - let b = y + 1.772 * u; + // Reference for conversion coefficients: https://en.wikipedia.org/wiki/YCbCr + // BT.601 (aka. SDTV, aka. Rec.601). WIKI section: ITU-R BT.601 conversion + // let r = clamp(y + 1.402 * v, 0.0, 1.0); + // let g = clamp(y - (0.344 * u + 0.714 * v), 0.0, 1.0); + // let b = clamp(y + 1.772 * u, 0.0, 1.0); + // BT.709 (aka. HDTV, aka. Rec.709). WIKI section: ITU-R BT.709 conversion + let r = clamp(y + 1.5748 * v, 0.0, 1.0); + let g = clamp(y + u * -0.1873 + v * -0.4681, 0.0, 1.0); + let b = clamp(y + u * 1.8556, 0.0 , 1.0); return Vec4(r, g, b, 1.0); } diff --git a/crates/re_renderer/shader/rectangle_fs.wgsl b/crates/re_renderer/shader/rectangle_fs.wgsl index 59e43bb2c102..c3cdf538d504 100644 --- a/crates/re_renderer/shader/rectangle_fs.wgsl +++ b/crates/re_renderer/shader/rectangle_fs.wgsl @@ -107,14 +107,14 @@ fn fs_main(in: VertexOut) -> @location(0) Vec4 { let coord = in.texcoord * texture_dimensions; if tex_filter(coord) == FILTER_NEAREST { // nearest - normalized_value = (Vec4(decode_nv12(texture_uint, + normalized_value = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord, texture_dimensions)))); } else { // bilinear - let v00 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, -0.5), texture_dimensions)))); - let v01 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, 0.5), texture_dimensions)))); - let v10 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, -0.5), texture_dimensions)))); - let v11 = (Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions)))); + let v00 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, -0.5), texture_dimensions)))); + let v01 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2(-0.5, 0.5), texture_dimensions)))); + let v10 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, -0.5), texture_dimensions)))); + let v11 = decode_color(Vec4(decode_nv12(texture_uint, clamp_to_edge_nearest_neighbor(coord + vec2( 0.5, 0.5), texture_dimensions)))); normalized_value = filter_bilinear(coord, v00, v01, v10, v11); } } diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index 80e28f0ca4de..0f88aa315bd0 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -21,7 +21,7 @@ pub use test_triangle::TestTriangleDrawData; mod rectangles; pub use rectangles::{ - ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, TextureEncoding, + ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, ShaderDecoding, TextureFilterMag, TextureFilterMin, TexturedRect, }; diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index 9fa5cb0d3a98..faa052932af7 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -50,7 +50,7 @@ pub enum TextureFilterMin { /// Describes how the color information is encoded in the texture. #[derive(Clone, Debug, PartialEq)] -pub enum TextureEncoding { +pub enum ShaderDecoding { Nv12, } @@ -90,8 +90,8 @@ pub struct ColormappedTexture { /// Failure to set a color mapper for a one-component texture is an error. pub color_mapper: Option, - /// For textures that don't store color information in conventional RGB color space, you need to supply a TextureEncoding. - pub encoding: Option, + /// For textures that need decoding in the shader, for example NV12 encoded images. + pub shader_decoding: Option, } /// How to map the normalized `.r` component to a color. @@ -122,13 +122,13 @@ impl ColormappedTexture { gamma: 1.0, multiply_rgb_with_alpha: true, color_mapper: None, - encoding: None, + shader_decoding: None, } } pub fn width_height(&self) -> [u32; 2] { - match self.encoding { - Some(TextureEncoding::Nv12) => { + match self.shader_decoding { + Some(ShaderDecoding::Nv12) => { let [width, height] = self.texture.width_height(); [width, height * 2 / 3] } @@ -282,7 +282,7 @@ mod gpu_data { gamma, color_mapper, multiply_rgb_with_alpha, - encoding: texture_encoding, + shader_decoding, } = colormapped_texture; let super::RectangleOptions { @@ -297,7 +297,7 @@ mod gpu_data { Some(wgpu::TextureSampleType::Float { .. }) => SAMPLE_TYPE_FLOAT, Some(wgpu::TextureSampleType::Sint) => SAMPLE_TYPE_SINT, Some(wgpu::TextureSampleType::Uint) => { - if texture_encoding == &Some(super::TextureEncoding::Nv12) { + if shader_decoding == &Some(super::ShaderDecoding::Nv12) { SAMPLE_TYPE_NV12 } else { SAMPLE_TYPE_UINT @@ -320,8 +320,8 @@ mod gpu_data { Some(ColorMapper::Texture(_)) => { color_mapper_int = COLOR_MAPPER_TEXTURE; } - None => match texture_encoding { - Some(super::TextureEncoding::Nv12) => color_mapper_int = COLOR_MAPPER_OFF, + None => match shader_decoding { + Some(super::ShaderDecoding::Nv12) => color_mapper_int = COLOR_MAPPER_OFF, _ => return Err(RectangleError::MissingColorMapper), }, }, @@ -464,7 +464,7 @@ impl RectangleDrawData { bind_group: ctx.gpu_resources.bind_groups.alloc( &ctx.device, &ctx.gpu_resources, - &(BindGroupDesc { + &BindGroupDesc { label: "RectangleInstance::bind_group".into(), entries: smallvec![ uniform_buffer, @@ -474,7 +474,7 @@ impl RectangleDrawData { BindGroupEntry::DefaultTextureView(colormap_texture) ], layout: rectangle_renderer.bind_group_layout, - }), + }, ), draw_outline_mask: rectangle.options.outline_mask.is_some(), }); diff --git a/crates/re_space_view_spatial/src/parts/images.rs b/crates/re_space_view_spatial/src/parts/images.rs index 78858f779d05..8369c3356365 100644 --- a/crates/re_space_view_spatial/src/parts/images.rs +++ b/crates/re_space_view_spatial/src/parts/images.rs @@ -109,7 +109,7 @@ fn to_textured_rect( .transform_vector3(glam::Vec3::X * width as f32), extent_v: ent_context .world_from_entity - .transform_vector3(glam::Vec3::Y * (height as f32)), + .transform_vector3(glam::Vec3::Y * height as f32), colormapped_texture, options: RectangleOptions { texture_filter_magnification, diff --git a/crates/re_space_view_tensor/src/space_view_class.rs b/crates/re_space_view_tensor/src/space_view_class.rs index b4ec653ee29b..0151d8efc358 100644 --- a/crates/re_space_view_tensor/src/space_view_class.rs +++ b/crates/re_space_view_tensor/src/space_view_class.rs @@ -264,10 +264,10 @@ fn view_tensor( .iter() .any(|selector| selector.visible) { - (egui::Frame { + egui::Frame { inner_margin: egui::Margin::symmetric(16.0, 8.0), ..Default::default() - }) + } .show(ui, |ui| { ui.spacing_mut().item_spacing = default_item_spacing; // keep the default spacing between sliders selectors_ui(ui, state, tensor); diff --git a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs index 4c5c103fd6e6..60059feaf4dc 100644 --- a/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs +++ b/crates/re_space_view_tensor/src/tensor_slice_to_gpu.rs @@ -1,6 +1,6 @@ use re_data_store::VersionedInstancePathHash; use re_renderer::{ - renderer::{ColormappedTexture, TextureEncoding}, + renderer::{ColormappedTexture, ShaderDecoding}, resource_managers::{GpuTexture2D, Texture2DCreationDesc, TextureManager2DError}, }; use re_types::{ @@ -50,8 +50,8 @@ pub fn colormapped_texture( color_mapper: Some(re_renderer::renderer::ColorMapper::Function( color_mapping.map, )), - encoding: match &tensor.buffer { - &TensorBuffer::Nv12(_) => Some(TextureEncoding::Nv12), + shader_decoding: match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(ShaderDecoding::Nv12), _ => None, }, }) diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 3b81b89414a8..13f656d57c2a 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -1,4 +1,7 @@ -use crate::tensor_data::{TensorCastError, TensorDataType, TensorElement}; +use crate::{ + archetypes::Tensor, + tensor_data::{TensorCastError, TensorDataType, TensorElement}, +}; #[cfg(feature = "image")] use crate::tensor_data::{DecodedTensor, TensorImageLoadError, TensorImageSaveError}; @@ -47,15 +50,25 @@ impl TensorData { pub fn image_height_width_channels(&self) -> Option<[u64; 3]> { let shape_short = self.shape_short(); match &self.buffer { + // In the case of NV12, return the shape of the RGB image, not the tensor size. TensorBuffer::Nv12(_) => { // NV12 encodes a color image in 1.5 "channels" -> 1 luma (per pixel) + (1U+1V) / 4 pixels. - // Return the RGB size. match shape_short { - [h, w] => Some([h.size * 3 / 2, w.size, 3]), + [h, w] => Some([h.size * 2 / 3, w.size, 3]), _ => None, } } - _ => { + TensorBuffer::U8(_) + | TensorBuffer::U16(_) + | TensorBuffer::U32(_) + | TensorBuffer::U64(_) + | TensorBuffer::I8(_) + | TensorBuffer::I16(_) + | TensorBuffer::I32(_) + | TensorBuffer::I64(_) + | TensorBuffer::F16(_) + | TensorBuffer::F32(_) + | TensorBuffer::F64(_) => { match shape_short.len() { 1 => { // Special case: Nx1(x1x1x …) tensors are treated as Nx1 gray images. @@ -79,6 +92,7 @@ impl TensorData { _ => None, } } + TensorBuffer::Jpeg(_) => None, } } diff --git a/crates/re_viewer_context/src/gpu_bridge/colormap.rs b/crates/re_viewer_context/src/gpu_bridge/colormap.rs index b4b31fa89be6..c1569ce45c22 100644 --- a/crates/re_viewer_context/src/gpu_bridge/colormap.rs +++ b/crates/re_viewer_context/src/gpu_bridge/colormap.rs @@ -42,7 +42,7 @@ fn colormap_preview_ui( decode_srgb: false, multiply_rgb_with_alpha: false, gamma: 1.0, - encoding: None, + shader_decoding: None, color_mapper: Some(re_renderer::renderer::ColorMapper::Function(colormap)), }; diff --git a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs index 6e233ee5c5e8..e3c2c4c0aa34 100644 --- a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs +++ b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs @@ -16,7 +16,7 @@ use wgpu::TextureFormat; use re_renderer::{ pad_rgb_to_rgba, - renderer::{ColorMapper, ColormappedTexture, TextureEncoding}, + renderer::{ColorMapper, ColormappedTexture, ShaderDecoding}, resource_managers::Texture2DCreationDesc, RenderContext, }; @@ -119,15 +119,19 @@ pub fn color_tensor_to_gpu( .map_err(|err| anyhow::anyhow!("Failed to create texture for color tensor: {err}"))?; let texture_format = texture_handle.format(); - - // TODO(emilk): let the user specify the color space. - let decode_srgb = texture_format == TextureFormat::Rgba8Unorm - || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)?; - - let encoding = match &tensor.buffer { - &TensorBuffer::Nv12(_) => Some(TextureEncoding::Nv12), + let shader_decoding = match &tensor.buffer { + &TensorBuffer::Nv12(_) => Some(ShaderDecoding::Nv12), _ => None, }; + // TODO(emilk): let the user specify the color space. + let decode_srgb = match shader_decoding { + Some(ShaderDecoding::Nv12) => true, + None => { + texture_format == TextureFormat::Rgba8Unorm + || super::tensor_decode_srgb_gamma_heuristic(tensor_stats, tensor.dtype(), depth)? + } + }; + // Special casing for normalized textures used above: let range = if matches!( texture_format, @@ -136,14 +140,14 @@ pub fn color_tensor_to_gpu( [0.0, 1.0] } else if texture_format == TextureFormat::R8Snorm { [-1.0, 1.0] - } else if encoding == Some(TextureEncoding::Nv12) { + } else if shader_decoding == Some(ShaderDecoding::Nv12) { [0.0, 1.0] } else { // TODO(#2341): The range should be determined by a `DataRange` component. In absence this, heuristics apply. super::tensor_data_range_heuristic(tensor_stats, tensor.dtype())? }; - let color_mapper = if encoding.is_none() && texture_format.components() == 1 { + let color_mapper = if shader_decoding.is_none() && texture_format.components() == 1 { // Single-channel images = luminance = grayscale Some(ColorMapper::Function(re_renderer::Colormap::Grayscale)) } else { @@ -153,12 +157,7 @@ pub fn color_tensor_to_gpu( // TODO(wumpf): There should be a way to specify whether a texture uses pre-multiplied alpha or not. // Assume that the texture is not pre-multiplied if it has an alpha channel. let multiply_rgb_with_alpha = depth == 4; - - let gamma = if encoding == Some(TextureEncoding::Nv12) { - 2.2 - } else { - 1.0 - }; + let gamma = 1.0; Ok(ColormappedTexture { texture: texture_handle, @@ -167,7 +166,7 @@ pub fn color_tensor_to_gpu( multiply_rgb_with_alpha, gamma, color_mapper, - encoding, + shader_decoding, }) } @@ -240,7 +239,7 @@ pub fn class_id_tensor_to_gpu( multiply_rgb_with_alpha: false, // already premultiplied! gamma: 1.0, color_mapper: Some(ColorMapper::Texture(colormap_texture_handle)), - encoding: None, + shader_decoding: None, }) } @@ -274,7 +273,7 @@ pub fn depth_tensor_to_gpu( multiply_rgb_with_alpha: false, gamma: 1.0, color_mapper: Some(ColorMapper::Function(re_renderer::Colormap::Turbo)), - encoding: None, + shader_decoding: None, }) } From b39e4054216fecc7646505e28f8b3007f3b481f5 Mon Sep 17 00:00:00 2001 From: zrezke Date: Mon, 9 Oct 2023 12:51:21 +0200 Subject: [PATCH 07/20] Addressed some more change requests --- crates/re_renderer/shader/decodings.wgsl | 17 ++++++++--------- .../re_types/src/datatypes/tensor_data_ext.rs | 1 + .../src/gpu_bridge/tensor_to_gpu.rs | 11 ++++++----- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl index 8318dbee48ee..17fe3bc628df 100644 --- a/crates/re_renderer/shader/decodings.wgsl +++ b/crates/re_renderer/shader/decodings.wgsl @@ -12,14 +12,13 @@ fn decode_nv12(texture: texture_2d, coords: IVec2) -> Vec4 { let u = (f32(textureLoad(texture, UVec2(u32(uv_col), uv_offset + uv_row), 0).r) - 128.0) / 224.0; let v = (f32(textureLoad(texture, UVec2((u32(uv_col) + 1u), uv_offset + uv_row), 0).r) - 128.0) / 224.0; - // Reference for conversion coefficients: https://en.wikipedia.org/wiki/YCbCr - // BT.601 (aka. SDTV, aka. Rec.601). WIKI section: ITU-R BT.601 conversion - // let r = clamp(y + 1.402 * v, 0.0, 1.0); - // let g = clamp(y - (0.344 * u + 0.714 * v), 0.0, 1.0); - // let b = clamp(y + 1.772 * u, 0.0, 1.0); - // BT.709 (aka. HDTV, aka. Rec.709). WIKI section: ITU-R BT.709 conversion - let r = clamp(y + 1.5748 * v, 0.0, 1.0); - let g = clamp(y + u * -0.1873 + v * -0.4681, 0.0, 1.0); - let b = clamp(y + u * 1.8556, 0.0 , 1.0); + // BT.601 (aka. SDTV, aka. Rec.601). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion + let r = clamp(y + 1.402 * v, 0.0, 1.0); + let g = clamp(y - (0.344 * u + 0.714 * v), 0.0, 1.0); + let b = clamp(y + 1.772 * u, 0.0, 1.0); + // BT.709 (aka. HDTV, aka. Rec.709). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion + // let r = clamp(y + 1.5748 * v, 0.0, 1.0); + // let g = clamp(y + u * -0.1873 + v * -0.4681, 0.0, 1.0); + // let b = clamp(y + u * 1.8556, 0.0 , 1.0); return Vec4(r, g, b, 1.0); } diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 13f656d57c2a..309f3f4d2107 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -203,6 +203,7 @@ impl TensorData { } } + /// Returns decoded RGB8 value at the given image coordinates if this tensor is a valid NV12 image. pub fn get_nv12_pixel(&self, x: u64, y: u64) -> Option<[TensorElement; 3]> { let TensorBuffer::Nv12(buf) = &self.buffer else { return None; diff --git a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs index e3c2c4c0aa34..7a0022d47ada 100644 --- a/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs +++ b/crates/re_viewer_context/src/gpu_bridge/tensor_to_gpu.rs @@ -86,7 +86,7 @@ pub fn color_tensor_to_gpu( tensor: &DecodedTensor, tensor_stats: &TensorStats, ) -> anyhow::Result { - let [height, width, depth] = height_width_depth(tensor)?; + let [height, width, depth] = texture_height_width_channels(tensor)?; let texture_handle = try_get_or_create_texture(render_ctx, hash(tensor_path_hash), || { let (data, format) = match (depth, &tensor.buffer) { @@ -181,7 +181,7 @@ pub fn class_id_tensor_to_gpu( tensor_stats: &TensorStats, annotations: &Annotations, ) -> anyhow::Result { - let [_height, _width, depth] = height_width_depth(tensor)?; + let [_height, _width, depth] = texture_height_width_channels(tensor)?; anyhow::ensure!( depth == 1, "Cannot apply annotations to tensor of shape {:?}", @@ -253,7 +253,7 @@ pub fn depth_tensor_to_gpu( tensor: &DecodedTensor, tensor_stats: &TensorStats, ) -> anyhow::Result { - let [_height, _width, depth] = height_width_depth(tensor)?; + let [_height, _width, depth] = texture_height_width_channels(tensor)?; anyhow::ensure!( depth == 1, "Depth tensor of weird shape: {:?}", @@ -314,7 +314,7 @@ fn general_texture_creation_desc_from_tensor<'a>( debug_name: &str, tensor: &'a DecodedTensor, ) -> anyhow::Result> { - let [height, width, depth] = height_width_depth(tensor)?; + let [height, width, depth] = texture_height_width_channels(tensor)?; let (data, format) = match depth { 1 => { @@ -510,13 +510,14 @@ fn pad_and_narrow_and_cast( // ----------------------------------------------------------------------------; -fn height_width_depth(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { +fn texture_height_width_channels(tensor: &TensorData) -> anyhow::Result<[u32; 3]> { use anyhow::Context as _; let Some([mut height, width, channel]) = tensor.image_height_width_channels() else { anyhow::bail!("Tensor is not an image"); }; height = match tensor.buffer { + // Correct the texture height for NV12, tensor.image_height_width_channels returns the RGB size for NV12 images. The actual texture size has dimensions (h*3/2, w, 1). TensorBuffer::Nv12(_) => height * 3 / 2, _ => height, }; From 4e786a6cd8b7625b258bcc8fa87475357f475e23 Mon Sep 17 00:00:00 2001 From: zrezke Date: Sat, 14 Oct 2023 12:31:36 +0200 Subject: [PATCH 08/20] Replace width, height parameters of ImageFormat.NV12 with a single size_hint. --- crates/re_renderer/shader/decodings.wgsl | 1 + rerun_py/rerun_sdk/rerun/_image.py | 33 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/re_renderer/shader/decodings.wgsl b/crates/re_renderer/shader/decodings.wgsl index 17fe3bc628df..ef35725b1f29 100644 --- a/crates/re_renderer/shader/decodings.wgsl +++ b/crates/re_renderer/shader/decodings.wgsl @@ -12,6 +12,7 @@ fn decode_nv12(texture: texture_2d, coords: IVec2) -> Vec4 { let u = (f32(textureLoad(texture, UVec2(u32(uv_col), uv_offset + uv_row), 0).r) - 128.0) / 224.0; let v = (f32(textureLoad(texture, UVec2((u32(uv_col) + 1u), uv_offset + uv_row), 0).r) - 128.0) / 224.0; + // Specifying the color standard should be exposed in the future (https://github.com/rerun-io/rerun/pull/3541) // BT.601 (aka. SDTV, aka. Rec.601). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion let r = clamp(y + 1.402 * v, 0.0, 1.0); let g = clamp(y - (0.344 * u + 0.714 * v), 0.0, 1.0); diff --git a/rerun_py/rerun_sdk/rerun/_image.py b/rerun_py/rerun_sdk/rerun/_image.py index 6b09bbb9c940..7ef64b041ecd 100644 --- a/rerun_py/rerun_sdk/rerun/_image.py +++ b/rerun_py/rerun_sdk/rerun/_image.py @@ -2,7 +2,7 @@ import io import pathlib -from typing import IO, Iterable +from typing import IO, Iterable, Tuple import numpy as np from PIL import Image as PILImage @@ -38,12 +38,18 @@ class NV12(ImageFormat): """NV12 format.""" name = "NV12" + size_hint: Tuple[int, int] - def __init__(self, width: int | None = None, height: int | None = None) -> None: - if width is None and height is None: - raise ValueError("Must provide width or height") - self.width = width - self.height = height + def __init__(self, size_hint: Tuple[int, int]) -> None: + """ + An NV12 encoded image. + + Parameters + ---------- + size_hint: + A tuple of (height, width), specifying the RGB size of the image + """ + self.size_hint = size_hint # Assign the variants @@ -111,18 +117,11 @@ def __init__( if format is not None: if isinstance(format, NV12): np_buf = np.frombuffer(buffer.read(), dtype=np.uint8) - height = format.height - width = format.width - if height is None and width is None: - raise ValueError("Must provide width or height") - elif height is None and width is not None: - height = int(np_buf.size / (width * 1.5)) - elif width is None and height is not None: - width = int(np_buf.size / (height * 1.5)) - assert height is not None and width is not None - np_buf = np_buf.reshape(int(height * 1.5), width, 1) + np_buf = np_buf.reshape(int(format.size_hint[0] * 1.5), format.size_hint[1]) + tensor_buffer = TensorBuffer(np_buf) + tensor_buffer.kind = "nv12" self.data = TensorData( - buffer=TensorBuffer(np_buf, kind="nv12"), + buffer=tensor_buffer, shape=[ TensorDimension(np_buf.shape[0], "height"), TensorDimension(np_buf.shape[1], "width"), From 5e877d98ffede3bc5ef4f74c9b9738a789e01bd1 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 16 Oct 2023 10:53:56 +0200 Subject: [PATCH 09/20] Fix lacking ndarray conversion, fix compile warnings --- crates/re_data_ui/src/image.rs | 4 ++-- crates/re_renderer/src/renderer/rectangles.rs | 2 +- .../re_types/src/datatypes/tensor_data_ext.rs | 24 +++++++++++++------ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/re_data_ui/src/image.rs b/crates/re_data_ui/src/image.rs index 7904a228612d..4dd25e351a89 100644 --- a/crates/re_data_ui/src/image.rs +++ b/crates/re_data_ui/src/image.rs @@ -706,11 +706,11 @@ fn tensor_pixel_value_ui( if let Some([r, g, b]) = match &tensor.buffer { TensorBuffer::Nv12(_) => tensor.get_nv12_pixel(x, y), _ => { - if let (Some(r), Some(g), Some(b)) = ( + if let [Some(r), Some(g), Some(b)] = [ tensor.get_with_image_coords(x, y, 0), tensor.get_with_image_coords(x, y, 1), tensor.get_with_image_coords(x, y, 2), - ) { + ] { Some([r, g, b]) } else { None diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index c55b92c9e1e7..3fea5f64ae92 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -49,7 +49,7 @@ pub enum TextureFilterMin { } /// Describes how the color information is encoded in the texture. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum ShaderDecoding { Nv12, } diff --git a/crates/re_types/src/datatypes/tensor_data_ext.rs b/crates/re_types/src/datatypes/tensor_data_ext.rs index 309f3f4d2107..218611b1db7e 100644 --- a/crates/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/re_types/src/datatypes/tensor_data_ext.rs @@ -1,7 +1,4 @@ -use crate::{ - archetypes::Tensor, - tensor_data::{TensorCastError, TensorDataType, TensorElement}, -}; +use crate::tensor_data::{TensorCastError, TensorDataType, TensorElement}; #[cfg(feature = "image")] use crate::tensor_data::{DecodedTensor, TensorImageLoadError, TensorImageSaveError}; @@ -210,7 +207,7 @@ impl TensorData { }; match self.image_height_width_channels() { Some([h, w, _]) => { - let uv_offset = (w * h) as u64; + let uv_offset = w * h; let luma = ((buf[(y * w + x) as usize] as f64) - 16.0) / 216.0; let u = ((buf[(uv_offset + (y / 2) * w + x) as usize] as f64) - 128.0) / 224.0; let v = @@ -242,7 +239,7 @@ impl TensorData { // ---------------------------------------------------------------------------- -macro_rules! tensor_type { +macro_rules! ndarray_from_tensor { ($type:ty, $variant:ident) => { impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, $type> { type Error = TensorCastError; @@ -258,7 +255,11 @@ macro_rules! tensor_type { } } } + }; +} +macro_rules! tensor_from_ndarray { + ($type:ty, $variant:ident) => { impl<'a, D: ::ndarray::Dimension> TryFrom<::ndarray::ArrayView<'a, $type, D>> for TensorData { @@ -323,6 +324,13 @@ macro_rules! tensor_type { }; } +macro_rules! tensor_type { + ($type:ty, $variant:ident) => { + ndarray_from_tensor!($type, $variant); + tensor_from_ndarray!($type, $variant); + }; +} + tensor_type!(u16, U16); tensor_type!(u32, U32); tensor_type!(u64, U64); @@ -337,7 +345,9 @@ tensor_type!(arrow2::types::f16, F16); tensor_type!(f32, F32); tensor_type!(f64, F64); -// Manual expension of tensor_type! macro for `u8` types. We need to do this, because u8 can store encoded data +tensor_from_ndarray!(u8, U8); + +// Manual expansion of ndarray_from_tensor! macro for `u8` types. We need to do this, because u8 can store encoded data impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> { type Error = TensorCastError; From 4ee752365f3647371ca44f5c0f08b0a7920c5469 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 16 Oct 2023 11:03:50 +0200 Subject: [PATCH 10/20] python formatting --- rerun_py/rerun_sdk/rerun/_image.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rerun_py/rerun_sdk/rerun/_image.py b/rerun_py/rerun_sdk/rerun/_image.py index 7ef64b041ecd..3bc106c0db4d 100644 --- a/rerun_py/rerun_sdk/rerun/_image.py +++ b/rerun_py/rerun_sdk/rerun/_image.py @@ -2,7 +2,7 @@ import io import pathlib -from typing import IO, Iterable, Tuple +from typing import IO, Iterable import numpy as np from PIL import Image as PILImage @@ -38,9 +38,9 @@ class NV12(ImageFormat): """NV12 format.""" name = "NV12" - size_hint: Tuple[int, int] + size_hint: tuple[int, int] - def __init__(self, size_hint: Tuple[int, int]) -> None: + def __init__(self, size_hint: tuple[int, int]) -> None: """ An NV12 encoded image. From 276ce9288e82134d854b7d8a959e8e9892c67ef1 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 16 Oct 2023 11:04:05 +0200 Subject: [PATCH 11/20] nv12 image test --- tests/python/nv12image/main.py | 49 +++++++++++++++++++++++++ tests/python/nv12image/requirements.txt | 3 ++ tests/python/requirements.txt | 1 + 3 files changed, 53 insertions(+) create mode 100755 tests/python/nv12image/main.py create mode 100644 tests/python/nv12image/requirements.txt diff --git a/tests/python/nv12image/main.py b/tests/python/nv12image/main.py new file mode 100755 index 000000000000..20da9d24294c --- /dev/null +++ b/tests/python/nv12image/main.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Testing NV12 image encoding.""" +from __future__ import annotations + +import argparse +import os +from typing import Any + +import cv2 +import numpy as np +import rerun + + +def bgra2nv12(bgra: Any) -> np.ndarray: + yuv = cv2.cvtColor(bgra, cv2.COLOR_BGRA2YUV_I420) + uv_row_cnt = yuv.shape[0] // 3 + uv_plane = np.transpose(yuv[uv_row_cnt * 2 :].reshape(2, -1), [1, 0]) + yuv[uv_row_cnt * 2 :] = uv_plane.reshape(uv_row_cnt, -1) + return yuv + + +def main() -> None: + parser = argparse.ArgumentParser(description="Displaying NV12 encoded images.") + rerun.script_add_args(parser) + args = parser.parse_args() + + rerun.script_setup(args, "rerun_test_nv12image") + + # Make sure you use a colorful image! + dir_path = os.path.dirname(os.path.realpath(__file__)) + img_path = f"{dir_path}/../../../crates/re_ui/data/logo_dark_mode.png" + img_bgra = cv2.imread(img_path, cv2.IMREAD_UNCHANGED) + + img_rgb = cv2.cvtColor(img_bgra, cv2.COLOR_BGRA2RGB) + rerun.log("img_reference", rerun.Image(img_rgb)) + + rerun.log( + "img_nv12", + rerun.ImageEncoded( + contents=bytes(bgra2nv12(img_bgra)), + format=rerun.ImageFormat.NV12((img_bgra.shape[0], img_bgra.shape[1])), + ), + ) + + rerun.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/tests/python/nv12image/requirements.txt b/tests/python/nv12image/requirements.txt new file mode 100644 index 000000000000..4364766d7369 --- /dev/null +++ b/tests/python/nv12image/requirements.txt @@ -0,0 +1,3 @@ +numpy +opencv-python +rerun-sdk diff --git a/tests/python/requirements.txt b/tests/python/requirements.txt index 2da31733cced..a9fce2d3701c 100644 --- a/tests/python/requirements.txt +++ b/tests/python/requirements.txt @@ -1 +1,2 @@ -r test_api/requirements.txt +-r nv12image/requirements.txt From ce23bee9bcf0b137c1e0471e8cf5870222ba3787 Mon Sep 17 00:00:00 2001 From: zrezke Date: Mon, 16 Oct 2023 16:16:35 +0200 Subject: [PATCH 12/20] Added python example for logging nv12 encoded images. --- examples/python/nv12/README.md | 19 ++++++++ examples/python/nv12/main.py | 65 +++++++++++++++++++++++++++ examples/python/nv12/requirements.txt | 3 ++ 3 files changed, 87 insertions(+) create mode 100644 examples/python/nv12/README.md create mode 100755 examples/python/nv12/main.py create mode 100644 examples/python/nv12/requirements.txt diff --git a/examples/python/nv12/README.md b/examples/python/nv12/README.md new file mode 100644 index 000000000000..94431bbbc557 --- /dev/null +++ b/examples/python/nv12/README.md @@ -0,0 +1,19 @@ +--- +title: Template +tags: [kebab-case, comma, separated] +description: "Short ~100-sign description of the example. No longer than 130 signs!" +python: https://github.com/rerun-io/rerun/tree/latest/examples/python/template/main.py +rust: https://github.com/rerun-io/rerun/tree/latest/examples/rust/template/src/main.rs +--- + + + +This example displays an NV12 encoded video stream from a webcam in rerun. + +```bash +pip install -r examples/python/nv12/requirements.txt +python examples/python/nv12/main.py +``` diff --git a/examples/python/nv12/main.py b/examples/python/nv12/main.py new file mode 100755 index 000000000000..f93e72141445 --- /dev/null +++ b/examples/python/nv12/main.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +Stream NV12 images from a webcam. + +Run: +```sh +pip install -r examples/python/nv12/requirements.txt +python examples/python/nv12/main.py +``` +""" +from __future__ import annotations + +import argparse + +import rerun as rr # pip install rerun-sdk +import cv2 +import numpy as np +import time + + +def bgr2nv12(bgr: np.ndarray) -> np.ndarray: + yuv = cv2.cvtColor(bgr, cv2.COLOR_RGB2YUV_I420) + uv_row_cnt = yuv.shape[0] // 3 + uv_plane = np.transpose(yuv[uv_row_cnt * 2 :].reshape(2, -1), [1, 0]) + yuv[uv_row_cnt * 2 :] = uv_plane.reshape(uv_row_cnt, -1) + return yuv + + +def main() -> None: + parser = argparse.ArgumentParser(description="Example of using the Rerun visualizer to display NV12 images.") + rr.script_add_args(parser) + parser.add_argument( + "-t", + "--timeout", + type=float, + default=5, + help="Timeout in seconds, after which the script will stop streaming frames.", + ) + args = parser.parse_args() + + rr.script_setup(args, "NV12 image example") + + cap = cv2.VideoCapture(0) + if not cap.isOpened(): + raise RuntimeError("This example requires a webcam.") + start_time = time.time() + print(f"Started streaming NV12 images for {args.timeout} seconds.") + while start_time + args.timeout > time.time(): + ret, frame = cap.read() + if not ret: + time.sleep(0.01) + continue + rr.log( + "NV12", + rr.ImageEncoded( + contents=bytes(bgr2nv12(frame)), + format=rr.ImageFormat.NV12((frame.shape[0], frame.shape[1])), + ), + ) + time.sleep(0.01) + rr.script_teardown(args) + + +if __name__ == "__main__": + main() diff --git a/examples/python/nv12/requirements.txt b/examples/python/nv12/requirements.txt new file mode 100644 index 000000000000..4aa030be3b15 --- /dev/null +++ b/examples/python/nv12/requirements.txt @@ -0,0 +1,3 @@ +rerun-sdk>=0.10 +opencv-python +numpy From 21aa188650a8cbdb74440fcba98c5f051e445691 Mon Sep 17 00:00:00 2001 From: zrezke Date: Mon, 16 Oct 2023 16:35:32 +0200 Subject: [PATCH 13/20] Fill in example readme --- examples/python/nv12/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/python/nv12/README.md b/examples/python/nv12/README.md index 94431bbbc557..207ede3e0771 100644 --- a/examples/python/nv12/README.md +++ b/examples/python/nv12/README.md @@ -1,9 +1,8 @@ --- -title: Template -tags: [kebab-case, comma, separated] -description: "Short ~100-sign description of the example. No longer than 130 signs!" -python: https://github.com/rerun-io/rerun/tree/latest/examples/python/template/main.py -rust: https://github.com/rerun-io/rerun/tree/latest/examples/rust/template/src/main.rs +title: NV12 +tags: [2d, image-encoding, yuv] +description: "Visualize an NV12 encoded video stream from a webcam." +python: https://github.com/rerun-io/rerun/tree/latest/examples/python/nv12/main.py ---