From 8f4f4e9f993597868b8504f583a4e2c926da7e03 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 15 Dec 2023 14:51:57 +0100 Subject: [PATCH] `DataLoader`s 2: add text-based `DataLoader` (`.txt`, `.md`) (#4518) What the title says. ![image](https://github.com/rerun-io/rerun/assets/2910679/68f2e499-f4df-4e75-95f6-0ed7f479c5e6) Checks: - [x] `cargo r -p rerun-cli --no-default-features --features native_viewer -- examples/assets/example.{glb,gltf,obj,jpg,png,rrd,txt,md}` - [x] Native: `File > Open > examples/assets/*` - [x] Native: `Drag-n-drop > examples/assets/*` - [x] Web: `File > Open > examples/assets/*` - [x] Web: `Drag-n-drop > examples/assets/*` --- Part of a series of PRs to make it possible to load _any_ file from the local filesystem, by any means, on web and native: - #4516 - #4517 - #4518 - #4519 - #4520 - #4521 - TODO: register custom loaders - TODO: high level docs and guides for everything related to loading files --- .typos.toml | 1 + .../src/data_loader/loader_archetype.rs | 32 ++++++++++++++++- crates/re_data_source/src/data_loader/mod.rs | 2 ++ crates/re_data_source/src/lib.rs | 11 +++--- crates/re_types/Cargo.toml | 2 +- crates/re_types/src/archetypes/image_ext.rs | 6 ++-- crates/re_types/src/archetypes/mod.rs | 1 + .../src/archetypes/text_document_ext.rs | 35 +++++++++++++++++++ docs/cspell.json | 3 +- examples/assets/example.md | 21 +++++++++++ examples/assets/example.txt | 9 +++++ scripts/lint.py | 5 +-- 12 files changed, 115 insertions(+), 13 deletions(-) create mode 100644 crates/re_types/src/archetypes/text_document_ext.rs create mode 100644 examples/assets/example.md create mode 100644 examples/assets/example.txt diff --git a/.typos.toml b/.typos.toml index c03ed6d4a537..de2f439fc0ec 100644 --- a/.typos.toml +++ b/.typos.toml @@ -7,6 +7,7 @@ extend-exclude = [ ".typos.toml", "crates/re_ui/data/design_tokens.json", "crates/re_ui/src/design_tokens.rs", + "examples/assets", ] diff --git 
a/crates/re_data_source/src/data_loader/loader_archetype.rs b/crates/re_data_source/src/data_loader/loader_archetype.rs index ebf0f645663e..dafbde2c6c17 100644 --- a/crates/re_data_source/src/data_loader/loader_archetype.rs +++ b/crates/re_data_source/src/data_loader/loader_archetype.rs @@ -96,7 +96,15 @@ impl DataLoader for ArchetypeLoader { entity_path, contents.into_owned(), )?); - }; + } else if crate::SUPPORTED_TEXT_EXTENSIONS.contains(&extension.as_str()) { + re_log::debug!(?filepath, loader = self.name(), "Loading text document…",); + rows.extend(load_text_document( + filepath, + timepoint, + entity_path, + contents.into_owned(), + )?); + } for row in rows { if tx.send(row.into()).is_err() { @@ -153,3 +161,25 @@ fn load_image( Ok(rows.into_iter()) } + +fn load_text_document( + filepath: std::path::PathBuf, + timepoint: TimePoint, + entity_path: EntityPath, + contents: Vec<u8>, +) -> Result<impl ExactSizeIterator<Item = DataRow>, DataLoaderError> { + re_tracing::profile_function!(); + + let rows = [ + { + let arch = re_types::archetypes::TextDocument::from_file_contents( + contents, + re_types::components::MediaType::guess_from_path(filepath), + )?; + DataRow::from_archetype(RowId::new(), timepoint, entity_path, &arch)? 
+ }, + // + ]; + + Ok(rows.into_iter()) +} diff --git a/crates/re_data_source/src/data_loader/mod.rs b/crates/re_data_source/src/data_loader/mod.rs index f0a4ef31c95f..530d3350a97a 100644 --- a/crates/re_data_source/src/data_loader/mod.rs +++ b/crates/re_data_source/src/data_loader/mod.rs @@ -26,6 +26,7 @@ use re_log_types::{ArrowMsg, DataRow, LogMsg}; /// - [`ArchetypeLoader`] for: /// - [3D models] /// - [Images] +/// - [Text files] /// /// ## Execution /// @@ -38,6 +39,7 @@ use re_log_types::{ArrowMsg, DataRow, LogMsg}; /// [Rerun extensions]: crate::SUPPORTED_RERUN_EXTENSIONS /// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS /// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS +/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS // // TODO(#4525): `DataLoader`s should support arbitrary URIs // TODO(#4526): `DataLoader`s should be exposed to the SDKs diff --git a/crates/re_data_source/src/lib.rs b/crates/re_data_source/src/lib.rs index 8d169c2be269..791c80712ae1 100644 --- a/crates/re_data_source/src/lib.rs +++ b/crates/re_data_source/src/lib.rs @@ -4,11 +4,7 @@ //! - Over WebSockets //! - From disk //! -//! Also handles different file types: -//! -//! - .rrd -//! - images -//! - meshes +//! Also handles different file types: rrd, images, text files, 3D models, point clouds… mod data_loader; mod data_source; @@ -51,12 +47,16 @@ pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj"]; pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rrd"]; +// TODO(#4555): Add catch-all builtin `DataLoader` for text files +pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"]; + /// All file extension supported by our builtin [`DataLoader`]s. 
pub fn supported_extensions() -> impl Iterator<Item = &'static str> { SUPPORTED_RERUN_EXTENSIONS .iter() .chain(SUPPORTED_IMAGE_EXTENSIONS) .chain(SUPPORTED_MESH_EXTENSIONS) + .chain(SUPPORTED_TEXT_EXTENSIONS) .copied() } @@ -65,4 +65,5 @@ pub fn is_supported_file_extension(extension: &str) -> bool { SUPPORTED_IMAGE_EXTENSIONS.contains(&extension) || SUPPORTED_MESH_EXTENSIONS.contains(&extension) || SUPPORTED_RERUN_EXTENSIONS.contains(&extension) + || SUPPORTED_TEXT_EXTENSIONS.contains(&extension) } diff --git a/crates/re_types/Cargo.toml b/crates/re_types/Cargo.toml index 12ff2962009c..ff94e96cbd2a 100644 --- a/crates/re_types/Cargo.toml +++ b/crates/re_types/Cargo.toml @@ -23,7 +23,7 @@ features = ["all"] default = [] ## All features except `testing`. -all = ["ecolor", "glam", "serde"] +all = ["ecolor", "glam", "image", "serde"] ## Enables the `datagen` module, which exposes a number of tools for generating random data for ## tests and benchmarks. diff --git a/crates/re_types/src/archetypes/image_ext.rs b/crates/re_types/src/archetypes/image_ext.rs index 2ce43189dc5d..c3b60e095f97 100644 --- a/crates/re_types/src/archetypes/image_ext.rs +++ b/crates/re_types/src/archetypes/image_ext.rs @@ -1,5 +1,3 @@ -use image::ImageFormat; - use crate::{ datatypes::TensorData, image::{find_non_empty_dim_indices, ImageConstructionError}, @@ -49,6 +47,7 @@ impl Image { /// Creates a new [`Image`] from a file. /// /// The image format will be inferred from the path (extension), or the contents if that fails. + #[cfg(feature = "image")] #[cfg(not(target_arch = "wasm32"))] #[inline] pub fn from_file_path(filepath: impl AsRef<std::path::Path>) -> anyhow::Result<Self> { @@ -61,10 +60,11 @@ impl Image { /// Creates a new [`Image`] from the contents of a file. /// /// If unspecified, the image format will be inferred from the contents. 
+ #[cfg(feature = "image")] #[inline] pub fn from_file_contents( contents: Vec<u8>, - format: Option<ImageFormat>, + format: Option<image::ImageFormat>, ) -> anyhow::Result<Self> { let format = if let Some(format) = format { format diff --git a/crates/re_types/src/archetypes/mod.rs b/crates/re_types/src/archetypes/mod.rs index b1e2676047cd..f940a8594486 100644 --- a/crates/re_types/src/archetypes/mod.rs +++ b/crates/re_types/src/archetypes/mod.rs @@ -28,6 +28,7 @@ mod segmentation_image_ext; mod tensor; mod tensor_ext; mod text_document; +mod text_document_ext; mod text_log; mod time_series_scalar; mod transform3d; diff --git a/crates/re_types/src/archetypes/text_document_ext.rs b/crates/re_types/src/archetypes/text_document_ext.rs new file mode 100644 index 000000000000..30ad37d48156 --- /dev/null +++ b/crates/re_types/src/archetypes/text_document_ext.rs @@ -0,0 +1,35 @@ +use crate::components::MediaType; + +use super::TextDocument; + +impl TextDocument { + /// Creates a new [`TextDocument`] from a utf8 file. + /// + /// The media type will be inferred from the path (extension), or the contents if that fails. + #[cfg(not(target_arch = "wasm32"))] + pub fn from_file_path(filepath: impl AsRef<std::path::Path>) -> anyhow::Result<Self> { + use anyhow::Context as _; + + let filepath = filepath.as_ref(); + let contents = std::fs::read(filepath) + .with_context(|| format!("could not read file contents: {filepath:?}"))?; + Self::from_file_contents(contents, MediaType::guess_from_path(filepath)) + .with_context(|| format!("could not parse file contents: {filepath:?}")) + } + + /// Creates a new [`TextDocument`] from the contents of a utf8 file. + /// + /// If unspecified, the media type will be inferred from the contents. 
+ #[inline] + pub fn from_file_contents( + contents: Vec<u8>, + media_type: Option<impl Into<MediaType>>, + ) -> anyhow::Result<Self> { + let media_type = media_type.map(Into::into); + let media_type = MediaType::or_guess_from_data(media_type, &contents); + Ok(Self { + text: String::from_utf8(contents)?.into(), + media_type, + }) + } +} diff --git a/docs/cspell.json b/docs/cspell.json index ed65c18898e7..510b3b67d6e1 100644 --- a/docs/cspell.json +++ b/docs/cspell.json @@ -3,7 +3,8 @@ "version": "0.2", "usePnP": true, "ignorePaths": [ - "node_modules/**" + "node_modules/**", + "../examples/assets/**" ], "ignoreWords": [ "-useb", diff --git a/examples/assets/example.md b/examples/assets/example.md new file mode 100644 index 000000000000..162baef247ec --- /dev/null +++ b/examples/assets/example.md @@ -0,0 +1,21 @@ +# Huge + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque eleifend mi eget tellus pellentesque, sit amet ultricies tortor iaculis. Pellentesque viverra ipsum ut nisl rutrum maximus. Nunc vehicula lectus quis lacinia condimentum. Suspendisse lobortis mattis nisl, id egestas dui condimentum vitae. Integer lacus quam, commodo eget pretium at, scelerisque ac diam. Donec consectetur mauris felis. Proin pharetra, velit in venenatis facilisis, nunc diam blandit justo, sit amet laoreet ex nunc ut mi. Morbi ut tincidunt leo, eu maximus urna. + +## Big + +Aenean odio enim, elementum non nibh at, lacinia pretium elit. Quisque sit amet porta metus. Nunc in arcu turpis. Sed non tristique tellus, eget ultrices arcu. In rhoncus nibh in dolor pellentesque, id suscipit sem volutpat. Suspendisse condimentum tempor ante, sed rutrum eros viverra vitae. Sed vitae vestibulum eros, eu auctor velit. Praesent a efficitur elit. Nulla finibus porttitor tortor nec semper. + +Nulla fermentum est ac convallis bibendum. Ut cursus, libero at sollicitudin laoreet, nunc ante aliquam dolor, quis malesuada mi ligula a sapien. Pellentesque sollicitudin odio a tempor tempus. 
Ut finibus nulla eget placerat hendrerit. Aenean eu arcu metus. Aliquam erat volutpat. Sed in ullamcorper mauris. Ut sollicitudin nisi fermentum, molestie justo eu, malesuada magna. Proin semper nisi sit amet pulvinar lacinia. Etiam purus magna, accumsan facilisis tellus eu, tincidunt dignissim dui. + +### Less big + +In massa arcu, finibus congue vulputate quis, pulvinar ac est. Morbi felis nibh, cursus ut mi id, rutrum rutrum est. Cras interdum enim non ipsum ornare commodo. Ut blandit, dui quis efficitur eleifend, urna nisl cursus metus, at placerat tortor orci et sem. Morbi sodales felis sed mattis tempus. Vivamus scelerisque dignissim mi. Etiam elementum mattis turpis, id porttitor arcu. Maecenas dui ipsum, scelerisque non molestie eu, hendrerit in justo. In hac habitasse platea dictumst. Curabitur faucibus hendrerit turpis quis gravida. + +Etiam velit mauris, varius in aliquam eu, malesuada eu massa. Nulla eu arcu in velit bibendum volutpat. Nulla sollicitudin lectus nisi, ac efficitur nibh consectetur vitae. Fusce a placerat turpis. Nullam tincidunt sed nulla sed vulputate. In id pharetra libero, congue aliquet justo. In laoreet, odio a interdum fermentum, leo orci efficitur turpis, at tempus diam sem quis diam. Vestibulum ultricies urna eget mi dignissim convallis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nulla ut gravida ante, eget sodales metus. Integer pellentesque tempus magna, a scelerisque mi suscipit sed. Aenean sed malesuada ex. + +#### Tiny + +```rust +"with some code!" +``` diff --git a/examples/assets/example.txt b/examples/assets/example.txt new file mode 100644 index 000000000000..3307469ace44 --- /dev/null +++ b/examples/assets/example.txt @@ -0,0 +1,9 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque eleifend mi eget tellus pellentesque, sit amet ultricies tortor iaculis. Pellentesque viverra ipsum ut nisl rutrum maximus. Nunc vehicula lectus quis lacinia condimentum. 
Suspendisse lobortis mattis nisl, id egestas dui condimentum vitae. Integer lacus quam, commodo eget pretium at, scelerisque ac diam. Donec consectetur mauris felis. Proin pharetra, velit in venenatis facilisis, nunc diam blandit justo, sit amet laoreet ex nunc ut mi. Morbi ut tincidunt leo, eu maximus urna. + +Aenean odio enim, elementum non nibh at, lacinia pretium elit. Quisque sit amet porta metus. Nunc in arcu turpis. Sed non tristique tellus, eget ultrices arcu. In rhoncus nibh in dolor pellentesque, id suscipit sem volutpat. Suspendisse condimentum tempor ante, sed rutrum eros viverra vitae. Sed vitae vestibulum eros, eu auctor velit. Praesent a efficitur elit. Nulla finibus porttitor tortor nec semper. + +Nulla fermentum est ac convallis bibendum. Ut cursus, libero at sollicitudin laoreet, nunc ante aliquam dolor, quis malesuada mi ligula a sapien. Pellentesque sollicitudin odio a tempor tempus. Ut finibus nulla eget placerat hendrerit. Aenean eu arcu metus. Aliquam erat volutpat. Sed in ullamcorper mauris. Ut sollicitudin nisi fermentum, molestie justo eu, malesuada magna. Proin semper nisi sit amet pulvinar lacinia. Etiam purus magna, accumsan facilisis tellus eu, tincidunt dignissim dui. + +In massa arcu, finibus congue vulputate quis, pulvinar ac est. Morbi felis nibh, cursus ut mi id, rutrum rutrum est. Cras interdum enim non ipsum ornare commodo. Ut blandit, dui quis efficitur eleifend, urna nisl cursus metus, at placerat tortor orci et sem. Morbi sodales felis sed mattis tempus. Vivamus scelerisque dignissim mi. Etiam elementum mattis turpis, id porttitor arcu. Maecenas dui ipsum, scelerisque non molestie eu, hendrerit in justo. In hac habitasse platea dictumst. Curabitur faucibus hendrerit turpis quis gravida. + +Etiam velit mauris, varius in aliquam eu, malesuada eu massa. Nulla eu arcu in velit bibendum volutpat. Nulla sollicitudin lectus nisi, ac efficitur nibh consectetur vitae. Fusce a placerat turpis. Nullam tincidunt sed nulla sed vulputate. 
In id pharetra libero, congue aliquet justo. In laoreet, odio a interdum fermentum, leo orci efficitur turpis, at tempus diam sem quis diam. Vestibulum ultricies urna eget mi dignissim convallis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nulla ut gravida ante, eget sodales metus. Integer pellentesque tempus magna, a scelerisque mi suscipit sed. Aenean sed malesuada ex. diff --git a/scripts/lint.py b/scripts/lint.py index a4d6c81e66f9..2215556bb447 100755 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -834,16 +834,17 @@ def main() -> None: ] exclude_paths = ( - "./rerun_cpp/docs/html", "./.github/workflows/reusable_checks.yml", # zombie TODO hunting job "./CODE_STYLE.md", "./crates/re_types_builder/src/reflection.rs", # auto-generated + "./examples/assets", "./examples/python/detect_and_track_objects/cache/version.txt", "./examples/rust/objectron/src/objectron.rs", # auto-generated + "./rerun_cpp/docs/doxygen-awesome/", # copied from an external repository + "./rerun_cpp/docs/html", "./scripts/lint.py", # we contain all the patterns we are linting against "./scripts/zombie_todos.py", "./web_viewer/re_viewer.js", # auto-generated by wasm_bindgen - "./rerun_cpp/docs/doxygen-awesome/", # copied from an external repository ) should_ignore = parse_gitignore(".gitignore") # TODO(emilk): parse all .gitignore files, not just top-level