Skip to content

Commit

Permalink
DataLoaders 1: introduce, and migrate to, DataLoaders (#4517)
Browse files Browse the repository at this point in the history
**COMMIT PER COMMIT!**

_That's the nasty one... there's always a nasty one 😬_ 

Introduces the `DataLoader` trait, which can load any file, either from
a filepath by reading from the local filesystem, or from a pre-loaded
file's contents.

`DataLoader`s are run in parallel where possible, and have complete say
over what data they are interested in or not (i.e. they are not
registered based on an extension, which is very limited in practice).

I've decided to commit small example assets for the types we support,
because I feel we really need a fast way of checking whether everything
still works from time to time.
It's pretty light, but it might be controversial, I figure.

Checks:
- [x] `cargo r -p rerun-cli --no-default-features --features
native_viewer -- examples/assets/example.{glb,gltf,obj,jpg,png,rrd}`
- [x] Native: `File > Open > examples/assets/*`
- [x] Native: `Drag-n-drop > examples/assets/*`
- [x] Web: `File > Open > examples/assets/*`
- [x] Web: `Drag-n-drop > examples/assets/*`

---

Part of a series of PRs to make it possible to load _any_ file from the
local filesystem, by any means, on web and native:
- #4516
- #4517 
- #4518 
- #4519 
- #4520 
- #4521 
- TODO: register custom loaders
- TODO: high level docs and guides for everything related to loading
files
  • Loading branch information
teh-cmc authored Dec 15, 2023
1 parent 490fc63 commit b51df6a
Show file tree
Hide file tree
Showing 22 changed files with 1,019 additions and 400 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion crates/re_data_source/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@ re_tracing.workspace = true
re_types = { workspace = true, features = ["image"] }
re_ws_comms = { workspace = true, features = ["client"] }

ahash.workspace = true
anyhow.workspace = true
image.workspace = true
itertools.workspace = true
once_cell.workspace = true
parking_lot.workspace = true
rayon.workspace = true
thiserror.workspace = true

walkdir.workspace = true

[build-dependencies]
re_build_tools.workspace = true
155 changes: 155 additions & 0 deletions crates/re_data_source/src/data_loader/loader_archetype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
use re_log_types::{DataRow, EntityPath, RowId, TimePoint};

use crate::{DataLoader, DataLoaderError, LoadedData};

// ---

/// Loads data from any supported file or in-memory contents as native [`re_types::Archetype`]s.
///
/// This is a simple generic [`DataLoader`] for filetypes that match 1-to-1 with our builtin
/// archetypes.
///
/// Files whose extension is listed in `crate::SUPPORTED_MESH_EXTENSIONS` are loaded as
/// `Asset3D`, and files whose extension is listed in `crate::SUPPORTED_IMAGE_EXTENSIONS` are
/// loaded as `Image`. Anything else is ignored without raising an error.
pub struct ArchetypeLoader;

impl DataLoader for ArchetypeLoader {
    /// Returns the name identifying this loader; it is also attached to the loader's debug logs.
    #[inline]
    fn name(&self) -> String {
        "rerun.data_loaders.Archetype".into()
    }

    /// Reads the file at `filepath` from the local filesystem and forwards its contents to
    /// [`Self::load_from_file_contents`].
    ///
    /// Directories, as well as files whose extension is neither a supported mesh extension nor
    /// a supported image extension, are skipped with `Ok(())` *without being read*.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read, or if loading its contents fails.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        store_id: re_log_types::StoreId,
        filepath: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), crate::DataLoaderError> {
        use anyhow::Context as _;

        if filepath.is_dir() {
            return Ok(()); // simply not interested
        }

        // Check the extension *before* touching the file: there is no point in pulling an
        // arbitrarily large file into memory only to discard it in `load_from_file_contents`.
        let extension = crate::extension(&filepath);
        let is_supported = crate::SUPPORTED_MESH_EXTENSIONS.contains(&extension.as_str())
            || crate::SUPPORTED_IMAGE_EXTENSIONS.contains(&extension.as_str());
        if !is_supported {
            return Ok(()); // simply not interested
        }

        re_tracing::profile_function!(filepath.display().to_string());

        let contents = std::fs::read(&filepath)
            .with_context(|| format!("Failed to read file {filepath:?}"))?;
        let contents = std::borrow::Cow::Owned(contents);

        self.load_from_file_contents(store_id, filepath, contents, tx)
    }

    /// Turns the in-memory `contents` of a file into rows and sends them over `tx`.
    ///
    /// The extension of `filepath` decides how `contents` is interpreted: supported mesh
    /// extensions go through `load_mesh`, supported image extensions go through `load_image`,
    /// and any other extension yields no rows at all. The entity path is derived from
    /// `filepath`, and all rows are logged as timeless.
    ///
    /// The store ID is not used by this loader.
    ///
    /// # Errors
    ///
    /// Fails if the mesh or image rows cannot be built from `contents`. A closed receiving end
    /// of `tx` is not an error: sending simply stops.
    fn load_from_file_contents(
        &self,
        _store_id: re_log_types::StoreId,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), crate::DataLoaderError> {
        re_tracing::profile_function!(filepath.display().to_string());

        let entity_path = EntityPath::from_file_path(&filepath);

        let mut timepoint = TimePoint::timeless();
        // TODO(cmc): log these once heuristics (I think?) are fixed
        //
        // Deliberately disabled for now: the code is kept compiling so it doesn't rot.
        if false {
            if let Ok(metadata) = filepath.metadata() {
                use re_log_types::{Time, Timeline};

                if let Some(created) = metadata.created().ok().and_then(|t| Time::try_from(t).ok())
                {
                    timepoint.insert(Timeline::new_temporal("created_at"), created.into());
                }
                if let Some(modified) = metadata
                    .modified()
                    .ok()
                    .and_then(|t| Time::try_from(t).ok())
                {
                    timepoint.insert(Timeline::new_temporal("modified_at"), modified.into());
                }
                if let Some(accessed) = metadata
                    .accessed()
                    .ok()
                    .and_then(|t| Time::try_from(t).ok())
                {
                    timepoint.insert(Timeline::new_temporal("accessed_at"), accessed.into());
                }
            }
        }

        let extension = crate::extension(&filepath);

        // `load_mesh` and `load_image` return different iterator types, hence the `Vec`.
        let rows: Vec<_> = if crate::SUPPORTED_MESH_EXTENSIONS.contains(&extension.as_str()) {
            re_log::debug!(?filepath, loader = self.name(), "Loading 3D model…",);
            load_mesh(filepath, timepoint, entity_path, contents.into_owned())?.collect()
        } else if crate::SUPPORTED_IMAGE_EXTENSIONS.contains(&extension.as_str()) {
            re_log::debug!(?filepath, loader = self.name(), "Loading image…",);
            load_image(&filepath, timepoint, entity_path, contents.into_owned())?.collect()
        } else {
            Vec::new() // simply not interested
        };

        for row in rows {
            if tx.send(row.into()).is_err() {
                break; // The other end has decided to hang up, not our problem.
            }
        }

        Ok(())
    }
}

// ---

/// Builds the rows describing a 3D model from its raw file `contents`.
///
/// A single `Asset3D` row is produced, logged at `timepoint` on `entity_path`. The media type
/// of the asset is guessed from `filepath`.
///
/// # Errors
///
/// Fails if the archetype cannot be turned into a [`DataRow`].
fn load_mesh(
    filepath: std::path::PathBuf,
    timepoint: TimePoint,
    entity_path: EntityPath,
    contents: Vec<u8>,
) -> Result<impl ExactSizeIterator<Item = DataRow>, DataLoaderError> {
    re_tracing::profile_function!();

    let media_type = re_types::components::MediaType::guess_from_path(filepath);
    let asset = re_types::archetypes::Asset3D::from_file_contents(contents, media_type);
    let row = DataRow::from_archetype(RowId::new(), timepoint, entity_path, &asset)?;

    Ok([row].into_iter())
}

/// Builds the rows describing an image from its raw file `contents`.
///
/// A single `Image` row is produced, logged at `timepoint` on `entity_path`. The image format
/// is looked up from `filepath`; when that lookup fails, no format hint is passed along
/// (`None`) rather than raising an error.
///
/// # Errors
///
/// Fails if the image archetype cannot be built from `contents`, or if it cannot be turned
/// into a [`DataRow`].
fn load_image(
    filepath: &std::path::Path,
    timepoint: TimePoint,
    entity_path: EntityPath,
    contents: Vec<u8>,
) -> Result<impl ExactSizeIterator<Item = DataRow>, DataLoaderError> {
    re_tracing::profile_function!();

    let format_hint = image::ImageFormat::from_path(filepath).ok();
    let image = re_types::archetypes::Image::from_file_contents(contents, format_hint)?;
    let row = DataRow::from_archetype(RowId::new(), timepoint, entity_path, &image)?;

    Ok([row].into_iter())
}
98 changes: 98 additions & 0 deletions crates/re_data_source/src/data_loader/loader_rrd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use re_log_encoding::decoder::Decoder;

// ---

/// Loads data from any `rrd` file or in-memory contents.
///
/// Only paths with an `rrd` extension are considered; anything else is ignored without
/// raising an error. The store ID passed to the loader is unused: it comes from the rrd
/// data itself.
pub struct RrdLoader;

impl crate::DataLoader for RrdLoader {
    /// Returns the name identifying this loader; it is also attached to the loader's debug logs.
    #[inline]
    fn name(&self) -> String {
        String::from("rerun.data_loaders.Rrd")
    }

    /// Opens the rrd file at `filepath` and streams its decoded messages over `tx`.
    ///
    /// Paths whose extension is not `rrd` are skipped with `Ok(())`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or if the decoder cannot be created. Messages that
    /// fail to decode later on are only logged as warnings.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        // NOTE: The Store ID comes from the rrd file itself.
        _store_id: re_log_types::StoreId,
        filepath: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<crate::LoadedData>,
    ) -> Result<(), crate::DataLoaderError> {
        use anyhow::Context as _;
        use re_log_encoding::decoder::{Decoder, VersionPolicy};

        re_tracing::profile_function!(filepath.display().to_string());

        if crate::extension(&filepath) != "rrd" {
            return Ok(()); // simply not interested
        }

        re_log::debug!(
            ?filepath,
            loader = self.name(),
            "Loading rrd data from filesystem…",
        );

        // Buffer the reads: the decoder pulls data from the file incrementally.
        let reader = std::fs::File::open(&filepath)
            .map(std::io::BufReader::new)
            .with_context(|| format!("Failed to open file {filepath:?}"))?;

        let decoder = Decoder::new(VersionPolicy::Warn, reader)?;
        decode_and_stream(&filepath, &tx, decoder);

        Ok(())
    }

    /// Decodes in-memory rrd `contents` and streams the resulting messages over `tx`.
    ///
    /// Paths whose extension is not `rrd` are skipped with `Ok(())`, and so are contents for
    /// which the decoder reports `NotAnRrd` or `Options` on creation.
    ///
    /// # Errors
    ///
    /// Fails if the decoder cannot be created for any other reason. Messages that fail to
    /// decode later on are only logged as warnings.
    fn load_from_file_contents(
        &self,
        // NOTE: The Store ID comes from the rrd file itself.
        _store_id: re_log_types::StoreId,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<crate::LoadedData>,
    ) -> Result<(), crate::DataLoaderError> {
        use re_log_encoding::decoder::{DecodeError, Decoder, VersionPolicy};

        re_tracing::profile_function!(filepath.display().to_string());

        if crate::extension(&filepath) != "rrd" {
            return Ok(()); // simply not interested
        }

        let reader = std::io::Cursor::new(contents);
        let decoder = match Decoder::new(VersionPolicy::Warn, reader) {
            Ok(decoder) => decoder,
            // simply not interested
            Err(DecodeError::NotAnRrd | DecodeError::Options(_)) => return Ok(()),
            Err(err) => return Err(err.into()),
        };

        decode_and_stream(&filepath, &tx, decoder);

        Ok(())
    }
}

/// Drains `decoder`, forwarding every successfully decoded message over `tx`.
///
/// Messages that fail to decode are reported through a one-time warning mentioning
/// `filepath`, then skipped. Streaming stops early as soon as the receiving end of `tx` has
/// been dropped.
fn decode_and_stream<R: std::io::Read>(
    filepath: &std::path::Path,
    tx: &std::sync::mpsc::Sender<crate::LoadedData>,
    decoder: Decoder<R>,
) {
    re_tracing::profile_function!(filepath.display().to_string());

    for decoded in decoder {
        match decoded {
            Ok(msg) => {
                if tx.send(msg.into()).is_err() {
                    break; // The other end has decided to hang up, not our problem.
                }
            }
            Err(err) => {
                re_log::warn_once!("Failed to decode message in {filepath:?}: {err}");
            }
        }
    }
}
Loading

0 comments on commit b51df6a

Please sign in to comment.