From 8e2744b8453ffa4e3b6c0a398d17996f9b499c42 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 17:09:22 +0200 Subject: [PATCH 01/18] more build tools --- crates/re_build_tools/src/hashing.rs | 48 +++++++--------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/crates/re_build_tools/src/hashing.rs b/crates/re_build_tools/src/hashing.rs index f734651b8fa4..c8a37035ce03 100644 --- a/crates/re_build_tools/src/hashing.rs +++ b/crates/re_build_tools/src/hashing.rs @@ -5,8 +5,6 @@ use std::{fs, io}; use anyhow::Context as _; use sha2::{Digest, Sha256}; -use crate::{rerun_if_changed, rerun_if_changed_or_doesnt_exist}; - // --- fn encode_hex(bytes: &[u8]) -> String { @@ -19,8 +17,8 @@ fn encode_hex(bytes: &[u8]) -> String { /// Walks the directory at `path` in filename order. /// -/// If `extensions` is specified, only files with the right extensions will be iterated. -/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`. +/// If `extensions` is specified, only files with the right extensions will be hashed. +/// Specified extensions should include the dot, e.g. `.fbs`. pub fn iter_dir<'a>( path: impl AsRef, extensions: Option<&'a [&'a str]>, @@ -30,9 +28,9 @@ pub fn iter_dir<'a>( let is_interesting = extensions.map_or(true, |extensions| { extensions.iter().any(|ext| { entry - .path() - .extension() - .map_or(false, |ext2| *ext == ext2.to_string_lossy()) + .file_name() + .to_str() + .map_or(false, |s| s.ends_with(ext)) }) }); is_dir || is_interesting @@ -50,8 +48,6 @@ pub fn iter_dir<'a>( /// Given a file path, computes the sha256 hash of its contents and returns an hexadecimal string /// for it. /// -/// This will automatically emit a `rerun-if-changed` clause for the specified file. -/// /// Panics if the file doesn't exist. pub fn compute_file_hash(path: impl AsRef) -> String { let mut hasher = Sha256::new(); @@ -64,20 +60,14 @@ pub fn compute_file_hash(path: impl AsRef) -> String { .with_context(|| format!("couldn't copy from {path:?}")) .unwrap(); - rerun_if_changed(path); - encode_hex(hasher.finalize().as_slice()) } -/// Given a directory path, computes the sha256 hash of the accumulated contents of all of its -/// files (ordered by filename), and returns an hexadecimal string for it. -/// -/// This includes files in sub-directories (i.e. it's recursive). -/// -/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed. +/// Given a directory path, computes the sha256 hash of its contents (ordered by filename) and +/// returns an hexadecimal string for it. /// -/// If `extensions` is specified, only files with the right extensions will be iterated. -/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`. +/// If `extensions` is specified, only files with the right extensions will be hashed. +/// Specified extensions should include the dot, e.g. `.fbs`. pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a str]>) -> String { let mut hasher = Sha256::new(); @@ -89,19 +79,13 @@ pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a io::copy(&mut file, &mut hasher) .with_context(|| format!("couldn't copy from {filepath:?}")) .unwrap(); - - rerun_if_changed(path); } encode_hex(hasher.finalize().as_slice()) } -/// Given a crate name, computes the sha256 hash of its source code (ordered by filename) and +/// Given a crate name, computes the sha256 hash of its source (ordered by filename) and /// returns an hexadecimal string for it. -/// -/// This includes the source code of all its direct and indirect dependencies. -/// -/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed. pub fn compute_crate_hash(pkg_name: impl AsRef) -> String { use cargo_metadata::{CargoOpt, MetadataCommand}; let metadata = MetadataCommand::new() @@ -138,9 +122,6 @@ pub fn compute_strings_hash(strs: &[&str]) -> String { /// Writes the given `hash` at the specified `path`. /// -/// `hash` should have been computed using of the methods in this module: [`compute_file_hash`], -/// [`compute_dir_hash`], [`compute_crate_hash`]. -/// /// Panics on I/O errors. /// /// Use [`read_versioning_hash`] to read it back. @@ -156,23 +137,16 @@ pub fn write_versioning_hash(path: impl AsRef, hash: impl AsRef) { {hash} " )); - std::fs::write(path, contents.trim()) + std::fs::write(path, contents) .with_context(|| format!("couldn't write to {path:?}")) .unwrap(); } /// Reads back a versioning hash that was written with [`write_versioning_hash`]. /// -/// This will automatically emit a `rerun-if-changed` clause for the specified filepath. -/// /// Returns `None` on error. pub fn read_versioning_hash(path: impl AsRef) -> Option { let path = path.as_ref(); - - // NOTE: It's important we trigger if the file doesn't exist, as this means the user explicitly - // deleted the versioning file, i.e. they're trying to force a rebuild. - rerun_if_changed_or_doesnt_exist(path); - std::fs::read_to_string(path).ok().and_then(|contents| { contents .lines() From 326c1951d1760e3c43370c730b1d5d08f1bacb23 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 17:31:40 +0200 Subject: [PATCH 02/18] self-review --- crates/re_build_tools/src/hashing.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/re_build_tools/src/hashing.rs b/crates/re_build_tools/src/hashing.rs index c8a37035ce03..1c9f8fd5aed3 100644 --- a/crates/re_build_tools/src/hashing.rs +++ b/crates/re_build_tools/src/hashing.rs @@ -17,7 +17,7 @@ fn encode_hex(bytes: &[u8]) -> String { /// Walks the directory at `path` in filename order. /// -/// If `extensions` is specified, only files with the right extensions will be hashed. +/// If `extensions` is specified, only files with the right extensions will be iterated. /// Specified extensions should include the dot, e.g. `.fbs`. pub fn iter_dir<'a>( path: impl AsRef, @@ -63,8 +63,10 @@ pub fn compute_file_hash(path: impl AsRef) -> String { encode_hex(hasher.finalize().as_slice()) } -/// Given a directory path, computes the sha256 hash of its contents (ordered by filename) and -/// returns an hexadecimal string for it. +/// Given a directory path, computes the sha256 hash of the accumulated contents of all of its +/// files (ordered by filename), and returns an hexadecimal string for it. +/// +/// This includes files in sub-directories (i.e. it's recursive). /// /// If `extensions` is specified, only files with the right extensions will be hashed. /// Specified extensions should include the dot, e.g. `.fbs`. @@ -84,8 +86,10 @@ pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a encode_hex(hasher.finalize().as_slice()) } -/// Given a crate name, computes the sha256 hash of its source (ordered by filename) and +/// Given a crate name, computes the sha256 hash of its source code (ordered by filename) and /// returns an hexadecimal string for it. +/// +/// This includes the source code of all its direct and indirect dependencies. pub fn compute_crate_hash(pkg_name: impl AsRef) -> String { use cargo_metadata::{CargoOpt, MetadataCommand}; let metadata = MetadataCommand::new() @@ -137,7 +141,7 @@ pub fn write_versioning_hash(path: impl AsRef, hash: impl AsRef) { {hash} " )); - std::fs::write(path, contents) + std::fs::write(path, contents.trim()) .with_context(|| format!("couldn't write to {path:?}")) .unwrap(); } From 40f40594da44722754773bef14fb6985a15fc5db Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Tue, 13 Jun 2023 08:57:06 +0200 Subject: [PATCH 03/18] addressing PR comments --- crates/re_build_tools/src/hashing.rs | 34 +++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/crates/re_build_tools/src/hashing.rs b/crates/re_build_tools/src/hashing.rs index 1c9f8fd5aed3..f734651b8fa4 100644 --- a/crates/re_build_tools/src/hashing.rs +++ b/crates/re_build_tools/src/hashing.rs @@ -5,6 +5,8 @@ use std::{fs, io}; use anyhow::Context as _; use sha2::{Digest, Sha256}; +use crate::{rerun_if_changed, rerun_if_changed_or_doesnt_exist}; + // --- fn encode_hex(bytes: &[u8]) -> String { @@ -18,7 +20,7 @@ fn encode_hex(bytes: &[u8]) -> String { /// Walks the directory at `path` in filename order. /// /// If `extensions` is specified, only files with the right extensions will be iterated. -/// Specified extensions should include the dot, e.g. `.fbs`. +/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`. pub fn iter_dir<'a>( path: impl AsRef, extensions: Option<&'a [&'a str]>, @@ -28,9 +30,9 @@ pub fn iter_dir<'a>( let is_interesting = extensions.map_or(true, |extensions| { extensions.iter().any(|ext| { entry - .file_name() - .to_str() - .map_or(false, |s| s.ends_with(ext)) + .path() + .extension() + .map_or(false, |ext2| *ext == ext2.to_string_lossy()) }) }); is_dir || is_interesting @@ -48,6 +50,8 @@ pub fn iter_dir<'a>( /// Given a file path, computes the sha256 hash of its contents and returns an hexadecimal string /// for it. /// +/// This will automatically emit a `rerun-if-changed` clause for the specified file. +/// /// Panics if the file doesn't exist. pub fn compute_file_hash(path: impl AsRef) -> String { let mut hasher = Sha256::new(); @@ -60,6 +64,8 @@ pub fn compute_file_hash(path: impl AsRef) -> String { .with_context(|| format!("couldn't copy from {path:?}")) .unwrap(); + rerun_if_changed(path); + encode_hex(hasher.finalize().as_slice()) } @@ -68,8 +74,10 @@ pub fn compute_file_hash(path: impl AsRef) -> String { /// /// This includes files in sub-directories (i.e. it's recursive). /// -/// If `extensions` is specified, only files with the right extensions will be hashed. -/// Specified extensions should include the dot, e.g. `.fbs`. +/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed. +/// +/// If `extensions` is specified, only files with the right extensions will be iterated. +/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`. pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a str]>) -> String { let mut hasher = Sha256::new(); @@ -81,6 +89,8 @@ pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a io::copy(&mut file, &mut hasher) .with_context(|| format!("couldn't copy from {filepath:?}")) .unwrap(); + + rerun_if_changed(path); } encode_hex(hasher.finalize().as_slice()) @@ -90,6 +100,8 @@ pub fn compute_dir_hash<'a>(path: impl AsRef, extensions: Option<&'a [&'a /// returns an hexadecimal string for it. /// /// This includes the source code of all its direct and indirect dependencies. +/// +/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed. pub fn compute_crate_hash(pkg_name: impl AsRef) -> String { use cargo_metadata::{CargoOpt, MetadataCommand}; let metadata = MetadataCommand::new() @@ -126,6 +138,9 @@ pub fn compute_strings_hash(strs: &[&str]) -> String { /// Writes the given `hash` at the specified `path`. /// +/// `hash` should have been computed using of the methods in this module: [`compute_file_hash`], +/// [`compute_dir_hash`], [`compute_crate_hash`]. +/// /// Panics on I/O errors. /// /// Use [`read_versioning_hash`] to read it back. @@ -148,9 +163,16 @@ pub fn write_versioning_hash(path: impl AsRef, hash: impl AsRef) { /// Reads back a versioning hash that was written with [`write_versioning_hash`]. /// +/// This will automatically emit a `rerun-if-changed` clause for the specified filepath. +/// /// Returns `None` on error. pub fn read_versioning_hash(path: impl AsRef) -> Option { let path = path.as_ref(); + + // NOTE: It's important we trigger if the file doesn't exist, as this means the user explicitly + // deleted the versioning file, i.e. they're trying to force a rebuild. + rerun_if_changed_or_doesnt_exist(path); + std::fs::read_to_string(path).ok().and_then(|contents| { contents .lines() From 0a7f4451dbccffb3166a0e3b22fe85d225e1d1b0 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 17:44:22 +0200 Subject: [PATCH 04/18] introduce re_types_builder --- Cargo.lock | 29 +- crates/re_types_builder/Cargo.toml | 2 +- crates/re_types_builder/build.rs | 20 +- .../definitions/reflection.fbs | 3 - crates/re_types_builder/src/arrow_registry.rs | 14 +- crates/re_types_builder/src/codegen/common.rs | 19 -- crates/re_types_builder/src/codegen/mod.rs | 7 +- crates/re_types_builder/src/codegen/python.rs | 322 +++++++++--------- crates/re_types_builder/src/codegen/rust.rs | 157 +++++---- crates/re_types_builder/src/lib.rs | 104 +++--- crates/re_types_builder/src/objects.rs | 146 ++++---- 11 files changed, 411 insertions(+), 412 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1558ece32be5..be455137ee2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3729,7 +3729,7 @@ dependencies = [ "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", + "unindent 0.1.11", ] [[package]] @@ -3938,7 +3938,7 @@ dependencies = [ "glob", "sha2", "time", - "unindent", + "unindent 0.1.11", "walkdir", ] @@ -4227,7 +4227,7 @@ dependencies = [ "thiserror", "tobj", "type-map", - "unindent", + "unindent 0.2.1", "walkdir", "wasm-bindgen-futures", "web-sys", @@ -4507,7 +4507,7 @@ dependencies = [ "flatbuffers", "indent", "re_build_tools", - "unindent", + "unindent 0.2.1", "xshell", ] @@ -5801,6 +5801,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +[[package]] +name = "unindent" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" + [[package]] name = "untrusted" version = "0.7.1" @@ -6665,6 +6671,21 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dbabb1cbd15a1d6d12d9ed6b35cc6777d4af87ab3ba155ea37215f20beab80c" +[[package]] +name = "xshell" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "962c039b3a7b16cf4e9a4248397c6585c07547412e7d6a6e035389a802dcfe90" +dependencies = [ + "xshell-macros", +] + +[[package]] +name = "xshell-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dbabb1cbd15a1d6d12d9ed6b35cc6777d4af87ab3ba155ea37215f20beab80c" + [[package]] name = "xxhash-rust" version = "0.8.6" diff --git a/crates/re_types_builder/Cargo.toml b/crates/re_types_builder/Cargo.toml index ffdd334219bd..01b3cc8efc4a 100644 --- a/crates/re_types_builder/Cargo.toml +++ b/crates/re_types_builder/Cargo.toml @@ -25,7 +25,7 @@ arrow2.workspace = true convert_case = "0.6" flatbuffers = "23.0" indent = "0.1" -unindent.workspace = true +unindent = "0.2" xshell = "0.2" diff --git a/crates/re_types_builder/build.rs b/crates/re_types_builder/build.rs index bac5e2d4deb6..199ee8e2a8bb 100644 --- a/crates/re_types_builder/build.rs +++ b/crates/re_types_builder/build.rs @@ -3,7 +3,8 @@ use xshell::{cmd, Shell}; use re_build_tools::{ - compute_file_hash, is_tracked_env_var_set, read_versioning_hash, write_versioning_hash, + compute_file_hash, is_tracked_env_var_set, read_versioning_hash, rerun_if_changed, + rerun_if_changed_or_doesnt_exist, write_versioning_hash, }; // --- @@ -32,14 +33,9 @@ fn main() { return; } - // We're building an actual build graph here, and Cargo has no idea about it. - // - // Worse: some nodes in our build graph actually output artifacts into the src/ directory, - // which Cargo always interprets as "need to rebuild everything ASAP", leading to an infinite - // feedback loop. - // - // For these reasons, we manually compute and track signature hashes for the graph nodes we - // depend on, and make sure to exit early if everything's already up to date. + rerun_if_changed_or_doesnt_exist(SOURCE_HASH_PATH); + rerun_if_changed(FBS_REFLECTION_DEFINITION_PATH); + let cur_hash = read_versioning_hash(SOURCE_HASH_PATH); let new_hash = compute_file_hash(FBS_REFLECTION_DEFINITION_PATH); @@ -54,8 +50,6 @@ fn main() { } } - // NOTE: This requires `flatc` to be in $PATH, but only for contributors, not end users. - // Even for contributors, `flatc` won't be needed unless they edit some of the .fbs files. let sh = Shell::new().unwrap(); cmd!( sh, @@ -66,8 +60,8 @@ fn main() { // NOTE: We're purposefully ignoring the error here. // - // In the very unlikely chance that the user doesn't have the `fmt` component installed, - // there's still no good reason to fail the build. + // In the very unlikely chance that the user doesn't have `rustfmt` in their $PATH, there's + // still no good reason to fail the build. // // The CI will catch the unformatted file at PR time and complain appropriately anyhow. cmd!(sh, "cargo fmt").run().ok(); diff --git a/crates/re_types_builder/definitions/reflection.fbs b/crates/re_types_builder/definitions/reflection.fbs index cf6cb9e3c4e4..513311f1b9c3 100644 --- a/crates/re_types_builder/definitions/reflection.fbs +++ b/crates/re_types_builder/definitions/reflection.fbs @@ -1,6 +1,3 @@ -// Copied verbatim from the official flatbuffers source tree: -// https://github.com/google/flatbuffers/blob/63b7b25289447313ab6e79191fa1733748dca0da/reflection/reflection.fbs - // This schema defines objects that represent a parsed schema, like // the binary version of a .fbs file. // This could be used to operate on unknown FlatBuffers at runtime. diff --git a/crates/re_types_builder/src/arrow_registry.rs b/crates/re_types_builder/src/arrow_registry.rs index 505d0ccd253a..dd07e7fba4d1 100644 --- a/crates/re_types_builder/src/arrow_registry.rs +++ b/crates/re_types_builder/src/arrow_registry.rs @@ -4,7 +4,13 @@ use anyhow::Context as _; use arrow2::datatypes::{DataType, Field, UnionMode}; use std::collections::{BTreeMap, HashMap}; -use crate::{ElementType, Object, Type, ARROW_ATTR_SPARSE_UNION, ARROW_ATTR_TRANSPARENT}; +use crate::{ElementType, Object, Type}; + +// --- + +// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions +pub const ARROW_ATTR_TRANSPARENT: &str = "arrow.attr.transparent"; +pub const ARROW_ATTR_SPARSE_UNION: &str = "arrow.attr.sparse_union"; // --- Registry --- @@ -50,11 +56,12 @@ impl ArrowRegistry { fn arrow_datatype_from_object(&self, obj: &Object) -> LazyDatatype { let is_struct = obj.is_struct(); + let is_transparent = obj.try_get_attr::(ARROW_ATTR_TRANSPARENT).is_some(); let num_fields = obj.fields.len(); assert!( - !is_transparent || (is_struct && num_fields == 1), + !(is_transparent && (!is_struct || num_fields != 1)), "cannot have a transparent arrow object with any number of fields but 1: {:?} has {num_fields}", obj.fqname, ); @@ -163,9 +170,6 @@ impl ArrowRegistry { // --- Field --- /// A yet-to-be-resolved [`arrow2::datatypes::Field`]. -/// -/// Type resolution is a two-pass process as we first need to register all existing types before we -/// can denormalize their definitions into their parents. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct LazyField { /// Its name diff --git a/crates/re_types_builder/src/codegen/common.rs b/crates/re_types_builder/src/codegen/common.rs index d0bd52a5716c..4317b76c481c 100644 --- a/crates/re_types_builder/src/codegen/common.rs +++ b/crates/re_types_builder/src/codegen/common.rs @@ -39,22 +39,3 @@ pub fn quote_doc_from_docs(docs: &Docs, tags: &[&str]) -> Vec { lines } - -pub trait StringExt { - fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self; - fn push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self; -} - -impl StringExt for String { - fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self { - self.push_str(&indent::indent_all_by(indent, text.as_ref())); - self.push_str(&vec!["\n"; linefeeds].join("")); - self - } - - fn push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self { - self.push_str(&unindent::unindent(text.as_ref())); - self.push_str(&vec!["\n"; linefeeds].join("")); - self - } -} diff --git a/crates/re_types_builder/src/codegen/mod.rs b/crates/re_types_builder/src/codegen/mod.rs index 240fabd14e9a..49bdc9d9278f 100644 --- a/crates/re_types_builder/src/codegen/mod.rs +++ b/crates/re_types_builder/src/codegen/mod.rs @@ -15,10 +15,15 @@ pub trait CodeGenerator { pub const AUTOGEN_WARNING: &str = "NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT."; +// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions +pub const RERUN_ATTR_COMPONENT_REQUIRED: &str = "rerun.attr.component_required"; +pub const RERUN_ATTR_COMPONENT_RECOMMENDED: &str = "rerun.attr.component_recommended"; +pub const RERUN_ATTR_COMPONENT_OPTIONAL: &str = "rerun.attr.component_optional"; + // --- mod common; -use self::common::{quote_doc_from_docs, StringExt}; +use self::common::quote_doc_from_docs; mod python; mod rust; diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index a695cbee5970..9b6f85970405 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -8,9 +8,8 @@ use std::{ }; use crate::{ - codegen::{StringExt as _, AUTOGEN_WARNING}, - ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, - Type, PYTHON_ATTR_ALIASES, PYTHON_ATTR_ARRAY_ALIASES, PYTHON_ATTR_TRANSPARENT, + codegen::AUTOGEN_WARNING, ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, + ObjectKind, Objects, Type, }; // --- @@ -18,6 +17,11 @@ use crate::{ // NOTE: `rerun2` while we figure out how to integrate back into the main SDK. const MODULE_NAME: &str = "rerun2"; +// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions +pub const ATTR_TRANSPARENT: &str = "python.attr.transparent"; +pub const ATTR_ALIASES: &str = "python.attr.aliases"; +pub const ATTR_ARRAY_ALIASES: &str = "python.attr.array_aliases"; + pub struct PythonCodeGenerator { pkg_path: PathBuf, } @@ -90,20 +94,17 @@ fn quote_lib(out_path: impl AsRef, archetype_names: &[String]) -> PathBuf .unwrap(); let path = out_path.join("__init__.py"); - let manifest = quote_manifest(archetype_names); let archetype_names = archetype_names.join(", "); let mut code = String::new(); + // NOTE: noqa F401 (unused import) because while unnecessary these listings are + // very useful to look at. code += &unindent::unindent(&format!( r#" # {AUTOGEN_WARNING} - from __future__ import annotations - - __all__ = [{manifest}] - - from .archetypes import {archetype_names} + from .archetypes import {archetype_names} # noqa: F401 "# )); @@ -135,9 +136,16 @@ fn quote_objects( } else { QuotedObject::from_union(arrow_registry, all_objects, obj) }; - let filepath = out_path.join(obj.filepath.file_name().unwrap()); - files.entry(filepath.clone()).or_default().push(obj); + + match files.entry(filepath.clone()) { + std::collections::hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().push(obj); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(vec![obj]); + } + }; } // (module_name, [object_name]) @@ -147,27 +155,25 @@ fn quote_objects( for (filepath, objs) in files { let names = objs .iter() - .flat_map(|obj| match obj.kind { + .map(|obj| match obj.kind { ObjectKind::Datatype | ObjectKind::Component => { let name = &obj.name; - - vec![ - format!("{name}"), - format!("{name}Like"), - format!("{name}Array"), - format!("{name}ArrayLike"), - format!("{name}Type"), - ] + format!("{name}, {name}Like, {name}Array, {name}ArrayLike, {name}Type") } - ObjectKind::Archetype => vec![obj.name.clone()], + ObjectKind::Archetype => obj.name.clone(), }) .collect::>(); // NOTE: Isolating the file stem only works because we're handling datatypes, components // and archetypes separately (and even then it's a bit shady, eh). - mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) - .or_default() - .extend(names.iter().cloned()); + match mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) { + std::collections::hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().extend(names); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(names); + } + }; filepaths.push(filepath.clone()); let mut file = std::fs::File::create(&filepath) @@ -175,23 +181,11 @@ fn quote_objects( .unwrap(); let mut code = String::new(); - code.push_text(&format!("# {AUTOGEN_WARNING}"), 2, 0); - - let manifest = quote_manifest(names); - code.push_unindented_text( - format!( - " - from __future__ import annotations - - __all__ = [{manifest}] - - ", - ), - 0, - ); + code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); for obj in objs { - code.push_text(&obj.code, 1, 0); + code.push_str(&obj.code); + code.push('\n'); } file.write_all(code.as_bytes()) .with_context(|| format!("{filepath:?}")) @@ -204,25 +198,20 @@ fn quote_objects( let mut code = String::new(); - let manifest = quote_manifest(mods.iter().flat_map(|(_, names)| names.iter())); - - code.push_text(&format!("# {AUTOGEN_WARNING}"), 2, 0); - code.push_unindented_text( - format!( - " - from __future__ import annotations + code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); + code.push_str(&unindent::unindent( + " + # NOTE: + # - we use fully qualified paths to prevent lazy circular imports + # - `noqa F401` (unused import) everywhere because, while not strictly necessary, + # these imports are very nice for end users. - __all__ = [{manifest}] - - # NOTE: we use fully qualified paths to prevent lazy circular imports. - ", - ), - 0, - ); + ", + )); for (module, names) in &mods { let names = names.join(", "); - code.push_text(&format!("from .{module} import {names}"), 1, 0); + code.push_str(&format!("from .{module} import {names} # noqa: F401\n")); } filepaths.push(path.clone()); @@ -262,28 +251,26 @@ impl QuotedObject { let mut code = String::new(); - code.push_text("e_module_prelude(), 0, 0); + code.push_str("e_module_prelude()); for clause in obj .fields .iter() .filter_map(quote_import_clauses_from_field) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } - code.push_unindented_text( - format!( - r#" + code.push_str(&unindent::unindent(&format!( + r#" - @dataclass - class {name}: - "# - ), - 0, - ); + @dataclass + class {name}: + "# + ))); - code.push_text(quote_doc_from_docs(docs), 0, 4); + code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); for field in fields { let ObjectField { @@ -308,25 +295,42 @@ impl QuotedObject { let typ = if field.required { typ } else { - format!("{typ} | None = None") + format!("Optional[{typ}] = None") }; - code.push_text(format!("{name}: {typ}"), 1, 4); + code.push_str(&indent::indent_all_by(4, format!("{name}: {typ}\n"))); - code.push_text(quote_doc_from_docs(docs), 0, 4); + code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); } - code.push_text(quote_str_repr_from_obj(obj), 1, 4); - code.push_text(quote_array_method_from_obj(objects, obj), 1, 4); - code.push_text(quote_str_method_from_obj(objects, obj), 1, 4); + code.push_str(&indent::indent_all_by(4, quote_str_repr_from_obj(obj))); + code.push('\n'); + + code.push_str(&indent::indent_all_by( + 4, + quote_array_method_from_obj(objects, obj), + )); + code.push('\n'); + + code.push_str(&indent::indent_all_by( + 4, + quote_str_method_from_obj(objects, obj), + )); + code.push('\n'); if obj.kind == ObjectKind::Archetype { - code.push_text(quote_builder_from_obj(objects, obj), 1, 4); + code.push_str(&indent::indent_all_by( + 4, + quote_builder_from_obj(objects, obj), + )); + code.push('\n'); } else { - code.push_text(quote_aliases_from_object(obj), 1, 4); + code.push_str("e_aliases_from_object(obj)); + code.push('\n'); } - code.push_text(quote_arrow_support_from_obj(arrow_registry, obj), 1, 4); + code.push_str("e_arrow_support_from_obj(arrow_registry, obj)); + code.push('\n'); let mut filepath = PathBuf::from(filepath); filepath.set_extension("py"); @@ -356,28 +360,26 @@ impl QuotedObject { let mut code = String::new(); - code.push_text("e_module_prelude(), 0, 0); + code.push_str("e_module_prelude()); for clause in obj .fields .iter() .filter_map(quote_import_clauses_from_field) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } - code.push_unindented_text( - format!( - r#" + code.push_str(&unindent::unindent(&format!( + r#" - @dataclass - class {name}: - "# - ), - 0, - ); + @dataclass + class {name}: + "# + ))); - code.push_text(quote_doc_from_docs(docs), 0, 4); + code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); for field in fields { let ObjectField { @@ -394,16 +396,34 @@ impl QuotedObject { let (typ, _) = quote_field_type_from_field(objects, field, false); // NOTE: It's always optional since only one of the fields can be set at a time. - code.push_text(format!("{name}: {typ} | None = None"), 1, 4); + code.push_str(&indent::indent_all_by( + 4, + format!("{name}: Optional[{typ}] = None\n"), + )); - code.push_text(quote_doc_from_docs(docs), 0, 4); + code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); } - code.push_text(quote_str_repr_from_obj(obj), 1, 4); - code.push_text(quote_array_method_from_obj(objects, obj), 1, 4); - code.push_text(quote_str_method_from_obj(objects, obj), 1, 4); - code.push_text(quote_aliases_from_object(obj), 1, 4); - code.push_text(quote_arrow_support_from_obj(arrow_registry, obj), 1, 4); + code.push_str(&indent::indent_all_by(4, quote_str_repr_from_obj(obj))); + code.push('\n'); + + code.push_str(&indent::indent_all_by( + 4, + quote_array_method_from_obj(objects, obj), + )); + code.push('\n'); + + code.push_str(&indent::indent_all_by( + 4, + quote_str_method_from_obj(objects, obj), + )); + code.push('\n'); + + code.push_str("e_aliases_from_object(obj)); + code.push('\n'); + + code.push_str("e_arrow_support_from_obj(arrow_registry, obj)); + code.push('\n'); let mut filepath = PathBuf::from(filepath); filepath.set_extension("py"); @@ -419,20 +439,12 @@ impl QuotedObject { // --- Code generators --- -fn quote_manifest(names: impl IntoIterator>) -> String { - let mut quoted_names: Vec<_> = names - .into_iter() - .map(|name| format!("{:?}", name.as_ref())) - .collect(); - quoted_names.sort(); - - quoted_names.join(", ") -} - fn quote_module_prelude() -> String { - // NOTE: All the extraneous stuff will be cleaned up courtesy of `ruff`. + // NOTE: All the extraneous stull will be cleaned up courtesy of `ruff`. unindent::unindent( r#" + from __future__ import annotations + import numpy as np import numpy.typing as npt import pyarrow as pa @@ -544,43 +556,38 @@ fn quote_str_method_from_obj(objects: &Objects, obj: &Object) -> String { fn quote_aliases_from_object(obj: &Object) -> String { assert!(obj.kind != ObjectKind::Archetype); - let aliases = obj.try_get_attr::(PYTHON_ATTR_ALIASES); + let aliases = obj.try_get_attr::(ATTR_ALIASES); let array_aliases = obj - .try_get_attr::(PYTHON_ATTR_ARRAY_ALIASES) + .try_get_attr::(ATTR_ARRAY_ALIASES) .unwrap_or_default(); let name = &obj.name; let mut code = String::new(); - code.push_unindented_text( - &if let Some(aliases) = aliases { - format!( - r#" - {name}Like = Union[ - {name}, - {aliases} - ] - "#, - ) - } else { - format!("{name}Like = {name}") - }, - 1, - ); - - code.push_unindented_text( - format!( + code.push_str(&if let Some(aliases) = aliases { + unindent::unindent(&format!( r#" - {name}ArrayLike = Union[ - {name}Like, - Sequence[{name}Like], - {array_aliases} + {name}Like = Union[ + {name}, + {aliases} ] + "#, - ), - 0, - ); + )) + } else { + format!("{name}Like = {name}\n") + }); + + code.push_str(&unindent::unindent(&format!( + r#" + {name}ArrayLike = Union[ + {name}Like, + Sequence[{name}Like], + {array_aliases} + ] + "#, + ))); code } @@ -621,10 +628,6 @@ fn quote_import_clauses_from_field(field: &ObjectField) -> Option { } /// Returns type name as string and whether it was force unwrapped. -/// -/// Specifying `unwrap = true` will unwrap the final type before returning it, e.g. `Vec` -/// becomes just `String`. -/// The returned boolean indicates whether there was anything to unwrap at all. fn quote_field_type_from_field( objects: &Objects, field: &ObjectField, @@ -681,9 +684,7 @@ fn quote_field_type_from_field( // TODO(cmc): it is a bit weird to be doing the transparency logic (which is language // agnostic) in a python specific quoting function... a static helper at the very least // would be nice. - let is_transparent = field - .try_get_attr::(PYTHON_ATTR_TRANSPARENT) - .is_some(); + let is_transparent = field.try_get_attr::(ATTR_TRANSPARENT).is_some(); if is_transparent { let target = objects.get(fqname); assert!( @@ -721,6 +722,7 @@ fn quote_type_from_element_type(typ: &ElementType) -> String { ElementType::Object(fqname) => { let (from, class) = fqname.rsplit_once('.').unwrap_or(("", fqname.as_str())); if from.starts_with("rerun.datatypes") { + // NOTE: Only need the class name, pre-generated import clause takes care of the rest. format!("datatypes.{class}") } else if from.starts_with("rerun.components") { format!("components.{class}") @@ -794,11 +796,11 @@ fn quote_arrow_support_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> class {many}(pa.ExtensionArray, {many}Ext): # type: ignore[misc] @staticmethod - def from_similar(data: {many_aliases} | None): + def from_similar(data: Optional[{many_aliases}]): if data is None: return {arrow}().wrap_array(pa.array([], type={arrow}().storage_type)) else: - return {many}Ext._from_similar( + return {many}Ext.from_similar( data, mono={mono}, mono_aliases={mono_aliases}, @@ -806,7 +808,7 @@ fn quote_arrow_support_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> many_aliases={many_aliases}, arrow={arrow}, ) - "# + "# )) } ObjectKind::Archetype => String::new(), @@ -849,42 +851,38 @@ fn quote_builder_from_obj(objects: &Objects, obj: &Object) -> String { let (typ, unwrapped) = quote_field_type_from_field(objects, field, true); if unwrapped { // This was originally a vec/array! - format!("{}: {typ}ArrayLike | None = None", field.name) + format!("{}: Optional[{typ}ArrayLike] = None", field.name) } else { - format!("{}: {typ}Like | None = None", field.name) + format!("{}: Optional[{typ}Like] = None", field.name) } }) .collect::>() .join(", "); - code.push_text( - format!("def __init__(self, {required_args}, *, {optional_args}) -> None:"), - 1, - 0, - ); + code.push_str(&format!( + "def __init__(self, {required_args}, *, {optional_args}) -> None:\n" + )); - code.push_text("# Required components", 1, 4); + code.push_str(&indent::indent_all_by(4, "# Required components\n")); for field in required { let name = &field.name; let (typ, _) = quote_field_type_from_field(objects, field, true); - code.push_text( - format!("self.{name} = {typ}Array.from_similar({name})"), - 1, + code.push_str(&indent::indent_all_by( 4, - ); + format!("self.{name} = {typ}Array.from_similar({name})\n"), + )); } code.push('\n'); - code.push_text("# Optional components\n", 1, 4); + code.push_str(&indent::indent_all_by(4, "# Optional components\n")); for field in optional { let name = &field.name; let (typ, _) = quote_field_type_from_field(objects, field, true); - code.push_text( - format!("self.{name} = {typ}Array.from_similar({name})"), - 1, + code.push_str(&indent::indent_all_by( 4, - ); + format!("self.{name} = {typ}Array.from_similar({name})\n"), + )); } code @@ -938,7 +936,11 @@ fn quote_arrow_datatype(datatype: &DataType) -> String { .join(", "); format!("pa.struct([{fields}])") } - DataType::Extension(_, datatype, _) => quote_arrow_datatype(datatype), + DataType::Extension(_, datatype, _) => { + // TODO(cmc): not sure we need all that for the python backend since we already + // do the wrapping trick...? + quote_arrow_datatype(datatype) + } _ => unimplemented!("{datatype:#?}"), // NOLINT } } diff --git a/crates/re_types_builder/src/codegen/rust.rs b/crates/re_types_builder/src/codegen/rust.rs index ae87dedf9398..588c5ebc7663 100644 --- a/crates/re_types_builder/src/codegen/rust.rs +++ b/crates/re_types_builder/src/codegen/rust.rs @@ -8,14 +8,21 @@ use std::{ }; use crate::{ - codegen::{StringExt as _, AUTOGEN_WARNING}, + codegen::{ + AUTOGEN_WARNING, RERUN_ATTR_COMPONENT_OPTIONAL, RERUN_ATTR_COMPONENT_RECOMMENDED, + RERUN_ATTR_COMPONENT_REQUIRED, + }, ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, - Type, RERUN_ATTR_COMPONENT_OPTIONAL, RERUN_ATTR_COMPONENT_RECOMMENDED, - RERUN_ATTR_COMPONENT_REQUIRED, RUST_ATTR_DERIVE, RUST_ATTR_REPR, RUST_ATTR_TUPLE_STRUCT, + Type, }; // --- +// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions +pub const ATTR_DERIVE: &str = "rust.attr.derive"; +pub const ATTR_REPR: &str = "rust.attr.repr"; +pub const ATTR_TUPLE_STRUCT: &str = "rust.attr.tuple_struct"; + pub struct RustCodeGenerator { crate_path: PathBuf, } @@ -86,7 +93,15 @@ fn quote_objects( }; let filepath = out_path.join(obj.filepath.file_name().unwrap()); - files.entry(filepath.clone()).or_default().push(obj); + + match files.entry(filepath.clone()) { + std::collections::hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().push(obj); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(vec![obj]); + } + }; } // (module_name, [object_name]) @@ -97,9 +112,14 @@ fn quote_objects( // NOTE: Isolating the file stem only works because we're handling datatypes, components // and archetypes separately (and even then it's a bit shady, eh). let names = objs.iter().map(|obj| obj.name.clone()).collect::>(); - mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) - .or_default() - .extend(names); + match mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) { + std::collections::hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().extend(names); + } + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(names); + } + }; filepaths.push(filepath.clone()); let mut file = std::fs::File::create(&filepath) @@ -107,10 +127,11 @@ fn quote_objects( .unwrap(); let mut code = String::new(); - code.push_text(format!("// {AUTOGEN_WARNING}"), 2, 0); + code.push_str(&format!("// {AUTOGEN_WARNING}\n\n")); for obj in objs { - code.push_text(&obj.code, 1, 0); + code.push_str(&obj.code); + code.push('\n'); } file.write_all(code.as_bytes()) .with_context(|| format!("{filepath:?}")) @@ -123,17 +144,17 @@ fn quote_objects( let mut code = String::new(); - code.push_text(format!("// {AUTOGEN_WARNING}"), 2, 0); + code.push_str(&format!("// {AUTOGEN_WARNING}\n\n")); for module in mods.keys() { - code.push_text(format!("mod {module};"), 1, 0); + code.push_str(&format!("mod {module};\n")); - // Detect if someone manually created an extension file, and automatically + // NOTE: detect if someone manually created an extension file, and automatically // import it if so. let mut ext_path = out_path.join(format!("{module}_ext")); ext_path.set_extension("rs"); if ext_path.exists() { - code.push_text(format!("mod {module}_ext;"), 1, 0); + code.push_str(&format!("mod {module}_ext;\n")); } } @@ -141,7 +162,7 @@ fn quote_objects( for (module, names) in &mods { let names = names.join(", "); - code.push_text(format!("pub use self::{module}::{{{names}}};"), 1, 0); + code.push_str(&format!("pub use self::{module}::{{{names}}};\n")); } filepaths.push(path.clone()); @@ -180,21 +201,23 @@ impl QuotedObject { let mut code = String::new(); - code.push_text("e_doc_from_docs(docs), 0, 0); + code.push_str("e_doc_from_docs(docs)); if let Some(clause) = quote_derive_clause_from_obj(obj) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } if let Some(clause) = quote_repr_clause_from_obj(obj) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } let is_tuple_struct = is_tuple_struct_from_obj(obj); if is_tuple_struct { - code.push_text(&format!("pub struct {name}("), 0, 0); + code.push_str(&format!("pub struct {name}(")); } else { - code.push_text(&format!("pub struct {name} {{"), 1, 0); + code.push_str(&format!("pub struct {name} {{\n")); } for field in fields { @@ -207,11 +230,11 @@ impl QuotedObject { typ: _, attrs: _, required, - // TODO(#2366): support for deprecation notices + // TODO(cmc): support for deprecation notices deprecated: _, } = field; - code.push_text("e_doc_from_docs(docs), 0, 0); + code.push_str("e_doc_from_docs(docs)); let (typ, _) = quote_field_type_from_field(field, false); let typ = if *required { @@ -221,9 +244,9 @@ impl QuotedObject { }; if is_tuple_struct { - code.push_text(&format!("pub {typ}"), 0, 0); + code.push_str(&format!("pub {typ}")); } else { - code.push_text(&format!("pub {name}: {typ},"), 2, 0); + code.push_str(&format!("pub {name}: {typ},\n\n")); } } @@ -233,10 +256,11 @@ impl QuotedObject { code += "}\n\n"; } - code.push_text("e_trait_impls_from_obj(arrow_registry, obj), 1, 0); + code.push_str("e_trait_impls_from_obj(arrow_registry, obj)); + code.push('\n'); if kind == &ObjectKind::Archetype { - code.push_text("e_builder_from_obj(obj), 0, 0); + code.push_str("e_builder_from_obj(obj)); } let mut filepath = PathBuf::from(filepath); @@ -266,16 +290,18 @@ impl QuotedObject { let mut code = String::new(); - code.push_text("e_doc_from_docs(docs), 0, 0); + code.push_str("e_doc_from_docs(docs)); if let Some(clause) = quote_derive_clause_from_obj(obj) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } if let Some(clause) = quote_repr_clause_from_obj(obj) { - code.push_text(&clause, 1, 0); + code.push_str(&clause); + code.push('\n'); } - code.push_text(&format!("pub enum {name} {{"), 1, 0); + code.push_str(&format!("pub enum {name} {{\n")); for field in fields { let ObjectField { @@ -290,16 +316,17 @@ impl QuotedObject { deprecated: _, } = field; - code.push_text("e_doc_from_docs(docs), 0, 0); + code.push_str("e_doc_from_docs(docs)); let (typ, _) = quote_field_type_from_field(field, false); - code.push_text(&format!("{name}({typ}),"), 2, 0); + code.push_str(&format!("{name}({typ}),\n\n")); } code += "}\n\n"; - code.push_text("e_trait_impls_from_obj(arrow_registry, obj), 1, 0); + code.push_str("e_trait_impls_from_obj(arrow_registry, obj)); + code.push('\n'); let mut filepath = PathBuf::from(filepath); filepath.set_extension("rs"); @@ -327,14 +354,9 @@ fn quote_doc_from_docs(docs: &Docs) -> String { } /// Returns type name as string and whether it was force unwrapped. -/// -/// Specifying `unwrap = true` will unwrap the final type before returning it, e.g. `Vec` -/// becomes just `String`. -/// The returned boolean indicates whether there was anything to unwrap at all. fn quote_field_type_from_field(field: &ObjectField, unwrap: bool) -> (String, bool) { let mut unwrapped = false; - let typ = &field.typ; - let typ = match typ { + let typ = match &field.typ { Type::UInt8 => "u8".to_owned(), Type::UInt16 => "u16".to_owned(), Type::UInt32 => "u32".to_owned(), @@ -344,9 +366,10 @@ fn quote_field_type_from_field(field: &ObjectField, unwrap: bool) -> (String, bo Type::Int32 => "i32".to_owned(), Type::Int64 => "i64".to_owned(), Type::Bool => "bool".to_owned(), - Type::Float16 => unimplemented!("{typ:#?}"), // NOLINT + Type::Float16 => unimplemented!("ResolvedType::Float16"), // NOLINT Type::Float32 => "f32".to_owned(), Type::Float64 => "f64".to_owned(), + // TODO(cmc): ref for deserialization? Type::String => "String".to_owned(), Type::Array { elem_type, length } => { let typ = quote_type_from_element_type(elem_type); @@ -383,28 +406,29 @@ fn quote_type_from_element_type(typ: &ElementType) -> String { ElementType::Int32 => "i32".to_owned(), ElementType::Int64 => "i64".to_owned(), ElementType::Bool => "bool".to_owned(), - ElementType::Float16 => unimplemented!("{typ:#?}"), // NOLINT + ElementType::Float16 => unimplemented!("ResolvedType::Float16"), // NOLINT ElementType::Float32 => "f32".to_owned(), ElementType::Float64 => "f64".to_owned(), + // TODO(cmc): ref for deserialization? ElementType::String => "String".to_owned(), ElementType::Object(fqname) => fqname.replace('.', "::").replace("rerun", "crate"), } } fn quote_derive_clause_from_obj(obj: &Object) -> Option { - obj.try_get_attr::(RUST_ATTR_DERIVE) + obj.try_get_attr::(ATTR_DERIVE) .map(|what| format!("#[derive({what})]")) } fn quote_repr_clause_from_obj(obj: &Object) -> Option { - obj.try_get_attr::(RUST_ATTR_REPR) + obj.try_get_attr::(ATTR_REPR) .map(|what| format!("#[repr({what})]")) } fn is_tuple_struct_from_obj(obj: &Object) -> bool { obj.is_struct() && obj.fields.len() == 1 - && obj.try_get_attr::(RUST_ATTR_TUPLE_STRUCT).is_some() + && obj.try_get_attr::(ATTR_TUPLE_STRUCT).is_some() } fn quote_trait_impls_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> String { @@ -512,7 +536,6 @@ fn quote_trait_impls_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> S #[allow(clippy::unimplemented)] fn to_arrow_datatypes() -> Vec {{ - // TODO(#2368): dump the arrow registry into the generated code unimplemented!("query the registry for all fqnames"); // NOLINT }} }} @@ -549,7 +572,7 @@ fn quote_builder_from_obj(obj: &Object) -> String { let mut code = String::new(); - code.push_text(&format!("impl {name} {{"), 1, 0); + code.push_str(&format!("impl {name} {{\n")); { // --- impl new() --- @@ -569,7 +592,7 @@ fn quote_builder_from_obj(obj: &Object) -> String { }) .collect::>() .join(", "); - code.push_text(&format!("pub fn new({new_params}) -> Self {{"), 1, 0); + code.push_str(&format!("pub fn new({new_params}) -> Self {{\n")); { code += "Self {\n"; { @@ -577,20 +600,16 @@ fn quote_builder_from_obj(obj: &Object) -> String { let (_, unwrapped) = quote_field_type_from_field(field, true); if unwrapped { // This was originally a vec/array! - code.push_text( - &format!( - "{}: {}.into_iter().map(Into::into).collect(),", - field.name, field.name - ), - 1, - 0, - ); + code.push_str(&format!( + "{}: {}.into_iter().map(Into::into).collect(),\n", + field.name, field.name + )); } else { - code.push_text(&format!("{}: {}.into(),", field.name, field.name), 1, 0); + code.push_str(&format!("{}: {}.into(),\n", field.name, field.name)); } } for field in &optional { - code.push_text(&format!("{}: None,", field.name), 1, 0); + code.push_str(&format!("{}: None,\n", field.name)); } } code += "}\n"; @@ -605,27 +624,21 @@ fn quote_builder_from_obj(obj: &Object) -> String { if unwrapped { // This was originally a vec/array! - code.push_text(&format!( - "pub fn with_{name}(mut self, {name}: impl IntoIterator>) -> Self {{", - ), 1, 0); + code.push_str(&format!( + "pub fn with_{name}(mut self, {name}: impl IntoIterator>) -> Self {{\n", + )); { - code.push_text( - &format!( - "self.{name} = Some({name}.into_iter().map(Into::into).collect());" - ), - 1, - 0, - ); + code.push_str(&format!( + "self.{name} = Some({name}.into_iter().map(Into::into).collect());\n" + )); code += "self\n"; } } else { - code.push_text( - &format!("pub fn with_{name}(mut self, {name}: impl Into<{typ}>) -> Self {{",), - 1, - 0, - ); + code.push_str(&format!( + "pub fn with_{name}(mut self, {name}: impl Into<{typ}>) -> Self {{\n", + )); { - code.push_text(&format!("self.{name} = Some({name}.into());"), 1, 0); + code.push_str(&format!("self.{name} = Some({name}.into());\n")); code += "self\n"; } } diff --git a/crates/re_types_builder/src/lib.rs b/crates/re_types_builder/src/lib.rs index 71669189d7f6..b15e3704e1b0 100644 --- a/crates/re_types_builder/src/lib.rs +++ b/crates/re_types_builder/src/lib.rs @@ -18,7 +18,7 @@ //! ####. 2. Run the semantic pass. //! //! The semantic pass transforms the low-level raw reflection data generated by the first phase -//! into higher level objects that are much easier to inspect/manipulate and overall friendlier +//! into higher level objects that are much easier to inspect/manipulate and overall friendler //! to work with. //! //! Look for `objects.rs`. @@ -61,8 +61,6 @@ //! //! Make sure to test the behavior of its output though: `re_types`! -// TODO(#2365): support for external IDL definitions - // --- // NOTE: Official generated code from flatbuffers; ignore _everything_. @@ -99,31 +97,12 @@ pub use self::objects::{ Attributes, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, Type, }; -// --- Attributes --- - -pub const ARROW_ATTR_TRANSPARENT: &str = "arrow.attr.transparent"; -pub const ARROW_ATTR_SPARSE_UNION: &str = "arrow.attr.sparse_union"; - -pub const RERUN_ATTR_COMPONENT_REQUIRED: &str = "rerun.attr.component_required"; -pub const RERUN_ATTR_COMPONENT_RECOMMENDED: &str = "rerun.attr.component_recommended"; -pub const RERUN_ATTR_COMPONENT_OPTIONAL: &str = "rerun.attr.component_optional"; - -pub const PYTHON_ATTR_TRANSPARENT: &str = "python.attr.transparent"; -pub const PYTHON_ATTR_ALIASES: &str = "python.attr.aliases"; -pub const PYTHON_ATTR_ARRAY_ALIASES: &str = "python.attr.array_aliases"; - -pub const RUST_ATTR_DERIVE: &str = "rust.attr.derive"; -pub const RUST_ATTR_REPR: &str = "rust.attr.repr"; -pub const RUST_ATTR_TUPLE_STRUCT: &str = "rust.attr.tuple_struct"; - // --- Entrypoints --- use std::path::{Path, PathBuf}; /// Compiles binary reflection dumps from flatbuffers definitions. /// -/// Requires `flatc` available in $PATH. -/// /// Panics on error. /// /// - `include_dir_path`: path to the root directory of the fbs definition tree. @@ -160,14 +139,28 @@ pub fn compile_binary_schemas( .unwrap(); } -/// Handles the first 3 language-agnostic passes of the codegen pipeline: -/// 1. Generate binary reflection dumps for our definitions. -/// 2. Run the semantic pass -/// 3. Compute the Arrow registry -fn generate_lang_agnostic( +/// Generates Rust code from a set of flatbuffers definitions. +/// +/// Panics on error. +/// +/// - `include_dir_path`: path to the root directory of the fbs definition tree. +/// - `output_crate_path`: path to the root of the output crate. +/// - `entrypoint_path`: path to the root file of the fbs definition tree. +/// - `source_hash`: optional sha256 hash of the source definition files. +/// +/// E.g.: +/// ```no_run +/// re_types_builder::generate_rust_code( +/// "./definitions", +/// ".", +/// "./definitions/rerun/archetypes.fbs", +/// ); +/// ``` +pub fn generate_rust_code( include_dir_path: impl AsRef, + output_crate_path: impl AsRef, entrypoint_path: impl AsRef, -) -> (Objects, ArrowRegistry) { +) { use xshell::Shell; let sh = Shell::new().unwrap(); @@ -195,33 +188,6 @@ fn generate_lang_agnostic( arrow_registry.register(obj); } - (objects, arrow_registry) -} - -/// Generates Rust code from a set of flatbuffers definitions. -/// -/// Panics on error. -/// -/// - `include_dir_path`: path to the root directory of the fbs definition tree. -/// - `output_crate_path`: path to the root of the output crate. -/// - `entrypoint_path`: path to the root file of the fbs definition tree. -/// -/// E.g.: -/// ```no_run -/// re_types_builder::generate_rust_code( -/// "./definitions", -/// ".", -/// "./definitions/rerun/archetypes.fbs", -/// ); -/// ``` -pub fn generate_rust_code( - include_dir_path: impl AsRef, - output_crate_path: impl AsRef, - entrypoint_path: impl AsRef, -) { - // passes 1 through 3: bfbs, semantic, arrow registry - let (objects, arrow_registry) = generate_lang_agnostic(include_dir_path, entrypoint_path); - // generate rust code let mut gen = RustCodeGenerator::new(output_crate_path.as_ref()); let _filepaths = gen.quote(&objects, &arrow_registry); @@ -248,8 +214,32 @@ pub fn generate_python_code( output_pkg_path: impl AsRef, entrypoint_path: impl AsRef, ) { - // passes 1 through 3: bfbs, semantic, arrow registry - let (objects, arrow_registry) = generate_lang_agnostic(include_dir_path, entrypoint_path); + use xshell::Shell; + + let sh = Shell::new().unwrap(); + let tmp = sh.create_temp_dir().unwrap(); + + let entrypoint_path = entrypoint_path.as_ref(); + let entrypoint_filename = entrypoint_path.file_name().unwrap(); + + // generate bfbs definitions + compile_binary_schemas(include_dir_path, tmp.path(), entrypoint_path); + + let mut binary_entrypoint_path = PathBuf::from(entrypoint_filename); + binary_entrypoint_path.set_extension("bfbs"); + + // semantic pass: high level objects from low-level reflection data + let objects = Objects::from_buf( + sh.read_binary_file(tmp.path().join(binary_entrypoint_path)) + .unwrap() + .as_slice(), + ); + + // create and fill out arrow registry + let mut arrow_registry = ArrowRegistry::default(); + for obj in objects.ordered_objects(None) { + arrow_registry.register(obj); + } // generate python code let mut gen = PythonCodeGenerator::new(output_pkg_path.as_ref()); diff --git a/crates/re_types_builder/src/objects.rs b/crates/re_types_builder/src/objects.rs index 677779a7c7b3..4f6168c9667d 100644 --- a/crates/re_types_builder/src/objects.rs +++ b/crates/re_types_builder/src/objects.rs @@ -124,22 +124,6 @@ pub enum ObjectKind { Archetype, } -impl ObjectKind { - // TODO(#2364): use an attr instead of the path - pub fn from_pkg_name(pkg_name: impl AsRef) -> Self { - let pkg_name = pkg_name.as_ref(); - if pkg_name.starts_with("rerun.datatypes") { - ObjectKind::Datatype - } else if pkg_name.starts_with("rerun.components") { - ObjectKind::Component - } else if pkg_name.starts_with("rerun.archetypes") { - ObjectKind::Archetype - } else { - panic!("unknown package {pkg_name:?}"); - } - } -} - /// A high-level representation of a flatbuffers object's documentation. #[derive(Debug, Clone)] pub struct Docs { @@ -272,7 +256,18 @@ impl Object { .unwrap(); let docs = Docs::from_raw_docs(obj.documentation()); - let kind = ObjectKind::from_pkg_name(&pkg_name); + + let kind = if pkg_name.starts_with("rerun.datatypes") { + ObjectKind::Datatype + } else if pkg_name.starts_with("rerun.components") { + ObjectKind::Component + } else if pkg_name.starts_with("rerun.archetypes") { + ObjectKind::Archetype + } else { + // TODO(cmc): support IDL definitions from outside the repo + panic!("unknown package {pkg_name:?}"); + }; + let attrs = Attributes::from_raw_attrs(obj.attributes()); let fields = { @@ -317,7 +312,17 @@ impl Object { .unwrap(); let docs = Docs::from_raw_docs(enm.documentation()); - let kind = ObjectKind::from_pkg_name(&pkg_name); + + let kind = if pkg_name.starts_with("rerun.datatypes") { + ObjectKind::Datatype + } else if pkg_name.starts_with("rerun.components") { + ObjectKind::Component + } else if pkg_name.starts_with("rerun.archetypes") { + ObjectKind::Archetype + } else { + // TODO(cmc): support IDL definitions from outside the repo + panic!("unknown package {pkg_name:?}"); + }; let utype = { if enm.underlying_type().base_type() == FbsBaseType::UType { @@ -443,13 +448,12 @@ pub struct ObjectField { /// Whether the field is required. /// - /// Always true for IDL definitions using flatbuffers' `struct` type (as opposed to `table`). + /// Always true for `struct` types. pub required: bool, /// Whether the field is deprecated. // - // TODO(#2366): do something with this - // TODO(#2367): implement custom attr to specify deprecation reason + // TODO(cmc): implement custom attr to specify deprecation reason pub deprecated: bool, } @@ -566,7 +570,7 @@ impl ObjectField { } } -/// The underlying type of an [`ObjectField`]. +/// The underlying type of a `ResolvedObjectField`. #[derive(Debug, Clone)] pub enum Type { UInt8, @@ -592,35 +596,24 @@ pub enum Type { Object(String), // fqname } -impl From for Type { - fn from(typ: ElementType) -> Self { - match typ { - ElementType::UInt8 => Self::UInt8, - ElementType::UInt16 => Self::UInt16, - ElementType::UInt32 => Self::UInt32, - ElementType::UInt64 => Self::UInt64, - ElementType::Int8 => Self::Int8, - ElementType::Int16 => Self::Int16, - ElementType::Int32 => Self::Int32, - ElementType::Int64 => Self::Int64, - ElementType::Bool => Self::Bool, - ElementType::Float16 => Self::Float16, - ElementType::Float32 => Self::Float32, - ElementType::Float64 => Self::Float64, - ElementType::String => Self::String, - ElementType::Object(fqname) => Self::Object(fqname), - } - } -} - impl Type { pub fn from_raw_type( enums: &[FbsEnum<'_>], objs: &[FbsObject<'_>], field_type: FbsType<'_>, ) -> Self { - let typ = field_type.base_type(); - match typ { + fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> Type { + if obj.name().starts_with("fbs.scalars.") { + match obj.name() { + "fbs.scalars.Float32" => Type::Float32, + _ => unimplemented!(), // NOLINT + } + } else { + Type::Object(obj.name().to_owned()) + } + } + + match field_type.base_type() { FbsBaseType::Bool => Self::Bool, FbsBaseType::Byte => Self::Int8, FbsBaseType::UByte => Self::UInt8, @@ -636,12 +629,14 @@ impl Type { FbsBaseType::String => Self::String, FbsBaseType::Obj => { let obj = &objs[field_type.index() as usize]; - flatten_scalar_wrappers(obj).into() + flatten_scalar_wrappers(obj) } FbsBaseType::Union => { let union = &enums[field_type.index() as usize]; Self::Object(union.name().to_owned()) } + // NOTE: flatbuffers doesn't support directly nesting multiple layers of arrays, they + // always have to be wrapped into intermediate layers of structs or tables. FbsBaseType::Array => Self::Array { elem_type: ElementType::from_raw_base_type( enums, @@ -659,11 +654,10 @@ impl Type { field_type.element(), ), }, - FbsBaseType::None | FbsBaseType::UType | FbsBaseType::Vector64 => { - unimplemented!("{typ:#?}") // NOLINT - } // NOLINT - // NOTE: `FbsBaseType` isn't actually an enum, it's just a bunch of constants... - _ => unreachable!("{typ:#?}"), + FbsBaseType::None => unimplemented!(), // NOLINT + FbsBaseType::UType => unimplemented!(), // NOLINT + FbsBaseType::Vector64 => unimplemented!(), // NOLINT + _ => unreachable!(), } } } @@ -697,7 +691,18 @@ impl ElementType { outer_type: FbsType<'_>, inner_type: FbsBaseType, ) -> Self { - #[allow(clippy::match_same_arms)] + /// Helper to turn wrapped scalars into actual scalars. + fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> ElementType { + if obj.name().starts_with("fbs.scalars.") { + match obj.name() { + "fbs.scalars.Float32" => ElementType::Float32, + _ => unimplemented!(), // NOLINT + } + } else { + ElementType::Object(obj.name().to_owned()) + } + } + match inner_type { FbsBaseType::Bool => Self::Bool, FbsBaseType::Byte => Self::Int8, @@ -708,6 +713,7 @@ impl ElementType { FbsBaseType::UInt => Self::UInt32, FbsBaseType::Long => Self::Int64, FbsBaseType::ULong => Self::UInt64, + // TODO(cmc): half support FbsBaseType::Float => Self::Float32, FbsBaseType::Double => Self::Float64, FbsBaseType::String => Self::String, @@ -715,14 +721,15 @@ impl ElementType { let obj = &objs[outer_type.index() as usize]; flatten_scalar_wrappers(obj) } - FbsBaseType::Union => unimplemented!("{inner_type:#?}"), // NOLINT - FbsBaseType::None - | FbsBaseType::UType - | FbsBaseType::Array - | FbsBaseType::Vector - | FbsBaseType::Vector64 => unreachable!("{inner_type:#?}"), - // NOTE: `FbsType` isn't actually an enum, it's just a bunch of constants... - _ => unreachable!("{inner_type:#?}"), + FbsBaseType::Union => unimplemented!(), // NOLINT + // NOTE: flatbuffers doesn't support directly nesting multiple layers of arrays, they + // always have to be wrapped into intermediate layers of structs or tables. + FbsBaseType::Array => unimplemented!(), // NOLINT + FbsBaseType::None => unimplemented!(), // NOLINT + FbsBaseType::UType => unimplemented!(), // NOLINT + FbsBaseType::Vector => unimplemented!(), // NOLINT + FbsBaseType::Vector64 => unimplemented!(), // NOLINT + _ => unreachable!(), } } } @@ -770,10 +777,9 @@ impl Attributes { value_str .parse() .with_context(|| { - let type_of_t = std::any::type_name::(); format!( "invalid `{name}` attribute for `{owner_fqname}`: \ - expected {type_of_t}, got `{value_str}` instead" + expected unsigned integer, got `{value_str}` instead" ) }) .unwrap() @@ -797,26 +803,12 @@ impl Attributes { value_str .parse() .with_context(|| { - let type_of_t = std::any::type_name::(); format!( "invalid `{name}` attribute for `{owner_fqname}`: \ - expected {type_of_t}, got `{value_str}` instead" + expected unsigned integer, got `{value_str}` instead" ) }) .unwrap(), ) } } - -/// Helper to turn wrapped scalars into actual scalars. -fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> ElementType { - let name = obj.name(); - if name.starts_with("fbs.scalars.") { - match name { - "fbs.scalars.Float32" => ElementType::Float32, - _ => unimplemented!("{name:#?}"), // NOLINT - } - } else { - ElementType::Object(name.to_owned()) - } -} From bf37e5057327cf09a5f936cf9a9218ee6f19402a Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 17:45:02 +0200 Subject: [PATCH 05/18] generate reflection code --- crates/re_types_builder/source_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_types_builder/source_hash.txt b/crates/re_types_builder/source_hash.txt index 2d51980aefb3..f1766f06bb26 100644 --- a/crates/re_types_builder/source_hash.txt +++ b/crates/re_types_builder/source_hash.txt @@ -1,4 +1,4 @@ # This is a sha256 hash for all direct and indirect dependencies of this crate's build script. # It can be safely removed at anytime to force the build script to run again. # Check out build.rs to see how it's computed. -674450e64722effea9b29a84c34fab81b9d57497136a8f161561b8e19f7441b2 \ No newline at end of file +72d936d50287d6c16d0c1b91f86bd74120642c8fc08e885f08dd2b92bb52e8a4 \ No newline at end of file From a4e8daa2698747e7c42de03022ca2782fabc7e26 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 17:56:36 +0200 Subject: [PATCH 06/18] unindent 0.1 everywhere --- Cargo.lock | 14 ++++---------- crates/re_types_builder/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be455137ee2a..b5c6926c1ca1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3729,7 +3729,7 @@ dependencies = [ "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent 0.1.11", + "unindent", ] [[package]] @@ -3938,7 +3938,7 @@ dependencies = [ "glob", "sha2", "time", - "unindent 0.1.11", + "unindent", "walkdir", ] @@ -4227,7 +4227,7 @@ dependencies = [ "thiserror", "tobj", "type-map", - "unindent 0.2.1", + "unindent", "walkdir", "wasm-bindgen-futures", "web-sys", @@ -4507,7 +4507,7 @@ dependencies = [ "flatbuffers", "indent", "re_build_tools", - "unindent 0.2.1", + "unindent", "xshell", ] @@ -5801,12 +5801,6 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" -[[package]] -name = "unindent" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" - [[package]] name = "untrusted" version = "0.7.1" diff --git a/crates/re_types_builder/Cargo.toml b/crates/re_types_builder/Cargo.toml index 01b3cc8efc4a..ffdd334219bd 100644 --- a/crates/re_types_builder/Cargo.toml +++ b/crates/re_types_builder/Cargo.toml @@ -25,7 +25,7 @@ arrow2.workspace = true convert_case = "0.6" flatbuffers = "23.0" indent = "0.1" -unindent = "0.2" +unindent.workspace = true xshell = "0.2" From cf290a7eb258e5fdce12174f6321740e652d3a96 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 20:55:20 +0200 Subject: [PATCH 07/18] self-review --- crates/re_types_builder/build.rs | 6 +- crates/re_types_builder/src/arrow_registry.rs | 5 +- crates/re_types_builder/src/codegen/mod.rs | 1 - crates/re_types_builder/src/codegen/python.rs | 12 +- crates/re_types_builder/src/codegen/rust.rs | 15 +- crates/re_types_builder/src/lib.rs | 3 +- crates/re_types_builder/src/objects.rs | 140 ++++++++++-------- 7 files changed, 95 insertions(+), 87 deletions(-) diff --git a/crates/re_types_builder/build.rs b/crates/re_types_builder/build.rs index 199ee8e2a8bb..5c949bf4f298 100644 --- a/crates/re_types_builder/build.rs +++ b/crates/re_types_builder/build.rs @@ -50,6 +50,8 @@ fn main() { } } + // NOTE: This requires `flatc` to be in $PATH, but only for contributors, not end users. + // Even for contributors, `flatc` won't be needed unless they edit some of the .fbs files. let sh = Shell::new().unwrap(); cmd!( sh, @@ -60,8 +62,8 @@ fn main() { // NOTE: We're purposefully ignoring the error here. // - // In the very unlikely chance that the user doesn't have `rustfmt` in their $PATH, there's - // still no good reason to fail the build. + // In the very unlikely chance that the user doesn't have the `fmt` component installed, + // there's still no good reason to fail the build. // // The CI will catch the unformatted file at PR time and complain appropriately anyhow. cmd!(sh, "cargo fmt").run().ok(); diff --git a/crates/re_types_builder/src/arrow_registry.rs b/crates/re_types_builder/src/arrow_registry.rs index dd07e7fba4d1..e0edc01cddf5 100644 --- a/crates/re_types_builder/src/arrow_registry.rs +++ b/crates/re_types_builder/src/arrow_registry.rs @@ -8,7 +8,6 @@ use crate::{ElementType, Object, Type}; // --- -// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions pub const ARROW_ATTR_TRANSPARENT: &str = "arrow.attr.transparent"; pub const ARROW_ATTR_SPARSE_UNION: &str = "arrow.attr.sparse_union"; @@ -56,7 +55,6 @@ impl ArrowRegistry { fn arrow_datatype_from_object(&self, obj: &Object) -> LazyDatatype { let is_struct = obj.is_struct(); - let is_transparent = obj.try_get_attr::(ARROW_ATTR_TRANSPARENT).is_some(); let num_fields = obj.fields.len(); @@ -170,6 +168,9 @@ impl ArrowRegistry { // --- Field --- /// A yet-to-be-resolved [`arrow2::datatypes::Field`]. +/// +/// Type resolution is a two-pass process as we first need to register all existing types before we +/// can denormalize their definitions into their parents. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct LazyField { /// Its name diff --git a/crates/re_types_builder/src/codegen/mod.rs b/crates/re_types_builder/src/codegen/mod.rs index 49bdc9d9278f..eb2e4d553265 100644 --- a/crates/re_types_builder/src/codegen/mod.rs +++ b/crates/re_types_builder/src/codegen/mod.rs @@ -15,7 +15,6 @@ pub trait CodeGenerator { pub const AUTOGEN_WARNING: &str = "NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT."; -// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions pub const RERUN_ATTR_COMPONENT_REQUIRED: &str = "rerun.attr.component_required"; pub const RERUN_ATTR_COMPONENT_RECOMMENDED: &str = "rerun.attr.component_recommended"; pub const RERUN_ATTR_COMPONENT_OPTIONAL: &str = "rerun.attr.component_optional"; diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 9b6f85970405..2cba90d0a457 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -17,7 +17,6 @@ use crate::{ // NOTE: `rerun2` while we figure out how to integrate back into the main SDK. const MODULE_NAME: &str = "rerun2"; -// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions pub const ATTR_TRANSPARENT: &str = "python.attr.transparent"; pub const ATTR_ALIASES: &str = "python.attr.aliases"; pub const ATTR_ARRAY_ALIASES: &str = "python.attr.array_aliases"; @@ -440,7 +439,7 @@ impl QuotedObject { // --- Code generators --- fn quote_module_prelude() -> String { - // NOTE: All the extraneous stull will be cleaned up courtesy of `ruff`. + // NOTE: All the extraneous stuff will be cleaned up courtesy of `ruff`. unindent::unindent( r#" from __future__ import annotations @@ -722,7 +721,6 @@ fn quote_type_from_element_type(typ: &ElementType) -> String { ElementType::Object(fqname) => { let (from, class) = fqname.rsplit_once('.').unwrap_or(("", fqname.as_str())); if from.starts_with("rerun.datatypes") { - // NOTE: Only need the class name, pre-generated import clause takes care of the rest. format!("datatypes.{class}") } else if from.starts_with("rerun.components") { format!("components.{class}") @@ -808,7 +806,7 @@ fn quote_arrow_support_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> many_aliases={many_aliases}, arrow={arrow}, ) - "# + "# )) } ObjectKind::Archetype => String::new(), @@ -936,11 +934,7 @@ fn quote_arrow_datatype(datatype: &DataType) -> String { .join(", "); format!("pa.struct([{fields}])") } - DataType::Extension(_, datatype, _) => { - // TODO(cmc): not sure we need all that for the python backend since we already - // do the wrapping trick...? - quote_arrow_datatype(datatype) - } + DataType::Extension(_, datatype, _) => quote_arrow_datatype(datatype), _ => unimplemented!("{datatype:#?}"), // NOLINT } } diff --git a/crates/re_types_builder/src/codegen/rust.rs b/crates/re_types_builder/src/codegen/rust.rs index 588c5ebc7663..0a928185ae47 100644 --- a/crates/re_types_builder/src/codegen/rust.rs +++ b/crates/re_types_builder/src/codegen/rust.rs @@ -18,7 +18,6 @@ use crate::{ // --- -// TODO(cmc): find a way to extract attr name constants directly from the IDL definitions pub const ATTR_DERIVE: &str = "rust.attr.derive"; pub const ATTR_REPR: &str = "rust.attr.repr"; pub const ATTR_TUPLE_STRUCT: &str = "rust.attr.tuple_struct"; @@ -149,7 +148,7 @@ fn quote_objects( for module in mods.keys() { code.push_str(&format!("mod {module};\n")); - // NOTE: detect if someone manually created an extension file, and automatically + // Detect if someone manually created an extension file, and automatically // import it if so. let mut ext_path = out_path.join(format!("{module}_ext")); ext_path.set_extension("rs"); @@ -230,7 +229,7 @@ impl QuotedObject { typ: _, attrs: _, required, - // TODO(cmc): support for deprecation notices + // TODO(#2366): support for deprecation notices deprecated: _, } = field; @@ -356,7 +355,8 @@ fn quote_doc_from_docs(docs: &Docs) -> String { /// Returns type name as string and whether it was force unwrapped. fn quote_field_type_from_field(field: &ObjectField, unwrap: bool) -> (String, bool) { let mut unwrapped = false; - let typ = match &field.typ { + let typ = &field.typ; + let typ = match typ { Type::UInt8 => "u8".to_owned(), Type::UInt16 => "u16".to_owned(), Type::UInt32 => "u32".to_owned(), @@ -366,10 +366,9 @@ fn quote_field_type_from_field(field: &ObjectField, unwrap: bool) -> (String, bo Type::Int32 => "i32".to_owned(), Type::Int64 => "i64".to_owned(), Type::Bool => "bool".to_owned(), - Type::Float16 => unimplemented!("ResolvedType::Float16"), // NOLINT + Type::Float16 => unimplemented!("{typ:#?}"), // NOLINT Type::Float32 => "f32".to_owned(), Type::Float64 => "f64".to_owned(), - // TODO(cmc): ref for deserialization? Type::String => "String".to_owned(), Type::Array { elem_type, length } => { let typ = quote_type_from_element_type(elem_type); @@ -406,10 +405,9 @@ fn quote_type_from_element_type(typ: &ElementType) -> String { ElementType::Int32 => "i32".to_owned(), ElementType::Int64 => "i64".to_owned(), ElementType::Bool => "bool".to_owned(), - ElementType::Float16 => unimplemented!("ResolvedType::Float16"), // NOLINT + ElementType::Float16 => unimplemented!("{typ:#?}"), // NOLINT ElementType::Float32 => "f32".to_owned(), ElementType::Float64 => "f64".to_owned(), - // TODO(cmc): ref for deserialization? ElementType::String => "String".to_owned(), ElementType::Object(fqname) => fqname.replace('.', "::").replace("rerun", "crate"), } @@ -536,6 +534,7 @@ fn quote_trait_impls_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> S #[allow(clippy::unimplemented)] fn to_arrow_datatypes() -> Vec {{ + // TODO(#2368): dump the arrow registry into the generated code unimplemented!("query the registry for all fqnames"); // NOLINT }} }} diff --git a/crates/re_types_builder/src/lib.rs b/crates/re_types_builder/src/lib.rs index b15e3704e1b0..23ee304525b7 100644 --- a/crates/re_types_builder/src/lib.rs +++ b/crates/re_types_builder/src/lib.rs @@ -61,6 +61,8 @@ //! //! Make sure to test the behavior of its output though: `re_types`! +// TODO(#2365): support for external IDL definitions + // --- // NOTE: Official generated code from flatbuffers; ignore _everything_. @@ -146,7 +148,6 @@ pub fn compile_binary_schemas( /// - `include_dir_path`: path to the root directory of the fbs definition tree. /// - `output_crate_path`: path to the root of the output crate. /// - `entrypoint_path`: path to the root file of the fbs definition tree. -/// - `source_hash`: optional sha256 hash of the source definition files. /// /// E.g.: /// ```no_run diff --git a/crates/re_types_builder/src/objects.rs b/crates/re_types_builder/src/objects.rs index 4f6168c9667d..343e40583ed1 100644 --- a/crates/re_types_builder/src/objects.rs +++ b/crates/re_types_builder/src/objects.rs @@ -124,6 +124,22 @@ pub enum ObjectKind { Archetype, } +impl ObjectKind { + // TODO(#2364): use an attr instead of the path + pub fn from_pkg_name(pkg_name: impl AsRef) -> Self { + let pkg_name = pkg_name.as_ref(); + if pkg_name.starts_with("rerun.datatypes") { + ObjectKind::Datatype + } else if pkg_name.starts_with("rerun.components") { + ObjectKind::Component + } else if pkg_name.starts_with("rerun.archetypes") { + ObjectKind::Archetype + } else { + panic!("unknown package {pkg_name:?}"); + } + } +} + /// A high-level representation of a flatbuffers object's documentation. #[derive(Debug, Clone)] pub struct Docs { @@ -256,18 +272,7 @@ impl Object { .unwrap(); let docs = Docs::from_raw_docs(obj.documentation()); - - let kind = if pkg_name.starts_with("rerun.datatypes") { - ObjectKind::Datatype - } else if pkg_name.starts_with("rerun.components") { - ObjectKind::Component - } else if pkg_name.starts_with("rerun.archetypes") { - ObjectKind::Archetype - } else { - // TODO(cmc): support IDL definitions from outside the repo - panic!("unknown package {pkg_name:?}"); - }; - + let kind = ObjectKind::from_pkg_name(&pkg_name); let attrs = Attributes::from_raw_attrs(obj.attributes()); let fields = { @@ -312,17 +317,7 @@ impl Object { .unwrap(); let docs = Docs::from_raw_docs(enm.documentation()); - - let kind = if pkg_name.starts_with("rerun.datatypes") { - ObjectKind::Datatype - } else if pkg_name.starts_with("rerun.components") { - ObjectKind::Component - } else if pkg_name.starts_with("rerun.archetypes") { - ObjectKind::Archetype - } else { - // TODO(cmc): support IDL definitions from outside the repo - panic!("unknown package {pkg_name:?}"); - }; + let kind = ObjectKind::from_pkg_name(&pkg_name); let utype = { if enm.underlying_type().base_type() == FbsBaseType::UType { @@ -453,7 +448,8 @@ pub struct ObjectField { /// Whether the field is deprecated. // - // TODO(cmc): implement custom attr to specify deprecation reason + // TODO(#2366): do something with this + // TODO(#2367): implement custom attr to specify deprecation reason pub deprecated: bool, } @@ -570,7 +566,7 @@ impl ObjectField { } } -/// The underlying type of a `ResolvedObjectField`. +/// The underlying type of an [`ObjectField`]. #[derive(Debug, Clone)] pub enum Type { UInt8, @@ -596,24 +592,35 @@ pub enum Type { Object(String), // fqname } +impl From for Type { + fn from(typ: ElementType) -> Self { + match typ { + ElementType::UInt8 => Self::UInt8, + ElementType::UInt16 => Self::UInt16, + ElementType::UInt32 => Self::UInt32, + ElementType::UInt64 => Self::UInt64, + ElementType::Int8 => Self::Int8, + ElementType::Int16 => Self::Int16, + ElementType::Int32 => Self::Int32, + ElementType::Int64 => Self::Int64, + ElementType::Bool => Self::Bool, + ElementType::Float16 => Self::Float16, + ElementType::Float32 => Self::Float32, + ElementType::Float64 => Self::Float64, + ElementType::String => Self::String, + ElementType::Object(fqname) => Self::Object(fqname), + } + } +} + impl Type { pub fn from_raw_type( enums: &[FbsEnum<'_>], objs: &[FbsObject<'_>], field_type: FbsType<'_>, ) -> Self { - fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> Type { - if obj.name().starts_with("fbs.scalars.") { - match obj.name() { - "fbs.scalars.Float32" => Type::Float32, - _ => unimplemented!(), // NOLINT - } - } else { - Type::Object(obj.name().to_owned()) - } - } - - match field_type.base_type() { + let typ = field_type.base_type(); + match typ { FbsBaseType::Bool => Self::Bool, FbsBaseType::Byte => Self::Int8, FbsBaseType::UByte => Self::UInt8, @@ -629,7 +636,7 @@ impl Type { FbsBaseType::String => Self::String, FbsBaseType::Obj => { let obj = &objs[field_type.index() as usize]; - flatten_scalar_wrappers(obj) + flatten_scalar_wrappers(obj).into() } FbsBaseType::Union => { let union = &enums[field_type.index() as usize]; @@ -654,10 +661,11 @@ impl Type { field_type.element(), ), }, - FbsBaseType::None => unimplemented!(), // NOLINT - FbsBaseType::UType => unimplemented!(), // NOLINT - FbsBaseType::Vector64 => unimplemented!(), // NOLINT - _ => unreachable!(), + FbsBaseType::None | FbsBaseType::UType | FbsBaseType::Vector64 => { + unimplemented!("{typ:#?}") // NOLINT + } // NOLINT + // NOTE: `FbsBaseType` isn't actually an enum, it's just a bunch of constants... + _ => unreachable!("{typ:#?}"), } } } @@ -691,18 +699,7 @@ impl ElementType { outer_type: FbsType<'_>, inner_type: FbsBaseType, ) -> Self { - /// Helper to turn wrapped scalars into actual scalars. - fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> ElementType { - if obj.name().starts_with("fbs.scalars.") { - match obj.name() { - "fbs.scalars.Float32" => ElementType::Float32, - _ => unimplemented!(), // NOLINT - } - } else { - ElementType::Object(obj.name().to_owned()) - } - } - + #[allow(clippy::match_same_arms)] match inner_type { FbsBaseType::Bool => Self::Bool, FbsBaseType::Byte => Self::Int8, @@ -713,7 +710,6 @@ impl ElementType { FbsBaseType::UInt => Self::UInt32, FbsBaseType::Long => Self::Int64, FbsBaseType::ULong => Self::UInt64, - // TODO(cmc): half support FbsBaseType::Float => Self::Float32, FbsBaseType::Double => Self::Float64, FbsBaseType::String => Self::String, @@ -721,15 +717,16 @@ impl ElementType { let obj = &objs[outer_type.index() as usize]; flatten_scalar_wrappers(obj) } - FbsBaseType::Union => unimplemented!(), // NOLINT + FbsBaseType::Union => unimplemented!("{inner_type:#?}"), // NOLINT // NOTE: flatbuffers doesn't support directly nesting multiple layers of arrays, they // always have to be wrapped into intermediate layers of structs or tables. - FbsBaseType::Array => unimplemented!(), // NOLINT - FbsBaseType::None => unimplemented!(), // NOLINT - FbsBaseType::UType => unimplemented!(), // NOLINT - FbsBaseType::Vector => unimplemented!(), // NOLINT - FbsBaseType::Vector64 => unimplemented!(), // NOLINT - _ => unreachable!(), + FbsBaseType::None + | FbsBaseType::UType + | FbsBaseType::Array + | FbsBaseType::Vector + | FbsBaseType::Vector64 => unreachable!("{inner_type:#?}"), + // NOTE: `FbsType` isn't actually an enum, it's just a bunch of constants... + _ => unreachable!("{inner_type:#?}"), } } } @@ -777,9 +774,10 @@ impl Attributes { value_str .parse() .with_context(|| { + let type_of_t = std::any::type_name::(); format!( "invalid `{name}` attribute for `{owner_fqname}`: \ - expected unsigned integer, got `{value_str}` instead" + expected {type_of_t}, got `{value_str}` instead" ) }) .unwrap() @@ -803,12 +801,26 @@ impl Attributes { value_str .parse() .with_context(|| { + let type_of_t = std::any::type_name::(); format!( "invalid `{name}` attribute for `{owner_fqname}`: \ - expected unsigned integer, got `{value_str}` instead" + expected {type_of_t}, got `{value_str}` instead" ) }) .unwrap(), ) } } + +/// Helper to turn wrapped scalars into actual scalars. +fn flatten_scalar_wrappers(obj: &FbsObject<'_>) -> ElementType { + let name = obj.name(); + if name.starts_with("fbs.scalars.") { + match name { + "fbs.scalars.Float32" => ElementType::Float32, + _ => unimplemented!("{name:#?}"), // NOLINT + } + } else { + ElementType::Object(name.to_owned()) + } +} From fb1d9e6d93cc13de9aefc16023b78b4a5f59f181 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 12 Jun 2023 11:13:24 +0200 Subject: [PATCH 08/18] adhering to py38+ style guide --- crates/re_types_builder/src/codegen/python.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 2cba90d0a457..4d06dcc60e86 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -294,7 +294,7 @@ impl QuotedObject { let typ = if field.required { typ } else { - format!("Optional[{typ}] = None") + format!("{typ} | None = None") }; code.push_str(&indent::indent_all_by(4, format!("{name}: {typ}\n"))); @@ -397,7 +397,7 @@ impl QuotedObject { // NOTE: It's always optional since only one of the fields can be set at a time. code.push_str(&indent::indent_all_by( 4, - format!("{name}: Optional[{typ}] = None\n"), + format!("{name}: {typ} | None = None\n"), )); code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); @@ -794,7 +794,7 @@ fn quote_arrow_support_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> class {many}(pa.ExtensionArray, {many}Ext): # type: ignore[misc] @staticmethod - def from_similar(data: Optional[{many_aliases}]): + def from_similar(data: {many_aliases} | None): if data is None: return {arrow}().wrap_array(pa.array([], type={arrow}().storage_type)) else: @@ -849,9 +849,9 @@ fn quote_builder_from_obj(objects: &Objects, obj: &Object) -> String { let (typ, unwrapped) = quote_field_type_from_field(objects, field, true); if unwrapped { // This was originally a vec/array! - format!("{}: Optional[{typ}ArrayLike] = None", field.name) + format!("{}: {typ}ArrayLike | None = None", field.name) } else { - format!("{}: Optional[{typ}Like] = None", field.name) + format!("{}: {typ}Like | None = None", field.name) } }) .collect::>() From d8c094e1fc31813100d2c9affa8a860716c42e85 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 12 Jun 2023 17:25:01 +0200 Subject: [PATCH 09/18] vscode flatbuffer things. Fix comment typo --- crates/re_types_builder/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_types_builder/src/lib.rs b/crates/re_types_builder/src/lib.rs index 23ee304525b7..436126e7b885 100644 --- a/crates/re_types_builder/src/lib.rs +++ b/crates/re_types_builder/src/lib.rs @@ -18,7 +18,7 @@ //! ####. 2. Run the semantic pass. //! //! The semantic pass transforms the low-level raw reflection data generated by the first phase -//! into higher level objects that are much easier to inspect/manipulate and overall friendler +//! into higher level objects that are much easier to inspect/manipulate and overall friendlier //! to work with. //! //! Look for `objects.rs`. From 7ae14c3d5cb990c9a04b49ba962c4438211ba00e Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Tue, 13 Jun 2023 10:13:30 +0200 Subject: [PATCH 10/18] turn the inner from_similar into _from_similar to appease linters --- crates/re_types_builder/src/codegen/python.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 4d06dcc60e86..548c5ebfebd0 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -798,7 +798,7 @@ fn quote_arrow_support_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> if data is None: return {arrow}().wrap_array(pa.array([], type={arrow}().storage_type)) else: - return {many}Ext.from_similar( + return {many}Ext._from_similar( data, mono={mono}, mono_aliases={mono_aliases}, From 0bc204cba4d83d288056d30fb6b462d1d73130c4 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Tue, 13 Jun 2023 11:21:13 +0200 Subject: [PATCH 11/18] generate __all__ everywhere to make python tools behave --- crates/re_types_builder/src/codegen/python.rs | 66 ++++++++++++++----- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 548c5ebfebd0..9c4bb20f72d4 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -93,17 +93,24 @@ fn quote_lib(out_path: impl AsRef, archetype_names: &[String]) -> PathBuf .unwrap(); let path = out_path.join("__init__.py"); + let all_names = archetype_names + .iter() + .map(|name| format!("{name:?}")) + .collect::>() + .join(", "); let archetype_names = archetype_names.join(", "); let mut code = String::new(); - // NOTE: noqa F401 (unused import) because while unnecessary these listings are - // very useful to look at. code += &unindent::unindent(&format!( r#" # {AUTOGEN_WARNING} - from .archetypes import {archetype_names} # noqa: F401 + from __future__ import annotations + + __all__ = [{all_names}] + + from .archetypes import {archetype_names} "# )); @@ -154,12 +161,19 @@ fn quote_objects( for (filepath, objs) in files { let names = objs .iter() - .map(|obj| match obj.kind { + .flat_map(|obj| match obj.kind { ObjectKind::Datatype | ObjectKind::Component => { let name = &obj.name; - format!("{name}, {name}Like, {name}Array, {name}ArrayLike, {name}Type") + + vec![ + format!("{name}"), + format!("{name}Like"), + format!("{name}Array"), + format!("{name}ArrayLike"), + format!("{name}Type"), + ] } - ObjectKind::Archetype => obj.name.clone(), + ObjectKind::Archetype => vec![obj.name.clone()], }) .collect::>(); @@ -167,10 +181,10 @@ fn quote_objects( // and archetypes separately (and even then it's a bit shady, eh). match mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) { std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().extend(names); + entry.get_mut().extend(names.iter().cloned()); } std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(names); + entry.insert(names.clone()); } }; @@ -182,6 +196,20 @@ fn quote_objects( let mut code = String::new(); code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); + let names = names + .into_iter() + .map(|name| format!("{name:?}")) + .collect::>() + .join(", "); + code.push_str(&unindent::unindent(&format!( + " + from __future__ import annotations + + __all__ = [{names}] + + ", + ))); + for obj in objs { code.push_str(&obj.code); code.push('\n'); @@ -197,20 +225,26 @@ fn quote_objects( let mut code = String::new(); + let all_names = mods + .iter() + .flat_map(|(_, names)| names.iter().map(|name| format!("{name:?}"))) + .collect::>() + .join(", "); + code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); - code.push_str(&unindent::unindent( + code.push_str(&unindent::unindent(&format!( " - # NOTE: - # - we use fully qualified paths to prevent lazy circular imports - # - `noqa F401` (unused import) everywhere because, while not strictly necessary, - # these imports are very nice for end users. + from __future__ import annotations + __all__ = [{all_names}] + + # NOTE: we use fully qualified paths to prevent lazy circular imports. ", - )); + ))); for (module, names) in &mods { let names = names.join(", "); - code.push_str(&format!("from .{module} import {names} # noqa: F401\n")); + code.push_str(&format!("from .{module} import {names}\n")); } filepaths.push(path.clone()); @@ -442,8 +476,6 @@ fn quote_module_prelude() -> String { // NOTE: All the extraneous stuff will be cleaned up courtesy of `ruff`. unindent::unindent( r#" - from __future__ import annotations - import numpy as np import numpy.typing as npt import pyarrow as pa From 6db76c76be8da6180c1f2bc7516814359fb8cf64 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Tue, 13 Jun 2023 12:21:49 +0200 Subject: [PATCH 12/18] make sure __all__ manifests are lexically sorted --- crates/re_types_builder/src/codegen/python.rs | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 9c4bb20f72d4..78f5c1e9cf93 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -93,11 +93,7 @@ fn quote_lib(out_path: impl AsRef, archetype_names: &[String]) -> PathBuf .unwrap(); let path = out_path.join("__init__.py"); - let all_names = archetype_names - .iter() - .map(|name| format!("{name:?}")) - .collect::>() - .join(", "); + let manifest = quote_manifest(archetype_names); let archetype_names = archetype_names.join(", "); let mut code = String::new(); @@ -108,7 +104,7 @@ fn quote_lib(out_path: impl AsRef, archetype_names: &[String]) -> PathBuf from __future__ import annotations - __all__ = [{all_names}] + __all__ = [{manifest}] from .archetypes import {archetype_names} "# @@ -196,16 +192,12 @@ fn quote_objects( let mut code = String::new(); code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); - let names = names - .into_iter() - .map(|name| format!("{name:?}")) - .collect::>() - .join(", "); + let manifest = quote_manifest(names); code.push_str(&unindent::unindent(&format!( " from __future__ import annotations - __all__ = [{names}] + __all__ = [{manifest}] ", ))); @@ -225,18 +217,14 @@ fn quote_objects( let mut code = String::new(); - let all_names = mods - .iter() - .flat_map(|(_, names)| names.iter().map(|name| format!("{name:?}"))) - .collect::>() - .join(", "); + let manifest = quote_manifest(mods.iter().flat_map(|(_, names)| names.iter())); code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); code.push_str(&unindent::unindent(&format!( " from __future__ import annotations - __all__ = [{all_names}] + __all__ = [{manifest}] # NOTE: we use fully qualified paths to prevent lazy circular imports. ", @@ -472,6 +460,16 @@ impl QuotedObject { // --- Code generators --- +fn quote_manifest(names: impl IntoIterator>) -> String { + let mut quoted_names: Vec<_> = names + .into_iter() + .map(|name| format!("{:?}", name.as_ref())) + .collect(); + quoted_names.sort(); + + quoted_names.join(", ") +} + fn quote_module_prelude() -> String { // NOTE: All the extraneous stuff will be cleaned up courtesy of `ruff`. unindent::unindent( From 8d69d7ebc42cbccc1aaee82854256bb9a1c0674f Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 14 Jun 2023 10:49:25 +0200 Subject: [PATCH 13/18] adressing PR comments --- crates/re_types_builder/build.rs | 14 +- .../definitions/reflection.fbs | 3 + crates/re_types_builder/source_hash.txt | 2 +- crates/re_types_builder/src/arrow_registry.rs | 9 +- crates/re_types_builder/src/codegen/common.rs | 19 ++ crates/re_types_builder/src/codegen/mod.rs | 6 +- crates/re_types_builder/src/codegen/python.rs | 264 ++++++++---------- crates/re_types_builder/src/codegen/rust.rs | 142 +++++----- crates/re_types_builder/src/lib.rs | 99 ++++--- crates/re_types_builder/src/objects.rs | 6 +- 10 files changed, 274 insertions(+), 290 deletions(-) diff --git a/crates/re_types_builder/build.rs b/crates/re_types_builder/build.rs index 5c949bf4f298..bac5e2d4deb6 100644 --- a/crates/re_types_builder/build.rs +++ b/crates/re_types_builder/build.rs @@ -3,8 +3,7 @@ use xshell::{cmd, Shell}; use re_build_tools::{ - compute_file_hash, is_tracked_env_var_set, read_versioning_hash, rerun_if_changed, - rerun_if_changed_or_doesnt_exist, write_versioning_hash, + compute_file_hash, is_tracked_env_var_set, read_versioning_hash, write_versioning_hash, }; // --- @@ -33,9 +32,14 @@ fn main() { return; } - rerun_if_changed_or_doesnt_exist(SOURCE_HASH_PATH); - rerun_if_changed(FBS_REFLECTION_DEFINITION_PATH); - + // We're building an actual build graph here, and Cargo has no idea about it. + // + // Worse: some nodes in our build graph actually output artifacts into the src/ directory, + // which Cargo always interprets as "need to rebuild everything ASAP", leading to an infinite + // feedback loop. + // + // For these reasons, we manually compute and track signature hashes for the graph nodes we + // depend on, and make sure to exit early if everything's already up to date. let cur_hash = read_versioning_hash(SOURCE_HASH_PATH); let new_hash = compute_file_hash(FBS_REFLECTION_DEFINITION_PATH); diff --git a/crates/re_types_builder/definitions/reflection.fbs b/crates/re_types_builder/definitions/reflection.fbs index 513311f1b9c3..cf6cb9e3c4e4 100644 --- a/crates/re_types_builder/definitions/reflection.fbs +++ b/crates/re_types_builder/definitions/reflection.fbs @@ -1,3 +1,6 @@ +// Copied verbatim from the official flatbuffers source tree: +// https://github.com/google/flatbuffers/blob/63b7b25289447313ab6e79191fa1733748dca0da/reflection/reflection.fbs + // This schema defines objects that represent a parsed schema, like // the binary version of a .fbs file. // This could be used to operate on unknown FlatBuffers at runtime. diff --git a/crates/re_types_builder/source_hash.txt b/crates/re_types_builder/source_hash.txt index f1766f06bb26..2d51980aefb3 100644 --- a/crates/re_types_builder/source_hash.txt +++ b/crates/re_types_builder/source_hash.txt @@ -1,4 +1,4 @@ # This is a sha256 hash for all direct and indirect dependencies of this crate's build script. # It can be safely removed at anytime to force the build script to run again. # Check out build.rs to see how it's computed. -72d936d50287d6c16d0c1b91f86bd74120642c8fc08e885f08dd2b92bb52e8a4 \ No newline at end of file +674450e64722effea9b29a84c34fab81b9d57497136a8f161561b8e19f7441b2 \ No newline at end of file diff --git a/crates/re_types_builder/src/arrow_registry.rs b/crates/re_types_builder/src/arrow_registry.rs index e0edc01cddf5..505d0ccd253a 100644 --- a/crates/re_types_builder/src/arrow_registry.rs +++ b/crates/re_types_builder/src/arrow_registry.rs @@ -4,12 +4,7 @@ use anyhow::Context as _; use arrow2::datatypes::{DataType, Field, UnionMode}; use std::collections::{BTreeMap, HashMap}; -use crate::{ElementType, Object, Type}; - -// --- - -pub const ARROW_ATTR_TRANSPARENT: &str = "arrow.attr.transparent"; -pub const ARROW_ATTR_SPARSE_UNION: &str = "arrow.attr.sparse_union"; +use crate::{ElementType, Object, Type, ARROW_ATTR_SPARSE_UNION, ARROW_ATTR_TRANSPARENT}; // --- Registry --- @@ -59,7 +54,7 @@ impl ArrowRegistry { let num_fields = obj.fields.len(); assert!( - !(is_transparent && (!is_struct || num_fields != 1)), + !is_transparent || (is_struct && num_fields == 1), "cannot have a transparent arrow object with any number of fields but 1: {:?} has {num_fields}", obj.fqname, ); diff --git a/crates/re_types_builder/src/codegen/common.rs b/crates/re_types_builder/src/codegen/common.rs index 4317b76c481c..d0bd52a5716c 100644 --- a/crates/re_types_builder/src/codegen/common.rs +++ b/crates/re_types_builder/src/codegen/common.rs @@ -39,3 +39,22 @@ pub fn quote_doc_from_docs(docs: &Docs, tags: &[&str]) -> Vec { lines } + +pub trait StringExt { + fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self; + fn push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self; +} + +impl StringExt for String { + fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self { + self.push_str(&indent::indent_all_by(indent, text.as_ref())); + self.push_str(&vec!["\n"; linefeeds].join("")); + self + } + + fn push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self { + self.push_str(&unindent::unindent(text.as_ref())); + self.push_str(&vec!["\n"; linefeeds].join("")); + self + } +} diff --git a/crates/re_types_builder/src/codegen/mod.rs b/crates/re_types_builder/src/codegen/mod.rs index eb2e4d553265..240fabd14e9a 100644 --- a/crates/re_types_builder/src/codegen/mod.rs +++ b/crates/re_types_builder/src/codegen/mod.rs @@ -15,14 +15,10 @@ pub trait CodeGenerator { pub const AUTOGEN_WARNING: &str = "NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT."; -pub const RERUN_ATTR_COMPONENT_REQUIRED: &str = "rerun.attr.component_required"; -pub const RERUN_ATTR_COMPONENT_RECOMMENDED: &str = "rerun.attr.component_recommended"; -pub const RERUN_ATTR_COMPONENT_OPTIONAL: &str = "rerun.attr.component_optional"; - // --- mod common; -use self::common::quote_doc_from_docs; +use self::common::{quote_doc_from_docs, StringExt}; mod python; mod rust; diff --git a/crates/re_types_builder/src/codegen/python.rs b/crates/re_types_builder/src/codegen/python.rs index 78f5c1e9cf93..a695cbee5970 100644 --- a/crates/re_types_builder/src/codegen/python.rs +++ b/crates/re_types_builder/src/codegen/python.rs @@ -8,8 +8,9 @@ use std::{ }; use crate::{ - codegen::AUTOGEN_WARNING, ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, - ObjectKind, Objects, Type, + codegen::{StringExt as _, AUTOGEN_WARNING}, + ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, + Type, PYTHON_ATTR_ALIASES, PYTHON_ATTR_ARRAY_ALIASES, PYTHON_ATTR_TRANSPARENT, }; // --- @@ -17,10 +18,6 @@ use crate::{ // NOTE: `rerun2` while we figure out how to integrate back into the main SDK. const MODULE_NAME: &str = "rerun2"; -pub const ATTR_TRANSPARENT: &str = "python.attr.transparent"; -pub const ATTR_ALIASES: &str = "python.attr.aliases"; -pub const ATTR_ARRAY_ALIASES: &str = "python.attr.array_aliases"; - pub struct PythonCodeGenerator { pkg_path: PathBuf, } @@ -138,16 +135,9 @@ fn quote_objects( } else { QuotedObject::from_union(arrow_registry, all_objects, obj) }; - let filepath = out_path.join(obj.filepath.file_name().unwrap()); - match files.entry(filepath.clone()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().push(obj); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(vec![obj]); - } - }; + let filepath = out_path.join(obj.filepath.file_name().unwrap()); + files.entry(filepath.clone()).or_default().push(obj); } // (module_name, [object_name]) @@ -175,14 +165,9 @@ fn quote_objects( // NOTE: Isolating the file stem only works because we're handling datatypes, components // and archetypes separately (and even then it's a bit shady, eh). - match mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().extend(names.iter().cloned()); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(names.clone()); - } - }; + mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) + .or_default() + .extend(names.iter().cloned()); filepaths.push(filepath.clone()); let mut file = std::fs::File::create(&filepath) @@ -190,21 +175,23 @@ fn quote_objects( .unwrap(); let mut code = String::new(); - code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); + code.push_text(&format!("# {AUTOGEN_WARNING}"), 2, 0); let manifest = quote_manifest(names); - code.push_str(&unindent::unindent(&format!( - " - from __future__ import annotations + code.push_unindented_text( + format!( + " + from __future__ import annotations - __all__ = [{manifest}] + __all__ = [{manifest}] - ", - ))); + ", + ), + 0, + ); for obj in objs { - code.push_str(&obj.code); - code.push('\n'); + code.push_text(&obj.code, 1, 0); } file.write_all(code.as_bytes()) .with_context(|| format!("{filepath:?}")) @@ -219,20 +206,23 @@ fn quote_objects( let manifest = quote_manifest(mods.iter().flat_map(|(_, names)| names.iter())); - code.push_str(&format!("# {AUTOGEN_WARNING}\n\n")); - code.push_str(&unindent::unindent(&format!( - " - from __future__ import annotations + code.push_text(&format!("# {AUTOGEN_WARNING}"), 2, 0); + code.push_unindented_text( + format!( + " + from __future__ import annotations - __all__ = [{manifest}] + __all__ = [{manifest}] - # NOTE: we use fully qualified paths to prevent lazy circular imports. - ", - ))); + # NOTE: we use fully qualified paths to prevent lazy circular imports. + ", + ), + 0, + ); for (module, names) in &mods { let names = names.join(", "); - code.push_str(&format!("from .{module} import {names}\n")); + code.push_text(&format!("from .{module} import {names}"), 1, 0); } filepaths.push(path.clone()); @@ -272,26 +262,28 @@ impl QuotedObject { let mut code = String::new(); - code.push_str("e_module_prelude()); + code.push_text("e_module_prelude(), 0, 0); for clause in obj .fields .iter() .filter_map(quote_import_clauses_from_field) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } - code.push_str(&unindent::unindent(&format!( - r#" + code.push_unindented_text( + format!( + r#" - @dataclass - class {name}: - "# - ))); + @dataclass + class {name}: + "# + ), + 0, + ); - code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); + code.push_text(quote_doc_from_docs(docs), 0, 4); for field in fields { let ObjectField { @@ -319,39 +311,22 @@ impl QuotedObject { format!("{typ} | None = None") }; - code.push_str(&indent::indent_all_by(4, format!("{name}: {typ}\n"))); + code.push_text(format!("{name}: {typ}"), 1, 4); - code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); + code.push_text(quote_doc_from_docs(docs), 0, 4); } - code.push_str(&indent::indent_all_by(4, quote_str_repr_from_obj(obj))); - code.push('\n'); - - code.push_str(&indent::indent_all_by( - 4, - quote_array_method_from_obj(objects, obj), - )); - code.push('\n'); - - code.push_str(&indent::indent_all_by( - 4, - quote_str_method_from_obj(objects, obj), - )); - code.push('\n'); + code.push_text(quote_str_repr_from_obj(obj), 1, 4); + code.push_text(quote_array_method_from_obj(objects, obj), 1, 4); + code.push_text(quote_str_method_from_obj(objects, obj), 1, 4); if obj.kind == ObjectKind::Archetype { - code.push_str(&indent::indent_all_by( - 4, - quote_builder_from_obj(objects, obj), - )); - code.push('\n'); + code.push_text(quote_builder_from_obj(objects, obj), 1, 4); } else { - code.push_str("e_aliases_from_object(obj)); - code.push('\n'); + code.push_text(quote_aliases_from_object(obj), 1, 4); } - code.push_str("e_arrow_support_from_obj(arrow_registry, obj)); - code.push('\n'); + code.push_text(quote_arrow_support_from_obj(arrow_registry, obj), 1, 4); let mut filepath = PathBuf::from(filepath); filepath.set_extension("py"); @@ -381,26 +356,28 @@ impl QuotedObject { let mut code = String::new(); - code.push_str("e_module_prelude()); + code.push_text("e_module_prelude(), 0, 0); for clause in obj .fields .iter() .filter_map(quote_import_clauses_from_field) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } - code.push_str(&unindent::unindent(&format!( - r#" + code.push_unindented_text( + format!( + r#" - @dataclass - class {name}: - "# - ))); + @dataclass + class {name}: + "# + ), + 0, + ); - code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); + code.push_text(quote_doc_from_docs(docs), 0, 4); for field in fields { let ObjectField { @@ -417,34 +394,16 @@ impl QuotedObject { let (typ, _) = quote_field_type_from_field(objects, field, false); // NOTE: It's always optional since only one of the fields can be set at a time. - code.push_str(&indent::indent_all_by( - 4, - format!("{name}: {typ} | None = None\n"), - )); + code.push_text(format!("{name}: {typ} | None = None"), 1, 4); - code.push_str(&indent::indent_all_by(4, quote_doc_from_docs(docs))); + code.push_text(quote_doc_from_docs(docs), 0, 4); } - code.push_str(&indent::indent_all_by(4, quote_str_repr_from_obj(obj))); - code.push('\n'); - - code.push_str(&indent::indent_all_by( - 4, - quote_array_method_from_obj(objects, obj), - )); - code.push('\n'); - - code.push_str(&indent::indent_all_by( - 4, - quote_str_method_from_obj(objects, obj), - )); - code.push('\n'); - - code.push_str("e_aliases_from_object(obj)); - code.push('\n'); - - code.push_str("e_arrow_support_from_obj(arrow_registry, obj)); - code.push('\n'); + code.push_text(quote_str_repr_from_obj(obj), 1, 4); + code.push_text(quote_array_method_from_obj(objects, obj), 1, 4); + code.push_text(quote_str_method_from_obj(objects, obj), 1, 4); + code.push_text(quote_aliases_from_object(obj), 1, 4); + code.push_text(quote_arrow_support_from_obj(arrow_registry, obj), 1, 4); let mut filepath = PathBuf::from(filepath); filepath.set_extension("py"); @@ -585,38 +544,43 @@ fn quote_str_method_from_obj(objects: &Objects, obj: &Object) -> String { fn quote_aliases_from_object(obj: &Object) -> String { assert!(obj.kind != ObjectKind::Archetype); - let aliases = obj.try_get_attr::(ATTR_ALIASES); + let aliases = obj.try_get_attr::(PYTHON_ATTR_ALIASES); let array_aliases = obj - .try_get_attr::(ATTR_ARRAY_ALIASES) + .try_get_attr::(PYTHON_ATTR_ARRAY_ALIASES) .unwrap_or_default(); let name = &obj.name; let mut code = String::new(); - code.push_str(&if let Some(aliases) = aliases { - unindent::unindent(&format!( + code.push_unindented_text( + &if let Some(aliases) = aliases { + format!( + r#" + {name}Like = Union[ + {name}, + {aliases} + ] + "#, + ) + } else { + format!("{name}Like = {name}") + }, + 1, + ); + + code.push_unindented_text( + format!( r#" - {name}Like = Union[ - {name}, - {aliases} + {name}ArrayLike = Union[ + {name}Like, + Sequence[{name}Like], + {array_aliases} ] - "#, - )) - } else { - format!("{name}Like = {name}\n") - }); - - code.push_str(&unindent::unindent(&format!( - r#" - {name}ArrayLike = Union[ - {name}Like, - Sequence[{name}Like], - {array_aliases} - ] - "#, - ))); + ), + 0, + ); code } @@ -657,6 +621,10 @@ fn quote_import_clauses_from_field(field: &ObjectField) -> Option { } /// Returns type name as string and whether it was force unwrapped. +/// +/// Specifying `unwrap = true` will unwrap the final type before returning it, e.g. `Vec` +/// becomes just `String`. +/// The returned boolean indicates whether there was anything to unwrap at all. fn quote_field_type_from_field( objects: &Objects, field: &ObjectField, @@ -713,7 +681,9 @@ fn quote_field_type_from_field( // TODO(cmc): it is a bit weird to be doing the transparency logic (which is language // agnostic) in a python specific quoting function... a static helper at the very least // would be nice. - let is_transparent = field.try_get_attr::(ATTR_TRANSPARENT).is_some(); + let is_transparent = field + .try_get_attr::(PYTHON_ATTR_TRANSPARENT) + .is_some(); if is_transparent { let target = objects.get(fqname); assert!( @@ -887,30 +857,34 @@ fn quote_builder_from_obj(objects: &Objects, obj: &Object) -> String { .collect::>() .join(", "); - code.push_str(&format!( - "def __init__(self, {required_args}, *, {optional_args}) -> None:\n" - )); + code.push_text( + format!("def __init__(self, {required_args}, *, {optional_args}) -> None:"), + 1, + 0, + ); - code.push_str(&indent::indent_all_by(4, "# Required components\n")); + code.push_text("# Required components", 1, 4); for field in required { let name = &field.name; let (typ, _) = quote_field_type_from_field(objects, field, true); - code.push_str(&indent::indent_all_by( + code.push_text( + format!("self.{name} = {typ}Array.from_similar({name})"), + 1, 4, - format!("self.{name} = {typ}Array.from_similar({name})\n"), - )); + ); } code.push('\n'); - code.push_str(&indent::indent_all_by(4, "# Optional components\n")); + code.push_text("# Optional components\n", 1, 4); for field in optional { let name = &field.name; let (typ, _) = quote_field_type_from_field(objects, field, true); - code.push_str(&indent::indent_all_by( + code.push_text( + format!("self.{name} = {typ}Array.from_similar({name})"), + 1, 4, - format!("self.{name} = {typ}Array.from_similar({name})\n"), - )); + ); } code diff --git a/crates/re_types_builder/src/codegen/rust.rs b/crates/re_types_builder/src/codegen/rust.rs index 0a928185ae47..ae87dedf9398 100644 --- a/crates/re_types_builder/src/codegen/rust.rs +++ b/crates/re_types_builder/src/codegen/rust.rs @@ -8,20 +8,14 @@ use std::{ }; use crate::{ - codegen::{ - AUTOGEN_WARNING, RERUN_ATTR_COMPONENT_OPTIONAL, RERUN_ATTR_COMPONENT_RECOMMENDED, - RERUN_ATTR_COMPONENT_REQUIRED, - }, + codegen::{StringExt as _, AUTOGEN_WARNING}, ArrowRegistry, CodeGenerator, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, - Type, + Type, RERUN_ATTR_COMPONENT_OPTIONAL, RERUN_ATTR_COMPONENT_RECOMMENDED, + RERUN_ATTR_COMPONENT_REQUIRED, RUST_ATTR_DERIVE, RUST_ATTR_REPR, RUST_ATTR_TUPLE_STRUCT, }; // --- -pub const ATTR_DERIVE: &str = "rust.attr.derive"; -pub const ATTR_REPR: &str = "rust.attr.repr"; -pub const ATTR_TUPLE_STRUCT: &str = "rust.attr.tuple_struct"; - pub struct RustCodeGenerator { crate_path: PathBuf, } @@ -92,15 +86,7 @@ fn quote_objects( }; let filepath = out_path.join(obj.filepath.file_name().unwrap()); - - match files.entry(filepath.clone()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().push(obj); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(vec![obj]); - } - }; + files.entry(filepath.clone()).or_default().push(obj); } // (module_name, [object_name]) @@ -111,14 +97,9 @@ fn quote_objects( // NOTE: Isolating the file stem only works because we're handling datatypes, components // and archetypes separately (and even then it's a bit shady, eh). let names = objs.iter().map(|obj| obj.name.clone()).collect::>(); - match mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().extend(names); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(names); - } - }; + mods.entry(filepath.file_stem().unwrap().to_string_lossy().to_string()) + .or_default() + .extend(names); filepaths.push(filepath.clone()); let mut file = std::fs::File::create(&filepath) @@ -126,11 +107,10 @@ fn quote_objects( .unwrap(); let mut code = String::new(); - code.push_str(&format!("// {AUTOGEN_WARNING}\n\n")); + code.push_text(format!("// {AUTOGEN_WARNING}"), 2, 0); for obj in objs { - code.push_str(&obj.code); - code.push('\n'); + code.push_text(&obj.code, 1, 0); } file.write_all(code.as_bytes()) .with_context(|| format!("{filepath:?}")) @@ -143,17 +123,17 @@ fn quote_objects( let mut code = String::new(); - code.push_str(&format!("// {AUTOGEN_WARNING}\n\n")); + code.push_text(format!("// {AUTOGEN_WARNING}"), 2, 0); for module in mods.keys() { - code.push_str(&format!("mod {module};\n")); + code.push_text(format!("mod {module};"), 1, 0); // Detect if someone manually created an extension file, and automatically // import it if so. let mut ext_path = out_path.join(format!("{module}_ext")); ext_path.set_extension("rs"); if ext_path.exists() { - code.push_str(&format!("mod {module}_ext;\n")); + code.push_text(format!("mod {module}_ext;"), 1, 0); } } @@ -161,7 +141,7 @@ fn quote_objects( for (module, names) in &mods { let names = names.join(", "); - code.push_str(&format!("pub use self::{module}::{{{names}}};\n")); + code.push_text(format!("pub use self::{module}::{{{names}}};"), 1, 0); } filepaths.push(path.clone()); @@ -200,23 +180,21 @@ impl QuotedObject { let mut code = String::new(); - code.push_str("e_doc_from_docs(docs)); + code.push_text("e_doc_from_docs(docs), 0, 0); if let Some(clause) = quote_derive_clause_from_obj(obj) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } if let Some(clause) = quote_repr_clause_from_obj(obj) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } let is_tuple_struct = is_tuple_struct_from_obj(obj); if is_tuple_struct { - code.push_str(&format!("pub struct {name}(")); + code.push_text(&format!("pub struct {name}("), 0, 0); } else { - code.push_str(&format!("pub struct {name} {{\n")); + code.push_text(&format!("pub struct {name} {{"), 1, 0); } for field in fields { @@ -233,7 +211,7 @@ impl QuotedObject { deprecated: _, } = field; - code.push_str("e_doc_from_docs(docs)); + code.push_text("e_doc_from_docs(docs), 0, 0); let (typ, _) = quote_field_type_from_field(field, false); let typ = if *required { @@ -243,9 +221,9 @@ impl QuotedObject { }; if is_tuple_struct { - code.push_str(&format!("pub {typ}")); + code.push_text(&format!("pub {typ}"), 0, 0); } else { - code.push_str(&format!("pub {name}: {typ},\n\n")); + code.push_text(&format!("pub {name}: {typ},"), 2, 0); } } @@ -255,11 +233,10 @@ impl QuotedObject { code += "}\n\n"; } - code.push_str("e_trait_impls_from_obj(arrow_registry, obj)); - code.push('\n'); + code.push_text("e_trait_impls_from_obj(arrow_registry, obj), 1, 0); if kind == &ObjectKind::Archetype { - code.push_str("e_builder_from_obj(obj)); + code.push_text("e_builder_from_obj(obj), 0, 0); } let mut filepath = PathBuf::from(filepath); @@ -289,18 +266,16 @@ impl QuotedObject { let mut code = String::new(); - code.push_str("e_doc_from_docs(docs)); + code.push_text("e_doc_from_docs(docs), 0, 0); if let Some(clause) = quote_derive_clause_from_obj(obj) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } if let Some(clause) = quote_repr_clause_from_obj(obj) { - code.push_str(&clause); - code.push('\n'); + code.push_text(&clause, 1, 0); } - code.push_str(&format!("pub enum {name} {{\n")); + code.push_text(&format!("pub enum {name} {{"), 1, 0); for field in fields { let ObjectField { @@ -315,17 +290,16 @@ impl QuotedObject { deprecated: _, } = field; - code.push_str("e_doc_from_docs(docs)); + code.push_text("e_doc_from_docs(docs), 0, 0); let (typ, _) = quote_field_type_from_field(field, false); - code.push_str(&format!("{name}({typ}),\n\n")); + code.push_text(&format!("{name}({typ}),"), 2, 0); } code += "}\n\n"; - code.push_str("e_trait_impls_from_obj(arrow_registry, obj)); - code.push('\n'); + code.push_text("e_trait_impls_from_obj(arrow_registry, obj), 1, 0); let mut filepath = PathBuf::from(filepath); filepath.set_extension("rs"); @@ -353,6 +327,10 @@ fn quote_doc_from_docs(docs: &Docs) -> String { } /// Returns type name as string and whether it was force unwrapped. +/// +/// Specifying `unwrap = true` will unwrap the final type before returning it, e.g. `Vec` +/// becomes just `String`. +/// The returned boolean indicates whether there was anything to unwrap at all. fn quote_field_type_from_field(field: &ObjectField, unwrap: bool) -> (String, bool) { let mut unwrapped = false; let typ = &field.typ; @@ -414,19 +392,19 @@ fn quote_type_from_element_type(typ: &ElementType) -> String { } fn quote_derive_clause_from_obj(obj: &Object) -> Option { - obj.try_get_attr::(ATTR_DERIVE) + obj.try_get_attr::(RUST_ATTR_DERIVE) .map(|what| format!("#[derive({what})]")) } fn quote_repr_clause_from_obj(obj: &Object) -> Option { - obj.try_get_attr::(ATTR_REPR) + obj.try_get_attr::(RUST_ATTR_REPR) .map(|what| format!("#[repr({what})]")) } fn is_tuple_struct_from_obj(obj: &Object) -> bool { obj.is_struct() && obj.fields.len() == 1 - && obj.try_get_attr::(ATTR_TUPLE_STRUCT).is_some() + && obj.try_get_attr::(RUST_ATTR_TUPLE_STRUCT).is_some() } fn quote_trait_impls_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> String { @@ -571,7 +549,7 @@ fn quote_builder_from_obj(obj: &Object) -> String { let mut code = String::new(); - code.push_str(&format!("impl {name} {{\n")); + code.push_text(&format!("impl {name} {{"), 1, 0); { // --- impl new() --- @@ -591,7 +569,7 @@ fn quote_builder_from_obj(obj: &Object) -> String { }) .collect::>() .join(", "); - code.push_str(&format!("pub fn new({new_params}) -> Self {{\n")); + code.push_text(&format!("pub fn new({new_params}) -> Self {{"), 1, 0); { code += "Self {\n"; { @@ -599,16 +577,20 @@ fn quote_builder_from_obj(obj: &Object) -> String { let (_, unwrapped) = quote_field_type_from_field(field, true); if unwrapped { // This was originally a vec/array! - code.push_str(&format!( - "{}: {}.into_iter().map(Into::into).collect(),\n", - field.name, field.name - )); + code.push_text( + &format!( + "{}: {}.into_iter().map(Into::into).collect(),", + field.name, field.name + ), + 1, + 0, + ); } else { - code.push_str(&format!("{}: {}.into(),\n", field.name, field.name)); + code.push_text(&format!("{}: {}.into(),", field.name, field.name), 1, 0); } } for field in &optional { - code.push_str(&format!("{}: None,\n", field.name)); + code.push_text(&format!("{}: None,", field.name), 1, 0); } } code += "}\n"; @@ -623,21 +605,27 @@ fn quote_builder_from_obj(obj: &Object) -> String { if unwrapped { // This was originally a vec/array! - code.push_str(&format!( - "pub fn with_{name}(mut self, {name}: impl IntoIterator>) -> Self {{\n", - )); + code.push_text(&format!( + "pub fn with_{name}(mut self, {name}: impl IntoIterator>) -> Self {{", + ), 1, 0); { - code.push_str(&format!( - "self.{name} = Some({name}.into_iter().map(Into::into).collect());\n" - )); + code.push_text( + &format!( + "self.{name} = Some({name}.into_iter().map(Into::into).collect());" + ), + 1, + 0, + ); code += "self\n"; } } else { - code.push_str(&format!( - "pub fn with_{name}(mut self, {name}: impl Into<{typ}>) -> Self {{\n", - )); + code.push_text( + &format!("pub fn with_{name}(mut self, {name}: impl Into<{typ}>) -> Self {{",), + 1, + 0, + ); { - code.push_str(&format!("self.{name} = Some({name}.into());\n")); + code.push_text(&format!("self.{name} = Some({name}.into());"), 1, 0); code += "self\n"; } } diff --git a/crates/re_types_builder/src/lib.rs b/crates/re_types_builder/src/lib.rs index 436126e7b885..71669189d7f6 100644 --- a/crates/re_types_builder/src/lib.rs +++ b/crates/re_types_builder/src/lib.rs @@ -99,12 +99,31 @@ pub use self::objects::{ Attributes, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, Type, }; +// --- Attributes --- + +pub const ARROW_ATTR_TRANSPARENT: &str = "arrow.attr.transparent"; +pub const ARROW_ATTR_SPARSE_UNION: &str = "arrow.attr.sparse_union"; + +pub const RERUN_ATTR_COMPONENT_REQUIRED: &str = "rerun.attr.component_required"; +pub const RERUN_ATTR_COMPONENT_RECOMMENDED: &str = "rerun.attr.component_recommended"; +pub const RERUN_ATTR_COMPONENT_OPTIONAL: &str = "rerun.attr.component_optional"; + +pub const PYTHON_ATTR_TRANSPARENT: &str = "python.attr.transparent"; +pub const PYTHON_ATTR_ALIASES: &str = "python.attr.aliases"; +pub const PYTHON_ATTR_ARRAY_ALIASES: &str = "python.attr.array_aliases"; + +pub const RUST_ATTR_DERIVE: &str = "rust.attr.derive"; +pub const RUST_ATTR_REPR: &str = "rust.attr.repr"; +pub const RUST_ATTR_TUPLE_STRUCT: &str = "rust.attr.tuple_struct"; + // --- Entrypoints --- use std::path::{Path, PathBuf}; /// Compiles binary reflection dumps from flatbuffers definitions. /// +/// Requires `flatc` available in $PATH. +/// /// Panics on error. /// /// - `include_dir_path`: path to the root directory of the fbs definition tree. @@ -141,27 +160,14 @@ pub fn compile_binary_schemas( .unwrap(); } -/// Generates Rust code from a set of flatbuffers definitions. -/// -/// Panics on error. -/// -/// - `include_dir_path`: path to the root directory of the fbs definition tree. -/// - `output_crate_path`: path to the root of the output crate. -/// - `entrypoint_path`: path to the root file of the fbs definition tree. -/// -/// E.g.: -/// ```no_run -/// re_types_builder::generate_rust_code( -/// "./definitions", -/// ".", -/// "./definitions/rerun/archetypes.fbs", -/// ); -/// ``` -pub fn generate_rust_code( +/// Handles the first 3 language-agnostic passes of the codegen pipeline: +/// 1. Generate binary reflection dumps for our definitions. +/// 2. Run the semantic pass +/// 3. Compute the Arrow registry +fn generate_lang_agnostic( include_dir_path: impl AsRef, - output_crate_path: impl AsRef, entrypoint_path: impl AsRef, -) { +) -> (Objects, ArrowRegistry) { use xshell::Shell; let sh = Shell::new().unwrap(); @@ -189,6 +195,33 @@ pub fn generate_rust_code( arrow_registry.register(obj); } + (objects, arrow_registry) +} + +/// Generates Rust code from a set of flatbuffers definitions. +/// +/// Panics on error. +/// +/// - `include_dir_path`: path to the root directory of the fbs definition tree. +/// - `output_crate_path`: path to the root of the output crate. +/// - `entrypoint_path`: path to the root file of the fbs definition tree. +/// +/// E.g.: +/// ```no_run +/// re_types_builder::generate_rust_code( +/// "./definitions", +/// ".", +/// "./definitions/rerun/archetypes.fbs", +/// ); +/// ``` +pub fn generate_rust_code( + include_dir_path: impl AsRef, + output_crate_path: impl AsRef, + entrypoint_path: impl AsRef, +) { + // passes 1 through 3: bfbs, semantic, arrow registry + let (objects, arrow_registry) = generate_lang_agnostic(include_dir_path, entrypoint_path); + // generate rust code let mut gen = RustCodeGenerator::new(output_crate_path.as_ref()); let _filepaths = gen.quote(&objects, &arrow_registry); @@ -215,32 +248,8 @@ pub fn generate_python_code( output_pkg_path: impl AsRef, entrypoint_path: impl AsRef, ) { - use xshell::Shell; - - let sh = Shell::new().unwrap(); - let tmp = sh.create_temp_dir().unwrap(); - - let entrypoint_path = entrypoint_path.as_ref(); - let entrypoint_filename = entrypoint_path.file_name().unwrap(); - - // generate bfbs definitions - compile_binary_schemas(include_dir_path, tmp.path(), entrypoint_path); - - let mut binary_entrypoint_path = PathBuf::from(entrypoint_filename); - binary_entrypoint_path.set_extension("bfbs"); - - // semantic pass: high level objects from low-level reflection data - let objects = Objects::from_buf( - sh.read_binary_file(tmp.path().join(binary_entrypoint_path)) - .unwrap() - .as_slice(), - ); - - // create and fill out arrow registry - let mut arrow_registry = ArrowRegistry::default(); - for obj in objects.ordered_objects(None) { - arrow_registry.register(obj); - } + // passes 1 through 3: bfbs, semantic, arrow registry + let (objects, arrow_registry) = generate_lang_agnostic(include_dir_path, entrypoint_path); // generate python code let mut gen = PythonCodeGenerator::new(output_pkg_path.as_ref()); diff --git a/crates/re_types_builder/src/objects.rs b/crates/re_types_builder/src/objects.rs index 343e40583ed1..677779a7c7b3 100644 --- a/crates/re_types_builder/src/objects.rs +++ b/crates/re_types_builder/src/objects.rs @@ -443,7 +443,7 @@ pub struct ObjectField { /// Whether the field is required. /// - /// Always true for `struct` types. + /// Always true for IDL definitions using flatbuffers' `struct` type (as opposed to `table`). pub required: bool, /// Whether the field is deprecated. @@ -642,8 +642,6 @@ impl Type { let union = &enums[field_type.index() as usize]; Self::Object(union.name().to_owned()) } - // NOTE: flatbuffers doesn't support directly nesting multiple layers of arrays, they - // always have to be wrapped into intermediate layers of structs or tables. FbsBaseType::Array => Self::Array { elem_type: ElementType::from_raw_base_type( enums, @@ -718,8 +716,6 @@ impl ElementType { flatten_scalar_wrappers(obj) } FbsBaseType::Union => unimplemented!("{inner_type:#?}"), // NOLINT - // NOTE: flatbuffers doesn't support directly nesting multiple layers of arrays, they - // always have to be wrapped into intermediate layers of structs or tables. FbsBaseType::None | FbsBaseType::UType | FbsBaseType::Array From dabd85bf9b6e258b1e70be443fd310d1313719ae Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 21:56:46 +0200 Subject: [PATCH 14/18] introduce re_types --- Cargo.lock | 16 +++ Cargo.toml | 1 + crates/re_types/Cargo.toml | 61 ++++++++++ crates/re_types/README.md | 12 ++ crates/re_types/build.rs | 104 ++++++++++++++++++ .../re_types/definitions/arrow/attributes.fbs | 12 ++ .../re_types/definitions/fbs/attributes.fbs | 16 +++ crates/re_types/definitions/fbs/scalars.fbs | 10 ++ .../definitions/python/attributes.fbs | 16 +++ .../re_types/definitions/rerun/archetypes.fbs | 1 + .../re_types/definitions/rust/attributes.fbs | 15 +++ crates/re_types/source_hash.txt | 4 + crates/re_types/src/archetypes/mod.rs | 1 + crates/re_types/src/components/mod.rs | 1 + crates/re_types/src/datatypes/mod.rs | 1 + crates/re_types/src/lib.rs | 104 ++++++++++++++++++ 16 files changed, 375 insertions(+) create mode 100644 crates/re_types/Cargo.toml create mode 100644 crates/re_types/README.md create mode 100644 crates/re_types/build.rs create mode 100644 crates/re_types/definitions/arrow/attributes.fbs create mode 100644 crates/re_types/definitions/fbs/attributes.fbs create mode 100644 crates/re_types/definitions/fbs/scalars.fbs create mode 100644 crates/re_types/definitions/python/attributes.fbs create mode 100644 crates/re_types/definitions/rerun/archetypes.fbs create mode 100644 crates/re_types/definitions/rust/attributes.fbs create mode 100644 crates/re_types/source_hash.txt create mode 100644 crates/re_types/src/archetypes/mod.rs create mode 100644 crates/re_types/src/components/mod.rs create mode 100644 crates/re_types/src/datatypes/mod.rs create mode 100644 crates/re_types/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b5c6926c1ca1..306ba7d675c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4497,6 +4497,22 @@ dependencies = [ "web-time", ] +[[package]] +name = "re_types" +version = "0.7.0-alpha.0" +dependencies = [ + "arrow2", + "bytemuck", + "document-features", + "ecolor", + "glam", + "itertools", + "macaw", + "re_build_tools", + "re_types_builder", + "xshell", +] + [[package]] name = "re_types_builder" version = "0.7.0-alpha.0" diff --git a/Cargo.toml b/Cargo.toml index 13b1df640125..f0a47151f6b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ re_tensor_ops = { path = "crates/re_tensor_ops", version = "0.7.0-alpha.0", defa re_time_panel = { path = "crates/re_time_panel", version = "=0.7.0-alpha.0", default-features = false } re_tracing = { path = "crates/re_tracing", version = "0.7.0-alpha.0", default-features = false } re_tuid = { path = "crates/re_tuid", version = "0.7.0-alpha.0", default-features = false } +re_types = { path = "crates/re_types", version = "=0.7.0-alpha.0", default-features = false } re_types_builder = { path = "crates/re_types_builder", version = "=0.7.0-alpha.0", default-features = false } re_ui = { path = "crates/re_ui", version = "0.7.0-alpha.0", default-features = false } re_viewer = { path = "crates/re_viewer", version = "0.7.0-alpha.0", default-features = false } diff --git a/crates/re_types/Cargo.toml b/crates/re_types/Cargo.toml new file mode 100644 index 000000000000..e3b1b9dc1b2a --- /dev/null +++ b/crates/re_types/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "re_types" +authors.workspace = true +description = "The standard Rerun data types, component types, and archetypes." +edition.workspace = true +homepage.workspace = true +include.workspace = true +license.workspace = true +publish = true +readme = "README.md" +repository.workspace = true +rust-version.workspace = true +version.workspace = true + + +[package.metadata.docs.rs] +all-features = true + + +[features] +default = [] + +## Enable color conversions. +ecolor = ["dep:ecolor"] + +## Add support for some math operations using [`glam`](https://crates.io/crates/glam/). +glam = ["dep:glam", "dep:macaw"] + + +[dependencies] + +# External +arrow2 = { workspace = true, features = [ + "io_ipc", + "io_print", + "compute_concatenate", +] } +bytemuck = { version = "1.11", features = ["derive", "extern_crate_alloc"] } +document-features = "0.2" + +# External (optional) +ecolor = { workspace = true, optional = true } +glam = { workspace = true, optional = true } +macaw = { workspace = true, optional = true } + + +[dev-dependencies] + +# External +glam.workspace = true +itertools.workspace = true + + +[build-dependencies] + +# Rerun +re_build_tools.workspace = true +re_types_builder.workspace = true + +# External +xshell = "0.2" diff --git a/crates/re_types/README.md b/crates/re_types/README.md new file mode 100644 index 000000000000..62c1ebddf06a --- /dev/null +++ b/crates/re_types/README.md @@ -0,0 +1,12 @@ +# re_types + +Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. + +[![Latest version](https://img.shields.io/crates/v/re_types.svg)](https://crates.io/crates/re_types) +[![Documentation](https://docs.rs/re_types/badge.svg)](https://docs.rs/re_types) +![MIT](https://img.shields.io/badge/license-MIT-blue.svg) +![Apache](https://img.shields.io/badge/license-Apache-blue.svg) + +The standard Rerun data types, component types, and archetypes. + +This crate includes both the language-agnostic definitions (flatbuffers IDL) as well as the generated code. diff --git a/crates/re_types/build.rs b/crates/re_types/build.rs new file mode 100644 index 000000000000..34b976a07ea2 --- /dev/null +++ b/crates/re_types/build.rs @@ -0,0 +1,104 @@ +//! Generates Rust & Python code from flatbuffers definitions. + +use xshell::{cmd, Shell}; + +use re_build_tools::{ + compute_crate_hash, compute_dir_hash, compute_strings_hash, is_tracked_env_var_set, iter_dir, + read_versioning_hash, rerun_if_changed, rerun_if_changed_or_doesnt_exist, + write_versioning_hash, +}; + +// NOTE: Don't need to add extra context to xshell invocations, it does so on its own. + +// --- + +const SOURCE_HASH_PATH: &str = "./source_hash.txt"; +const DEFINITIONS_DIR_PATH: &str = "./definitions"; +const RUST_OUTPUT_DIR_PATH: &str = "."; +const PYTHON_OUTPUT_DIR_PATH: &str = "../../rerun_py/rerun2"; + +fn main() { + if std::env::var("CI").is_ok() { + // Don't run on CI! + // + // The code we're generating here is actual source code that gets committed into the + // repository. + return; + } + + if !is_tracked_env_var_set("IS_IN_RERUN_WORKSPACE") { + // Only run if we are in the rerun workspace, not on users machines. + return; + } + if is_tracked_env_var_set("RERUN_IS_PUBLISHING") { + // We don't need to rebuild - we should have done so beforehand! + // See `RELEASES.md` + return; + } + + rerun_if_changed_or_doesnt_exist(SOURCE_HASH_PATH); + for path in iter_dir(DEFINITIONS_DIR_PATH, Some(&[".fbs"])) { + rerun_if_changed(&path); + } + + // NOTE: We need to hash both the flatbuffers definitions as well as the source code of the + // code generator itself! + let re_types_builder_hash = compute_crate_hash("re_types_builder"); + let cur_hash = read_versioning_hash(SOURCE_HASH_PATH); + let new_hash = compute_dir_hash(DEFINITIONS_DIR_PATH, Some(&[".fbs"])); + let new_hash_final = compute_strings_hash(&[&re_types_builder_hash, &new_hash]); + + // Leave these be please, very useful when debugging. + eprintln!("re_types_builder_hash: {re_types_builder_hash:?}"); + eprintln!("cur_hash: {cur_hash:?}"); + eprintln!("new_hash: {new_hash:?}"); + eprintln!("new_hash_final: {new_hash_final:?}"); + + if let Some(cur_hash) = cur_hash { + if cur_hash == new_hash_final { + // Neither the source of the code generator nor the IDL definitions have changed, no need + // to do anything at this point. + return; + } + } + + let sh = Shell::new().unwrap(); + + re_types_builder::generate_rust_code( + DEFINITIONS_DIR_PATH, + RUST_OUTPUT_DIR_PATH, + "./definitions/rerun/archetypes.fbs", + ); + + // NOTE: We're purposefully ignoring the error here. + // + // In the very unlikely chance that the user doesn't have `cargo` in their $PATH, there's + // still no good reason to fail the build. + // + // The CI will catch the unformatted files at PR time and complain appropriately anyhow. + cmd!(sh, "cargo fmt").run().ok(); + + re_types_builder::generate_python_code( + DEFINITIONS_DIR_PATH, + PYTHON_OUTPUT_DIR_PATH, + "./definitions/rerun/archetypes.fbs", + ); + + // NOTE: We're purposefully ignoring the error here. + // + // If the user doesn't have `black` in their $PATH, there's still no good reason to fail + // the build. + // + // The CI will catch the unformatted files at PR time and complain appropriately anyhow. + cmd!(sh, "black {PYTHON_OUTPUT_DIR_PATH}").run().ok(); + + // NOTE: We're purposefully ignoring the error here. + // + // If the user doesn't have `ruff` in their $PATH, there's still no good reason to fail + // the build. + // + // The CI will catch the unformatted files at PR time and complain appropriately anyhow. + cmd!(sh, "ruff --fix {PYTHON_OUTPUT_DIR_PATH}").run().ok(); + + write_versioning_hash(SOURCE_HASH_PATH, new_hash_final); +} diff --git a/crates/re_types/definitions/arrow/attributes.fbs b/crates/re_types/definitions/arrow/attributes.fbs new file mode 100644 index 000000000000..54374cc07643 --- /dev/null +++ b/crates/re_types/definitions/arrow/attributes.fbs @@ -0,0 +1,12 @@ +namespace arrow; + +/// Marks a union as sparse from Arrow's standpoint, affecting its Arrow datatype. +/// +/// Only applies to unions. +attribute "arrow.attr.sparse_union"; + +/// Marks a single-field object as transparent from Arrow's standpoint, affecting its Arrow +/// datatype. +/// +/// This is generally most useful for getting rid of extraneous `struct` layers. +attribute "arrow.attr.transparent"; diff --git a/crates/re_types/definitions/fbs/attributes.fbs b/crates/re_types/definitions/fbs/attributes.fbs new file mode 100644 index 000000000000..9487163b54fb --- /dev/null +++ b/crates/re_types/definitions/fbs/attributes.fbs @@ -0,0 +1,16 @@ +namespace fbs.attributes; + +/// Mandatory attribute that applies to all kinds of objects: structs, enums, unions and even the +/// fields within. +/// +/// This defines a stable order between objects of the same kind, e.g. the order in which fields of a +/// struct should be laid out when generating code. +/// This is always required since flatbuffers works entirely with unordered maps internally, which +/// would results in flaky code generation. +/// +/// In unions, this effectively defines the arrow type of each variant, since that depends on the +/// fields's order! +/// +/// NOTE: We do not use flatbuffers' builtin `id` attribute as it only works on `table`s, whereas we +/// need a stable order for all kinds of things. +attribute "order"; diff --git a/crates/re_types/definitions/fbs/scalars.fbs b/crates/re_types/definitions/fbs/scalars.fbs new file mode 100644 index 000000000000..bb6fcd75063b --- /dev/null +++ b/crates/re_types/definitions/fbs/scalars.fbs @@ -0,0 +1,10 @@ +/// Unions cannot directly refer to scalar types, they need to be wrapped in a struct or table +/// first. +/// This package provides pre-wrapped scalars that will be automatically flattened down to their +/// inner type by our parsers. +namespace fbs.scalars; + +/// Flattens down to a 32-bit float. +struct Float32 { + v: float; +} diff --git a/crates/re_types/definitions/python/attributes.fbs b/crates/re_types/definitions/python/attributes.fbs new file mode 100644 index 000000000000..99b357f72abf --- /dev/null +++ b/crates/re_types/definitions/python/attributes.fbs @@ -0,0 +1,16 @@ +namespace python.attributes; + +/// Marks a field as transparent, meaning its type will be replaced by the underlying type. +/// +/// Only applies to field whose types have a single-field. +attribute "python.attr.transparent"; + +/// Defines the type aliases for a component, e.g. the types that make up `ComponentLike`. +/// +/// Only applies to structs/unions that are components. +attribute "python.attr.aliases"; + +/// Defines the array type aliases for a component, e.g. the types that make up `ComponentArrayLike`. +/// +/// Only applies to structs/unions that are components. +attribute "python.attr.array_aliases"; diff --git a/crates/re_types/definitions/rerun/archetypes.fbs b/crates/re_types/definitions/rerun/archetypes.fbs new file mode 100644 index 000000000000..e8f07787a567 --- /dev/null +++ b/crates/re_types/definitions/rerun/archetypes.fbs @@ -0,0 +1 @@ +namespace rerun.archetypes; diff --git a/crates/re_types/definitions/rust/attributes.fbs b/crates/re_types/definitions/rust/attributes.fbs new file mode 100644 index 000000000000..58a8fac6b070 --- /dev/null +++ b/crates/re_types/definitions/rust/attributes.fbs @@ -0,0 +1,15 @@ +namespace rust.attributes; + +/// Apply to a struct or table object to generate a tuple struct. +/// +/// The type definition of the target object must have exactly a single field. +attribute "rust.attr.tuple_struct"; + +/// Apply to any object to generate a #derive clause. +/// +/// The value of the attribute will be trimmed out but otherwise left as-is. +/// E.g. "rust.attr.derive": "Debug, Clone, Copy"`. +attribute "rust.attr.derive"; + +/// Apply to any object to generate a #repr clause with the specified value. +attribute "rust.attr.repr"; diff --git a/crates/re_types/source_hash.txt b/crates/re_types/source_hash.txt new file mode 100644 index 000000000000..6be879f6278b --- /dev/null +++ b/crates/re_types/source_hash.txt @@ -0,0 +1,4 @@ +# This is a sha256 hash for all direct and indirect dependencies of this crate's build script. +# It can be safely removed at anytime to force the build script to run again. +# Check out build.rs to see how it's computed. +e4b94e84d8869a38e732e777b6ab8e7d4ae7780e04cc3c89512e444cdda5ee50 \ No newline at end of file diff --git a/crates/re_types/src/archetypes/mod.rs b/crates/re_types/src/archetypes/mod.rs new file mode 100644 index 000000000000..cf43c16c6a0e --- /dev/null +++ b/crates/re_types/src/archetypes/mod.rs @@ -0,0 +1 @@ +// NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT. diff --git a/crates/re_types/src/components/mod.rs b/crates/re_types/src/components/mod.rs new file mode 100644 index 000000000000..cf43c16c6a0e --- /dev/null +++ b/crates/re_types/src/components/mod.rs @@ -0,0 +1 @@ +// NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT. diff --git a/crates/re_types/src/datatypes/mod.rs b/crates/re_types/src/datatypes/mod.rs new file mode 100644 index 000000000000..cf43c16c6a0e --- /dev/null +++ b/crates/re_types/src/datatypes/mod.rs @@ -0,0 +1 @@ +// NOTE: This file was autogenerated by re_types_builder; DO NOT EDIT. diff --git a/crates/re_types/src/lib.rs b/crates/re_types/src/lib.rs new file mode 100644 index 000000000000..e6ddbfc3f92f --- /dev/null +++ b/crates/re_types/src/lib.rs @@ -0,0 +1,104 @@ +//! The standard Rerun data types, component types, and archetypes. +//! +//! This crate contains both the IDL definitions for Rerun types (flatbuffers) as well as the code +//! generated from those using `re_types_builder`. +//! +//! +//! ### Organization +//! +//! - `definitions/` contains IDL definitions for all Rerun types (data, components, archetypes). +//! - `src/` contains the code generated for Rust. +//! - `rerun_py/rerun2/` (at the root of this workspace) contains the code generated for Python. +//! +//! While most of the code in this crate is auto-generated, some manual extensions are littered +//! throughout: look for files ending in `_ext.rs` or `_ext.py` (also see the "Extensions" section +//! of this document). +//! +//! +//! ### Build cache +//! +//! Updating either the source code of the code generator itself (`re_types_builder`) or any of the +//! .fbs files should re-trigger the code generation process the next time `re_types` is built. +//! Manual extension files will be left untouched. +//! +//! Caching is controlled by a versioning hash that is stored in `store_hash.txt`. +//! If you suspect something is wrong with the caching mechanism and that your changes aren't taken +//! into account when they should, try and remove `source_hash.txt`. +//! If that fixes the issue, you've found a bug. +//! +//! +//! ### How-to: add a new datatype/component/archetype +//! +//! Create the appropriate .fbs file in the appropriate place, and make sure it gets included in +//! some way (most likely indirectly) by `archetypes.fbs`, which is the main entrypoint for +//! codegen. +//! Generally, the easiest thing to do is to add your new type to one of the centralized manifests, +//! e.g. for a new component, include it into `components.fbs`. +//! +//! Your file should get picked up automatically by the code generator. +//! Once the code for your new component has been generated, implement whatever extensions you need +//! and make sure to tests any custom constructors you add. +//! +//! +//! ### How-to: remove an existing datatype/component/archetype +//! +//! Simply get rid of the type in question and rebuild `re_types` to trigger codegen. +//! +//! Beware though: if you remove a whole definition file re-running codegen will not remove the +//! associated generated files, you'll have to do that yourself. +//! +//! +//! ### Extensions +//! +//! +//! #### Rust +//! +//! Generated Rust code can be manually extended by adding sibling files with the `_ext.rs` +//! prefix. E.g. to extend `vec2d.rs`, create a `vec2d_ext.rs`. +//! +//! Trigger the codegen (e.g. by removing `source_hash.txt`) to generate the right `mod` clauses +//! automatically. +//! +//! The simplest way to get started is to look at any of the existing examples. +//! +//! +//! #### Python +//! +//! Generated Python code can be manually extended by adding a sibling file with the `_ext.py` +//! prefix. E.g. to extend `vec2d.py`, create a `vec2d_ext.py`. +//! +//! This sibling file needs to implement an extension class that is mixed in with the +//! auto-generated class. +//! The simplest way to get started is to look at any of the existing examples. + +use std::borrow::Cow; + +/// A [`Datatype`] describes plain old data. +pub trait Datatype { + fn name() -> Cow<'static, str>; + + fn to_arrow_datatype() -> arrow2::datatypes::DataType; +} + +pub trait Component { + fn name() -> Cow<'static, str>; + + fn to_arrow_datatype() -> arrow2::datatypes::DataType; +} + +pub trait Archetype { + fn name() -> Cow<'static, str>; + + fn required_components() -> Vec>; + fn recommended_components() -> Vec>; + fn optional_components() -> Vec>; + + fn to_arrow_datatypes() -> Vec; +} + +/// Number of decimals shown for all vector display methods. +pub const DISPLAY_PRECISION: usize = 3; + +pub mod archetypes; +pub mod components; +pub mod datatypes; From b5fd082ef90703203bd9afe4af91c563aaf83c89 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Sun, 11 Jun 2023 22:35:07 +0200 Subject: [PATCH 15/18] self-review --- crates/re_types/build.rs | 24 ++++++++++++------- .../re_types/definitions/arrow/attributes.fbs | 9 ++++--- .../re_types/definitions/fbs/attributes.fbs | 4 ++-- .../definitions/python/attributes.fbs | 2 +- crates/re_types/source_hash.txt | 2 +- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/crates/re_types/build.rs b/crates/re_types/build.rs index 34b976a07ea2..687d7507d590 100644 --- a/crates/re_types/build.rs +++ b/crates/re_types/build.rs @@ -43,19 +43,19 @@ fn main() { // NOTE: We need to hash both the flatbuffers definitions as well as the source code of the // code generator itself! - let re_types_builder_hash = compute_crate_hash("re_types_builder"); let cur_hash = read_versioning_hash(SOURCE_HASH_PATH); - let new_hash = compute_dir_hash(DEFINITIONS_DIR_PATH, Some(&[".fbs"])); - let new_hash_final = compute_strings_hash(&[&re_types_builder_hash, &new_hash]); + let re_types_builder_hash = compute_crate_hash("re_types_builder"); + let definitions_hash = compute_dir_hash(DEFINITIONS_DIR_PATH, Some(&[".fbs"])); + let new_hash = compute_strings_hash(&[&re_types_builder_hash, &definitions_hash]); // Leave these be please, very useful when debugging. eprintln!("re_types_builder_hash: {re_types_builder_hash:?}"); eprintln!("cur_hash: {cur_hash:?}"); + eprintln!("definitions_hash: {definitions_hash:?}"); eprintln!("new_hash: {new_hash:?}"); - eprintln!("new_hash_final: {new_hash_final:?}"); if let Some(cur_hash) = cur_hash { - if cur_hash == new_hash_final { + if cur_hash == new_hash { // Neither the source of the code generator nor the IDL definitions have changed, no need // to do anything at this point. return; @@ -72,10 +72,10 @@ fn main() { // NOTE: We're purposefully ignoring the error here. // - // In the very unlikely chance that the user doesn't have `cargo` in their $PATH, there's - // still no good reason to fail the build. + // In the very unlikely chance that the user doesn't have the `fmt` component installed, + // there's still no good reason to fail the build. // - // The CI will catch the unformatted files at PR time and complain appropriately anyhow. + // The CI will catch the unformatted file at PR time and complain appropriately anyhow. cmd!(sh, "cargo fmt").run().ok(); re_types_builder::generate_python_code( @@ -84,6 +84,12 @@ fn main() { "./definitions/rerun/archetypes.fbs", ); + // NOTE: This requires both `black` and `ruff` to be in $PATH, but only for contributors, + // not end users. + // Even for contributors, `black` and `ruff` won't be needed unless they edit some of the + // .fbs files... and even then, this won't crash if they are missing, it will just fail to pass + // the CI! + // NOTE: We're purposefully ignoring the error here. // // If the user doesn't have `black` in their $PATH, there's still no good reason to fail @@ -100,5 +106,5 @@ fn main() { // The CI will catch the unformatted files at PR time and complain appropriately anyhow. cmd!(sh, "ruff --fix {PYTHON_OUTPUT_DIR_PATH}").run().ok(); - write_versioning_hash(SOURCE_HASH_PATH, new_hash_final); + write_versioning_hash(SOURCE_HASH_PATH, new_hash); } diff --git a/crates/re_types/definitions/arrow/attributes.fbs b/crates/re_types/definitions/arrow/attributes.fbs index 54374cc07643..33c81c7dfb59 100644 --- a/crates/re_types/definitions/arrow/attributes.fbs +++ b/crates/re_types/definitions/arrow/attributes.fbs @@ -1,12 +1,15 @@ namespace arrow; -/// Marks a union as sparse from Arrow's standpoint, affecting its Arrow datatype. +/// Marks a union as sparse, affecting its Arrow datatype. +/// +/// This does _not_ affect the object generated in Python or Rust, this is a pure Arrow concern! /// /// Only applies to unions. attribute "arrow.attr.sparse_union"; -/// Marks a single-field object as transparent from Arrow's standpoint, affecting its Arrow -/// datatype. +/// Marks a single-field object as transparent, affecting its Arrow datatype. +/// +/// This does _not_ affect the object generated in Python or Rust, this is a pure Arrow concern! /// /// This is generally most useful for getting rid of extraneous `struct` layers. attribute "arrow.attr.transparent"; diff --git a/crates/re_types/definitions/fbs/attributes.fbs b/crates/re_types/definitions/fbs/attributes.fbs index 9487163b54fb..cdf311181ef6 100644 --- a/crates/re_types/definitions/fbs/attributes.fbs +++ b/crates/re_types/definitions/fbs/attributes.fbs @@ -8,8 +8,8 @@ namespace fbs.attributes; /// This is always required since flatbuffers works entirely with unordered maps internally, which /// would results in flaky code generation. /// -/// In unions, this effectively defines the arrow type of each variant, since that depends on the -/// fields's order! +/// In unions, this effectively defines the arrow tag of each variant, since the tag depends on the +/// fields's order in the datatype! /// /// NOTE: We do not use flatbuffers' builtin `id` attribute as it only works on `table`s, whereas we /// need a stable order for all kinds of things. diff --git a/crates/re_types/definitions/python/attributes.fbs b/crates/re_types/definitions/python/attributes.fbs index 99b357f72abf..4160bdc0c429 100644 --- a/crates/re_types/definitions/python/attributes.fbs +++ b/crates/re_types/definitions/python/attributes.fbs @@ -2,7 +2,7 @@ namespace python.attributes; /// Marks a field as transparent, meaning its type will be replaced by the underlying type. /// -/// Only applies to field whose types have a single-field. +/// Only applies to fields whose type is an object with a single-field. attribute "python.attr.transparent"; /// Defines the type aliases for a component, e.g. the types that make up `ComponentLike`. diff --git a/crates/re_types/source_hash.txt b/crates/re_types/source_hash.txt index 6be879f6278b..45db6bc3ce6b 100644 --- a/crates/re_types/source_hash.txt +++ b/crates/re_types/source_hash.txt @@ -1,4 +1,4 @@ # This is a sha256 hash for all direct and indirect dependencies of this crate's build script. # It can be safely removed at anytime to force the build script to run again. # Check out build.rs to see how it's computed. -e4b94e84d8869a38e732e777b6ab8e7d4ae7780e04cc3c89512e444cdda5ee50 \ No newline at end of file +5a4275f71245bf3f59421a959cf6b2f57e7d28c7dbec9a61e7416747c2d79b96 \ No newline at end of file From f630ea332358a9968a944bb5ed203364f58b781d Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 12 Jun 2023 11:53:30 +0200 Subject: [PATCH 16/18] python shall output to rerun_py/rerun_sdk/rerun2 --- crates/re_types/build.rs | 2 +- crates/re_types/source_hash.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/re_types/build.rs b/crates/re_types/build.rs index 687d7507d590..82c328e0aa4b 100644 --- a/crates/re_types/build.rs +++ b/crates/re_types/build.rs @@ -15,7 +15,7 @@ use re_build_tools::{ const SOURCE_HASH_PATH: &str = "./source_hash.txt"; const DEFINITIONS_DIR_PATH: &str = "./definitions"; const RUST_OUTPUT_DIR_PATH: &str = "."; -const PYTHON_OUTPUT_DIR_PATH: &str = "../../rerun_py/rerun2"; +const PYTHON_OUTPUT_DIR_PATH: &str = "../../rerun_py/rerun_sdk/rerun2"; fn main() { if std::env::var("CI").is_ok() { diff --git a/crates/re_types/source_hash.txt b/crates/re_types/source_hash.txt index 45db6bc3ce6b..1096244fad32 100644 --- a/crates/re_types/source_hash.txt +++ b/crates/re_types/source_hash.txt @@ -1,4 +1,4 @@ # This is a sha256 hash for all direct and indirect dependencies of this crate's build script. # It can be safely removed at anytime to force the build script to run again. # Check out build.rs to see how it's computed. -5a4275f71245bf3f59421a959cf6b2f57e7d28c7dbec9a61e7416747c2d79b96 \ No newline at end of file +e264658cf8184bcb9dbd1302910072b755ea7901e6337b3b5394094ca67e0ef7 \ No newline at end of file From fb864663039ce0bd6798ff95c848626988ca9245 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 14 Jun 2023 11:04:32 +0200 Subject: [PATCH 17/18] addressing PR comments --- crates/re_types/Cargo.toml | 2 +- crates/re_types/definitions/arrow/attributes.fbs | 6 ++++-- crates/re_types/definitions/fbs/attributes.fbs | 2 +- crates/re_types/definitions/fbs/scalars.fbs | 3 +++ crates/re_types/source_hash.txt | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/re_types/Cargo.toml b/crates/re_types/Cargo.toml index e3b1b9dc1b2a..4da294c84c50 100644 --- a/crates/re_types/Cargo.toml +++ b/crates/re_types/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "re_types" authors.workspace = true -description = "The standard Rerun data types, component types, and archetypes." +description = "The built-in Rerun data types, component types, and archetypes." edition.workspace = true homepage.workspace = true include.workspace = true diff --git a/crates/re_types/definitions/arrow/attributes.fbs b/crates/re_types/definitions/arrow/attributes.fbs index 33c81c7dfb59..f8e96e4d8bad 100644 --- a/crates/re_types/definitions/arrow/attributes.fbs +++ b/crates/re_types/definitions/arrow/attributes.fbs @@ -2,14 +2,16 @@ namespace arrow; /// Marks a union as sparse, affecting its Arrow datatype. /// -/// This does _not_ affect the object generated in Python or Rust, this is a pure Arrow concern! +/// This does _not_ affect the generated object structure in and of itself, it is a pure Arrow +/// matter that only impacts (de)serialization. /// /// Only applies to unions. attribute "arrow.attr.sparse_union"; /// Marks a single-field object as transparent, affecting its Arrow datatype. /// -/// This does _not_ affect the object generated in Python or Rust, this is a pure Arrow concern! +/// This does _not_ affect the generated object structure in and of itself, it is a pure Arrow +/// matter that only impacts (de)serialization. /// /// This is generally most useful for getting rid of extraneous `struct` layers. attribute "arrow.attr.transparent"; diff --git a/crates/re_types/definitions/fbs/attributes.fbs b/crates/re_types/definitions/fbs/attributes.fbs index cdf311181ef6..d431691e8d96 100644 --- a/crates/re_types/definitions/fbs/attributes.fbs +++ b/crates/re_types/definitions/fbs/attributes.fbs @@ -6,7 +6,7 @@ namespace fbs.attributes; /// This defines a stable order between objects of the same kind, e.g. the order in which fields of a /// struct should be laid out when generating code. /// This is always required since flatbuffers works entirely with unordered maps internally, which -/// would results in flaky code generation. +/// would result in flaky code generation. /// /// In unions, this effectively defines the arrow tag of each variant, since the tag depends on the /// fields's order in the datatype! diff --git a/crates/re_types/definitions/fbs/scalars.fbs b/crates/re_types/definitions/fbs/scalars.fbs index bb6fcd75063b..085df42e26db 100644 --- a/crates/re_types/definitions/fbs/scalars.fbs +++ b/crates/re_types/definitions/fbs/scalars.fbs @@ -2,6 +2,9 @@ /// first. /// This package provides pre-wrapped scalars that will be automatically flattened down to their /// inner type by our parsers. +/// +/// Look e.g. for `fbs.scalars.Float32` in `objects.rs` to see this flatenning in action. + namespace fbs.scalars; /// Flattens down to a 32-bit float. diff --git a/crates/re_types/source_hash.txt b/crates/re_types/source_hash.txt index 1096244fad32..a5681461859d 100644 --- a/crates/re_types/source_hash.txt +++ b/crates/re_types/source_hash.txt @@ -1,4 +1,4 @@ # This is a sha256 hash for all direct and indirect dependencies of this crate's build script. # It can be safely removed at anytime to force the build script to run again. # Check out build.rs to see how it's computed. -e264658cf8184bcb9dbd1302910072b755ea7901e6337b3b5394094ca67e0ef7 \ No newline at end of file +dae77f291d1698807cd865265cbb77731bd1aedf07c0968a6b0ac67c18f94590 \ No newline at end of file From 8c78c010f5b6c3eafa203f02f570131641a1d4d9 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 14 Jun 2023 16:33:20 +0200 Subject: [PATCH 18/18] git lost it --- Cargo.lock | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 306ba7d675c4..f6211650f901 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6681,21 +6681,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dbabb1cbd15a1d6d12d9ed6b35cc6777d4af87ab3ba155ea37215f20beab80c" -[[package]] -name = "xshell" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "962c039b3a7b16cf4e9a4248397c6585c07547412e7d6a6e035389a802dcfe90" -dependencies = [ - "xshell-macros", -] - -[[package]] -name = "xshell-macros" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dbabb1cbd15a1d6d12d9ed6b35cc6777d4af87ab3ba155ea37215f20beab80c" - [[package]] name = "xxhash-rust" version = "0.8.6"