Skip to content

Commit

Permalink
Implement v0.6 Optional Bytes
Browse files Browse the repository at this point in the history
This makes a few changes:
 - It changes generated messages to reference message innards as a type in `__runtime` instead of branching on what fields should be there. That results in much less bifurcation in gencode and lets runtime-agnostic code reference raw message innards.
- It adds a generic mechanism for creating vtable-based mutators. These vtables point to thunks generated for interacting with C++ or upb fields. Right now, the design results in 2-word (msg+vtable) mutators for C++ and 3-word mutators (msg+arena+vtable) for UPB. See upb.rs for an explanation of the design options. I chose the `RawMessage+&Arena` design for mutator data as opposed to a `&MessageInner` design because it did not result in extra-indirection layout changes for message mutators. We could revisit this in the future with performance data, since this results in all field mutators being 3 words large instead of the register-friendly 2 words.
- And lastly, as a nearby change that touches on many of the same topics, it adds some extra SAFETY comments for Send/Sync in message gencode.

PiperOrigin-RevId: 559483437
  • Loading branch information
kupiakos authored and Copybara-Service committed Aug 23, 2023
1 parent 11f5044 commit 9a0bc39
Show file tree
Hide file tree
Showing 17 changed files with 969 additions and 157 deletions.
2 changes: 2 additions & 0 deletions rust/BUILD
Expand Up @@ -57,6 +57,7 @@ rust_library(
"shared.rs",
"string.rs",
"upb.rs",
"vtable.rs",
],
crate_root = "shared.rs",
rustc_flags = ["--cfg=upb_kernel"],
Expand Down Expand Up @@ -92,6 +93,7 @@ rust_library(
"proxied.rs",
"shared.rs",
"string.rs",
"vtable.rs",
],
crate_root = "shared.rs",
rustc_flags = ["--cfg=cpp_kernel"],
Expand Down
43 changes: 42 additions & 1 deletion rust/cpp.rs
Expand Up @@ -30,7 +30,7 @@

// Rust Protobuf runtime using the C++ kernel.

use crate::__internal::RawArena;
use crate::__internal::{Private, RawArena, RawMessage};
use std::alloc::Layout;
use std::cell::UnsafeCell;
use std::fmt;
Expand Down Expand Up @@ -156,6 +156,47 @@ impl fmt::Debug for SerializedData {
}
}

pub type BytesPresentMutData<'msg> = crate::vtable::RawVTableOptionalMutatorData<'msg, [u8]>;
pub type BytesAbsentMutData<'msg> = crate::vtable::RawVTableOptionalMutatorData<'msg, [u8]>;
pub type InnerBytesMut<'msg> = crate::vtable::RawVTableMutator<'msg, [u8]>;

/// The raw contents of every generated message.
#[derive(Debug)]
pub struct MessageInner {
pub msg: RawMessage,
}

/// Mutators that point to their original message use this to do so.
///
/// Since C++ messages manage their own memory, this can just copy the
/// `RawMessage` instead of referencing an arena like UPB must.
///
/// Note: even though this type is `Copy`, it should only be copied by
/// protobuf internals that can maintain mutation invariants.
#[derive(Clone, Copy, Debug)]
pub struct MutatorMessageRef<'msg> {
msg: RawMessage,
_phantom: PhantomData<&'msg mut ()>,
}
impl<'msg> MutatorMessageRef<'msg> {
#[allow(clippy::needless_pass_by_ref_mut)] // Sound construction requires mutable access.
pub fn new(_private: Private, msg: &'msg mut MessageInner) -> Self {
MutatorMessageRef { msg: msg.msg, _phantom: PhantomData }
}

pub fn msg(&self) -> RawMessage {
self.msg
}
}

pub fn copy_bytes_in_arena_if_needed_by_runtime<'a>(
_msg_ref: MutatorMessageRef<'a>,
val: &'a [u8],
) -> &'a [u8] {
// Nothing to do, the message manages its own string memory for C++.
val
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
3 changes: 3 additions & 0 deletions rust/internal.rs
Expand Up @@ -32,6 +32,9 @@
//! exposed to through the `protobuf` path but must be public for use by
//! generated code.

pub use crate::vtable::{
new_vtable_field_entry, BytesMutVTable, BytesOptionalMutVTable, RawVTableMutator,
};
use std::slice;

/// Used to protect internal-only items from being used accidentally.
Expand Down
1 change: 1 addition & 0 deletions rust/shared.rs
Expand Up @@ -67,6 +67,7 @@ pub mod __runtime;
mod optional;
mod proxied;
mod string;
mod vtable;

/// An error that happened during deserialization.
#[derive(Debug, Clone)]
Expand Down
145 changes: 89 additions & 56 deletions rust/string.rs
Expand Up @@ -32,7 +32,8 @@
#![allow(dead_code)]
#![allow(unused)]

use crate::__internal::Private;
use crate::__internal::{Private, PtrAndLen, RawMessage};
use crate::__runtime::{BytesAbsentMutData, BytesPresentMutData, InnerBytesMut};
use crate::{Mut, MutProxy, Proxied, ProxiedWithPresence, SettableValue, View, ViewProxy};
use std::borrow::Cow;
use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
Expand All @@ -43,10 +44,6 @@ use std::iter;
use std::ops::{Deref, DerefMut};
use utf8::Utf8Chunks;

/// This type will be replaced by something else in a future revision.
// TODO(b/285309330): remove this and any `impl`s using it.
pub type Todo<'msg> = (std::convert::Infallible, std::marker::PhantomData<&'msg mut ()>);

/// A mutator for `bytes` fields - this type is `protobuf::Mut<'msg, [u8]>`.
///
/// This type implements `Deref<Target = [u8]>`, so many operations are
Expand All @@ -62,9 +59,29 @@ pub type Todo<'msg> = (std::convert::Infallible, std::marker::PhantomData<&'msg
/// recommended to instead build a `Vec<u8>` or `String` and pass that directly
/// to `set`, which will reuse the allocation if supported by the runtime.
#[derive(Debug)]
pub struct BytesMut<'msg>(Todo<'msg>);
pub struct BytesMut<'msg> {
inner: InnerBytesMut<'msg>,
}

// SAFETY:
// - Protobuf Rust messages don't allow shared mutation across threads.
// - Protobuf Rust messages don't share arenas.
// - All access that touches an arena occurs behind a `&mut`.
// - All mutators that store an arena are `!Send`.
unsafe impl Sync for BytesMut<'_> {}

impl<'msg> BytesMut<'msg> {
/// Constructs a new `BytesMut` from its internal, runtime-dependent part.
#[doc(hidden)]
pub fn from_inner(_private: Private, inner: InnerBytesMut<'msg>) -> Self {
Self { inner }
}

/// Gets the current value of the field.
pub fn get(&self) -> &[u8] {
self.as_view()
}

/// Sets the byte string to the given `val`, cloning any borrowed data.
///
/// This method accepts both owned and borrowed byte strings; if the runtime
Expand All @@ -78,7 +95,7 @@ impl<'msg> BytesMut<'msg> {
///
/// Has no effect if `new_len` is larger than the current `len`.
pub fn truncate(&mut self, new_len: usize) {
todo!("b/285309330")
self.inner.truncate(new_len)
}

/// Clears the byte string to the empty string.
Expand All @@ -93,7 +110,7 @@ impl<'msg> BytesMut<'msg> {
/// `BytesMut::clear` results in the accessor returning an empty string
/// while `FieldEntry::clear` results in the non-empty default.
///
/// However, for a proto3 `bytes` that has implicit presence, there is no
/// However, for a proto3 `bytes` that have implicit presence, there is no
/// distinction between these states: unset `bytes` is the same as empty
/// `bytes` and the default is always the empty string.
///
Expand All @@ -117,7 +134,7 @@ impl Deref for BytesMut<'_> {

impl AsRef<[u8]> for BytesMut<'_> {
fn as_ref(&self) -> &[u8] {
todo!("b/285309330")
unsafe { self.inner.get() }
}
}

Expand All @@ -126,45 +143,20 @@ impl Proxied for [u8] {
type Mut<'msg> = BytesMut<'msg>;
}

impl<'msg> ViewProxy<'msg> for Todo<'msg> {
type Proxied = [u8];
fn as_view(&self) -> &[u8] {
unreachable!()
}
fn into_view<'shorter>(self) -> &'shorter [u8]
where
'msg: 'shorter,
{
unreachable!()
}
}

impl<'msg> MutProxy<'msg> for Todo<'msg> {
fn as_mut(&mut self) -> BytesMut<'msg> {
unreachable!()
}
fn into_mut<'shorter>(self) -> BytesMut<'shorter>
where
'msg: 'shorter,
{
unreachable!()
}
}

impl ProxiedWithPresence for [u8] {
type PresentMutData<'msg> = Todo<'msg>;
type AbsentMutData<'msg> = Todo<'msg>;
type PresentMutData<'msg> = BytesPresentMutData<'msg>;
type AbsentMutData<'msg> = BytesAbsentMutData<'msg>;

fn clear_present_field<'a>(
present_mutator: Self::PresentMutData<'a>,
) -> Self::AbsentMutData<'a> {
todo!("b/285309330")
present_mutator.clear()
}

fn set_absent_to_default<'a>(
absent_mutator: Self::AbsentMutData<'a>,
) -> Self::PresentMutData<'a> {
todo!("b/285309330")
absent_mutator.set_absent_to_default()
}
}

Expand Down Expand Up @@ -194,48 +186,89 @@ impl<'msg> ViewProxy<'msg> for BytesMut<'msg> {
where
'msg: 'shorter,
{
todo!("b/285309330")
self.inner.get()
}
}

impl<'msg> MutProxy<'msg> for BytesMut<'msg> {
fn as_mut(&mut self) -> BytesMut<'_> {
todo!("b/285309330")
BytesMut { inner: self.inner }
}

fn into_mut<'shorter>(self) -> BytesMut<'shorter>
where
'msg: 'shorter,
{
todo!("b/285309330")
BytesMut { inner: self.inner }
}
}

impl SettableValue<[u8]> for &'_ [u8] {
impl<'bytes> SettableValue<[u8]> for &'bytes [u8] {
fn set_on(self, _private: Private, mutator: BytesMut<'_>) {
todo!("b/285309330")
// SAFETY: this is a `bytes` field with no restriction on UTF-8.
unsafe { mutator.inner.set(self) }
}

fn set_on_absent(
self,
_private: Private,
absent_mutator: <[u8] as ProxiedWithPresence>::AbsentMutData<'_>,
) -> <[u8] as ProxiedWithPresence>::PresentMutData<'_> {
// SAFETY: this is a `bytes` field with no restriction on UTF-8.
unsafe { absent_mutator.set(self) }
}

fn set_on_present(
self,
_private: Private,
present_mutator: <[u8] as ProxiedWithPresence>::PresentMutData<'_>,
) {
// SAFETY: this is a `bytes` field with no restriction on UTF-8.
unsafe {
present_mutator.set(self);
}
}
}

impl<const N: usize> SettableValue<[u8]> for &'_ [u8; N] {
fn set_on(self, _private: Private, mutator: BytesMut<'_>) {
self[..].set_on(Private, mutator)
}
macro_rules! impl_forwarding_settable_value {
($proxied:ty, $self:ident => $self_forwarding_expr:expr) => {
fn set_on($self, _private: Private, mutator: BytesMut<'_>) {
($self_forwarding_expr).set_on(Private, mutator)
}

fn set_on_absent(
$self,
_private: Private,
absent_mutator: <$proxied as ProxiedWithPresence>::AbsentMutData<'_>,
) -> <$proxied as ProxiedWithPresence>::PresentMutData<'_> {
($self_forwarding_expr).set_on_absent(Private, absent_mutator)
}

fn set_on_present(
$self,
_private: Private,
present_mutator: <$proxied as ProxiedWithPresence>::PresentMutData<'_>,
) {
($self_forwarding_expr).set_on_present(Private, present_mutator)
}
};
}

impl<'a, const N: usize> SettableValue<[u8]> for &'a [u8; N] {
// forward to `self[..]`
impl_forwarding_settable_value!([u8], self => &self[..]);
}

impl SettableValue<[u8]> for Vec<u8> {
fn set_on(self, _private: Private, mutator: BytesMut<'_>) {
todo!("b/285309330")
}
// TODO(b/293956360): Investigate taking ownership of this when allowed by the
// runtime.
impl_forwarding_settable_value!([u8], self => &self[..]);
}

impl SettableValue<[u8]> for Cow<'_, [u8]> {
fn set_on(self, _private: Private, mutator: BytesMut<'_>) {
match self {
Cow::Borrowed(s) => s.set_on(Private, mutator),
Cow::Owned(v) => v.set_on(Private, mutator),
}
}
// TODO(b/293956360): Investigate taking ownership of this when allowed by the
// runtime.
impl_forwarding_settable_value!([u8], self => &self[..]);
}

impl Hash for BytesMut<'_> {
Expand Down

0 comments on commit 9a0bc39

Please sign in to comment.