Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Box trivial cyclic refs #41

Merged
merged 13 commits into from Mar 16, 2022
86 changes: 84 additions & 2 deletions typify-impl/src/convert.rs
Expand Up @@ -712,6 +712,14 @@ impl TypeSpace {
return Ok((ty, metadata));
}

// Rust can emit "anyOf":[{"$ref":"#/definitions/C"},{"type":"null"} for
// Option, so match against that here to short circuit the check for
// mutual exclusivity below.
if let Some(option_type_entry) = self.maybe_option(type_name.clone(), metadata, subschemas)
{
return Ok((option_type_entry, metadata));
}

// Check if this could be more precisely handled as a "one-of". This
// occurs if each subschema is mutually exclusive i.e. so that exactly
// one of them can match.
Expand Down Expand Up @@ -795,7 +803,7 @@ impl TypeSpace {
return Ok((ty, metadata));
}
let ty = self
.maybe_option_as_enum(type_name.clone(), metadata, subschemas)
.maybe_option(type_name.clone(), metadata, subschemas)
.or_else(|| self.maybe_externally_tagged_enum(type_name.clone(), metadata, subschemas))
.or_else(|| self.maybe_adjacently_tagged_enum(type_name.clone(), metadata, subschemas))
.or_else(|| self.maybe_internally_tagged_enum(type_name.clone(), metadata, subschemas))
Expand Down Expand Up @@ -930,9 +938,10 @@ impl TypeSpace {
mod tests {
use std::num::{NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8};

use schema::Schema;
use schemars::{schema_for, JsonSchema};

use crate::{validate_builtin, Name, TypeSpace};
use crate::{test_util::validate_output, validate_builtin, Name, TypeSpace};
use paste::paste;

fn int_helper<T: JsonSchema>() {
Expand Down Expand Up @@ -1012,6 +1021,79 @@ mod tests {
assert_eq!(type_space.iter_types().count(), 4);
}

#[test]
fn test_trivial_cycle() {
#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct A {
a: Box<A>,
}

validate_output::<A>();
}

#[test]
fn test_optional_trivial_cycle() {
#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct A {
a: Option<Box<A>>,
}

validate_output::<A>();
}

#[test]
fn test_enum_trivial_cycles() {
#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
enum A {
Variant0(u64),
Varant1 {
a: u64,
b: Vec<A>,
rop: Option<Box<A>>,
},
Variant2 {
a: Box<A>,
},
Variant3(u64, Box<A>),
Variant4(Option<Box<A>>, String),
}

validate_output::<A>();
}

#[test]
fn test_basic_option_flat() {
#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct C {}

#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct A {
a: Option<C>,
}

validate_output::<A>();
}

#[test]
fn test_unit_option() {
#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct Foo;

#[derive(JsonSchema, Schema)]
#[allow(dead_code)]
struct Bar {
a: Option<Foo>,
}

validate_output::<Bar>();
}

// TODO we can turn this on once we generate proper sets.
#[ignore]
#[test]
Expand Down
6 changes: 3 additions & 3 deletions typify-impl/src/enums.rs
Expand Up @@ -17,7 +17,7 @@ use crate::{
};

impl TypeSpace {
pub(crate) fn maybe_option_as_enum(
pub(crate) fn maybe_option(
&mut self,
type_name: Name,
metadata: &Option<Box<schemars::schema::Metadata>>,
Expand Down Expand Up @@ -1245,7 +1245,7 @@ mod tests {
}

#[test]
fn test_maybe_option_as_enum() {
fn test_maybe_option() {
let subschemas = vec![
SchemaObject {
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::String))),
Expand All @@ -1261,7 +1261,7 @@ mod tests {

let mut type_space = TypeSpace::default();
let type_entry = type_space
.maybe_option_as_enum(Name::Unknown, &None, &subschemas)
.maybe_option(Name::Unknown, &None, &subschemas)
.unwrap();

assert_eq!(type_entry.details, TypeEntryDetails::Option(TypeId(1)))
Expand Down
151 changes: 147 additions & 4 deletions typify-impl/src/lib.rs
Expand Up @@ -8,7 +8,7 @@ use quote::quote;
use rustfmt_wrapper::rustfmt;
use schemars::schema::{Metadata, Schema};
use thiserror::Error;
use type_entry::{TypeEntry, TypeEntryDetails, TypeEntryNewtype};
use type_entry::{TypeEntry, TypeEntryDetails, TypeEntryNewtype, VariantDetails};

#[cfg(test)]
mod test_util;
Expand Down Expand Up @@ -48,6 +48,7 @@ pub enum TypeDetails<'a> {
Array(TypeId),
Map(TypeId, TypeId),
Set(TypeId),
Box(TypeId),
Tuple(Box<dyn Iterator<Item = TypeId> + 'a>),
Unit,
Builtin(&'a str),
Expand Down Expand Up @@ -163,7 +164,8 @@ impl TypeSpace {
// Assign IDs to reference types before actually converting them. We'll
// need these in the case of forward (or circular) references.
let base_id = self.next_id;
self.next_id += definitions.len() as u64;
let def_len = definitions.len() as u64;
self.next_id += def_len;

for (index, (ref_name, _)) in definitions.iter().enumerate() {
self.ref_to_id
Expand All @@ -178,7 +180,11 @@ impl TypeSpace {
None => &ref_name,
};

info!("converting type: {}", type_name);
info!(
"converting type: {} with schema {}",
type_name,
serde_json::to_string(&schema).unwrap()
);

let (type_entry, metadata) =
self.convert_schema(Name::Required(type_name.to_string()), &schema)?;
Expand Down Expand Up @@ -219,11 +225,142 @@ impl TypeSpace {
// of derives than we might otherwise. This notably prevents us from
// using a HashSet or BTreeSet type where we might like to.

// TODO Look for containment cycles that we need to break with a Box<T>
// Once all ref types are in, look for containment cycles that we need
// to break with a Box<T>.
for index in 0..def_len {
let type_id = TypeId(base_id + index);

let mut box_id = None;

let mut type_entry = self.id_to_entry.get_mut(&type_id).unwrap().clone();
self.break_trivial_cyclic_refs(&type_id, &mut type_entry, &mut box_id);
let _ = self.id_to_entry.insert(type_id, type_entry);
}

Ok(())
}

/// If a type refers to itself, this creates a cycle that will eventually be
/// emit as a Rust struct that cannot be constructed. Break those cycles
/// here.
///
/// While we aren't yet handling the general case of type containment
/// cycles, it's not that bad to look at trivial cycles such as:
///
/// 1) A type refering to itself: A -> A
/// 2) A type optionally referring to itself: A -> Option<A>
/// 3) An enum variant referring to itself, either optionally or directly.
///
/// TODO currently only trivial cycles are broken. A more generic solution
/// may be required, but it may also a point to ask oneself why such a
/// complicated type is required :) A generic solution is difficult because
/// certain cycles introduce a question of *where* to Box to break the
/// cycle, and there's no one answer to this.
///
fn break_trivial_cyclic_refs(
&mut self,
type_id: &TypeId,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might you want to rename this parent_type_id to make the comments below more explicit? same for parent_type_entry

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed, done in 961dbc1

type_entry: &mut TypeEntry,
box_id: &mut Option<TypeId>,
) {
match &mut type_entry.details {
// Look for the case where an option refers to the parent type
TypeEntryDetails::Option(option_type_id) => {
if *option_type_id == *type_id {
*option_type_id = box_id
.get_or_insert_with(|| self.id_to_box(type_id))
.clone();
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we exercising this code path with a test? I'm not sure I can imagine how we would get here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is mainly hit when recursing, aka

struct A {
  a: Option<Box<A>>
}


// Look for the case where a struct property refers to the parent
// type
TypeEntryDetails::Struct(s) => {
for prop in &mut s.properties {
if prop.type_id == *type_id {
// A struct property directly refers to the parent type
prop.type_id = box_id
.get_or_insert_with(|| self.id_to_box(type_id))
.clone();
} else {
// A struct property optionally refers to the parent type
let mut prop_type_entry =
self.id_to_entry.get_mut(&prop.type_id).unwrap().clone();
self.break_trivial_cyclic_refs(&type_id, &mut prop_type_entry, box_id);
let _ = self
.id_to_entry
.insert(prop.type_id.clone(), prop_type_entry);
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just noting that I think this logic is repeated 3 times

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the prompt - I originally assumed I could not pull that into a method because it wouldn't make the borrow checker happy, but turns out it's possible. Done in 3d9210c

}
}

// Look for the cases where an enum variant refers to the parent
// type
TypeEntryDetails::Enum(type_entry_enum) => {
for variant in &mut type_entry_enum.variants {
match &mut variant.details {
// Simple variants will not refer to anything
VariantDetails::Simple => {}
// Look for a tuple entry that refers to the parent type
VariantDetails::Tuple(vec_type_id) => {
for tuple_type_id in vec_type_id {
// A tuple entry directly refers to the parent type
if *tuple_type_id == *type_id {
*tuple_type_id = box_id
.get_or_insert_with(|| self.id_to_box(type_id))
.clone();
} else {
// A tuple entry optionally refers to the parent type
let mut tuple_type_entry =
self.id_to_entry.get_mut(&tuple_type_id).unwrap().clone();
self.break_trivial_cyclic_refs(
&type_id,
&mut tuple_type_entry,
box_id,
);
let _ = self
.id_to_entry
.insert(tuple_type_id.clone(), tuple_type_entry);
}
}
}
// Look for a struct property that refers to the parent type
VariantDetails::Struct(vec_struct_property) => {
for struct_property in vec_struct_property {
let vec_type_id = &mut struct_property.type_id;
// A struct property refers to the parent type
if *vec_type_id == *type_id {
*vec_type_id = box_id
.get_or_insert_with(|| self.id_to_box(type_id))
.clone();
} else {
// A struct property optionally refers to
// the parent type
let mut prop_type_entry =
self.id_to_entry.get_mut(vec_type_id).unwrap().clone();
self.break_trivial_cyclic_refs(
&type_id,
&mut prop_type_entry,
box_id,
);
let _ = self
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to explicitly ignore the return, but it certainly doesn't hurt.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has moved to the end of add_ref_types, and I removed the comment. The intention is clear from the code that we're modifying existing entries in id_to_entry unconditionally.

.id_to_entry
.insert(vec_type_id.clone(), prop_type_entry);
}
}
}
}
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we want to handle the TypeDetails::Newtype case? That's the only other one I can imagine hitting trivially. It would look like struct A(Box<A>)... (and talk about a useless type!!) Certainly we'll need to consider it when if/when we deal with the general case...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in 63481f1

// Containers that can be size 0 are *not* cyclic references for that type
TypeEntryDetails::Array(_) => {}
TypeEntryDetails::Set(_) => {}
TypeEntryDetails::Map(_, _) => {}
// Everything else can be ignored
_ => {}
}
}

/// Add a new type and return a type identifier that may be used in
/// function signatures or embedded within other types.
pub fn add_type(&mut self, schema: &Schema) -> Result<TypeId> {
Expand Down Expand Up @@ -358,6 +495,11 @@ impl TypeSpace {
fn type_to_option(&mut self, ty: TypeEntry) -> TypeEntry {
TypeEntryDetails::Option(self.assign_type(ty)).into()
}

/// Create a Box<T> from a pre-assigned TypeId and assign it an ID.
fn id_to_box(&mut self, id: &TypeId) -> TypeId {
self.assign_type(TypeEntryDetails::Box(id.clone()).into())
}
}

impl ToString for TypeSpace {
Expand Down Expand Up @@ -439,6 +581,7 @@ impl<'a> Type<'a> {
TypeDetails::Map(key_id.clone(), value_id.clone())
}
TypeEntryDetails::Set(type_id) => TypeDetails::Set(type_id.clone()),
TypeEntryDetails::Box(type_id) => TypeDetails::Box(type_id.clone()),
TypeEntryDetails::Tuple(types) => TypeDetails::Tuple(Box::new(types.iter().cloned())),

// Builtin types
Expand Down
19 changes: 16 additions & 3 deletions typify-impl/src/test_util.rs
Expand Up @@ -34,12 +34,25 @@ fn validate_output_impl<T: JsonSchema + Schema>(ignore_variant_names: bool) {
let name = type_name::<T>().rsplit_once("::").unwrap().1.to_string();

let mut type_space = TypeSpace::default();

// Convert all references
type_space
.add_ref_types(schema.definitions.clone())
.unwrap();
let (ty, _) = type_space
.convert_schema_object(Name::Required(name), &schema.schema)
.unwrap();

// If we have converted the type already, use that, otherwise convert schema object
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// If we have converted the type already, use that, otherwise convert schema object
// In some situations, `schema_for!(T)` may actually give us two copies of the type: one in the definitions and one in the schema. This will occur in particular for cyclic types i.e. those for which the type itself is a reference.
// If we have converted the type already, use that, otherwise convert schema object

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in bc4a146

let ty = if let Some(already_type_id) = type_space.ref_to_id.get(&name) {
type_space
.id_to_entry
.get(&already_type_id)
.unwrap()
.clone()
} else {
let (ty, _) = type_space
.convert_schema_object(Name::Required(name), &schema.schema)
.unwrap();
ty
ahl marked this conversation as resolved.
Show resolved Hide resolved
};

let output = ty.output(&type_space);

Expand Down