From 7a2e9f532d0eb34af6420aa562b748e90009a3a6 Mon Sep 17 00:00:00 2001 From: Chris Beck Date: Sat, 18 Feb 2023 12:00:38 -0700 Subject: [PATCH 1/2] When a serde(untagged) enum can't deser, show all the reasons why I was writing some code recently that uses `serde(untagged)` for an enum when deserializing certain JSON schema, but I was unhappy with the quality of the error messages I was getting, because it doesn't really tell you why each possibility doesn't work. I noticed that there is a TODO item in the `serde_derive` proc macro code that generates this deserialization. I decided that trying to improve the error messages upstream in serde-derive is easier than trying to change how I'm using serde, so I took a stab at implementing this TODO, and updating the tests that test error messages, and writing some more tests. I have tried to follow the patterns and conventions that I have seen elsewhere in the serde-derive source code, and I think that the code gen is good in that it uses `format_args!` like other parts of serde error handling and avoids making a new dynamic memory allocation. When untagged deserialization fails, the errors are collected on the stack rather than in a new dynamically-sized container. Let me know if you think this is a good direction, I'm happy to iterate if this patch is interesting to you. Thanks for building serde, it's great. --- serde_derive/src/de.rs | 38 +++++++++++----- test_suite/tests/test_annotations.rs | 3 +- test_suite/tests/test_macros.rs | 67 +++++++++++++++++++++++++++- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/serde_derive/src/de.rs b/serde_derive/src/de.rs index a703adaf7..eec1d23fe 100644 --- a/serde_derive/src/de.rs +++ b/serde_derive/src/de.rs @@ -1674,28 +1674,44 @@ fn deserialize_untagged_enum( )) }); - // TODO this message could be better by saving the errors from the failed - // attempts. The heuristic used by TOML was to count the number of fields - // processed before an error, and use the error that happened after the - // largest number of fields. I'm not sure I like that. Maybe it would be - // better to save all the errors and combine them into one message that - // explains why none of the variants matched. let fallthrough_msg = format!( "data did not match any variant of untagged enum {}", params.type_name() ); let fallthrough_msg = cattrs.expecting().unwrap_or(&fallthrough_msg); - quote_block! { + // If all variants cannot be deserialized, we will try to write an error + // message explaining why it failed for each one + // The format string we are building will have the following structure: + // "data did not match any variant of untagged enum{}\nvar1: {}\nvar2: {}\nvar3: {}" + let mut err_format_string = fallthrough_msg.to_owned(); + let mut num_variants = 0usize; + for var in variants.iter().filter(|variant| !variant.attrs.skip_deserializing()) { + err_format_string.push_str("\n"); + err_format_string.push_str(&var.ident.to_string()); + err_format_string.push_str(": {}"); + num_variants += 1; + } + + // We need two copies of this iterator, and it's not cloneable + let err_identifiers1 = (0..num_variants).map(|idx| format_ident!("_err{}", idx)); + let err_identifiers2 = (0..num_variants).map(|idx| format_ident!("_err{}", idx)); + + quote_block! { let __content = try!(<_serde::__private::de::Content as _serde::Deserialize>::deserialize(__deserializer)); #( - if let _serde::__private::Ok(__ok) = #attempts { - return _serde::__private::Ok(__ok); - } + let #err_identifiers1 = match #attempts { + _serde::__private::Ok(__ok) => { + return _serde::__private::Ok(__ok); + }, + _serde::__private::Err(__err) => { + __err + }, + }; )* - _serde::__private::Err(_serde::de::Error::custom(#fallthrough_msg)) + _serde::__private::Err(_serde::de::Error::custom(format_args!(#err_format_string, #(#err_identifiers2),*))) } } diff --git a/test_suite/tests/test_annotations.rs b/test_suite/tests/test_annotations.rs index f32c22c1a..748210383 100644 --- a/test_suite/tests/test_annotations.rs +++ b/test_suite/tests/test_annotations.rs @@ -2776,7 +2776,8 @@ fn test_expecting_message_untagged_tagged_enum() { Untagged, } - assert_de_tokens_error::(&[Token::Str("Untagged")], r#"something strange..."#); + assert_de_tokens_error::(&[Token::Str("Untagged")], r#"something strange... +Untagged: invalid type: string "Untagged", expected unit variant Enum::Untagged"#); } #[test] diff --git a/test_suite/tests/test_macros.rs b/test_suite/tests/test_macros.rs index c6973db90..430e92a10 100644 --- a/test_suite/tests/test_macros.rs +++ b/test_suite/tests/test_macros.rs @@ -662,7 +662,13 @@ fn test_untagged_enum() { assert_de_tokens_error::( &[Token::Tuple { len: 1 }, Token::U8(1), Token::TupleEnd], - "data did not match any variant of untagged enum Untagged", + r#"data did not match any variant of untagged enum Untagged +A: invalid type: sequence, expected struct variant Untagged::A +B: invalid type: sequence, expected struct variant Untagged::B +C: invalid type: sequence, expected unit variant Untagged::C +D: invalid type: sequence, expected u8 +E: invalid type: sequence, expected a string +F: invalid length 1, expected tuple variant Untagged::F with 2 elements"#, ); assert_de_tokens_error::( @@ -673,10 +679,67 @@ fn test_untagged_enum() { Token::U8(3), Token::TupleEnd, ], - "data did not match any variant of untagged enum Untagged", + r#"data did not match any variant of untagged enum Untagged +A: invalid type: sequence, expected struct variant Untagged::A +B: invalid type: sequence, expected struct variant Untagged::B +C: invalid type: sequence, expected unit variant Untagged::C +D: invalid type: sequence, expected u8 +E: invalid type: sequence, expected a string +F: invalid length 3, expected 2 elements in sequence"#, ); } +#[test] +fn test_untagged_enum_with_disallow_unknown_fields() { + #[derive(Debug, PartialEq, Serialize, Deserialize)] + #[serde(untagged)] + #[serde(deny_unknown_fields)] + enum Untagged { + A { a: String }, + B { b: String, #[serde(default)] c: Vec }, + } + + assert_de_tokens_error::( + &[ + Token::Map { len: Some(1) }, + Token::Str("d"), + Token::Str("foo"), + Token::MapEnd + ], + r#"data did not match any variant of untagged enum Untagged +A: unknown field `d`, expected `a` +B: unknown field `d`, expected `b` or `c`"#); + + assert_de_tokens_error::( + &[ + Token::Map { len: Some(2) }, + Token::Str("a"), + Token::Str("foo"), + Token::Str("c"), + Token::Seq { len: Some(1) }, + Token::Str("bar"), + Token::SeqEnd, + Token::MapEnd + ], + r#"data did not match any variant of untagged enum Untagged +A: unknown field `c`, expected `a` +B: unknown field `a`, expected `b` or `c`"#); + + assert_de_tokens_error::( + &[ + Token::Map { len: Some(1) }, + Token::Str("c"), + Token::Seq { len: Some(1) }, + Token::Str("bar"), + Token::SeqEnd, + Token::MapEnd + ], + r#"data did not match any variant of untagged enum Untagged +A: unknown field `c`, expected `a` +B: missing field `b`"#); + +} + #[test] fn test_internally_tagged_enum() { #[derive(Debug, PartialEq, Serialize, Deserialize)] From a5eabd9aeea832d08938539744f6b5a42343c197 Mon Sep 17 00:00:00 2001 From: Chris Beck Date: Sun, 19 Feb 2023 22:57:56 -0700 Subject: [PATCH 2/2] fixup --- serde_derive/src/de.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/serde_derive/src/de.rs b/serde_derive/src/de.rs index eec1d23fe..55f8ad91a 100644 --- a/serde_derive/src/de.rs +++ b/serde_derive/src/de.rs @@ -1693,9 +1693,9 @@ fn deserialize_untagged_enum( num_variants += 1; } - // We need two copies of this iterator, and it's not cloneable + // We need two copies of this iterator let err_identifiers1 = (0..num_variants).map(|idx| format_ident!("_err{}", idx)); - let err_identifiers2 = (0..num_variants).map(|idx| format_ident!("_err{}", idx)); + let err_identifiers2 = err_identifiers1.clone(); quote_block! { let __content = try!(<_serde::__private::de::Content as _serde::Deserialize>::deserialize(__deserializer));