From 32b3f13337ef0bf747705d058f4772c7fdabd736 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Tue, 25 Oct 2022 15:17:14 -0400 Subject: [PATCH] [YAML] Trim trailing whitespace from plain scalars In some cases plain scalars are currently parsed with a trailing newline. In particular this shows up often when parsing JSON files, e.g. note the `\n` after `456` below: ``` $ cat test.yaml { "foo": 123, "bar": 456 } $ yaml-bench test.yaml -canonical %YAML 1.2 --- !!map { ? !!str "foo" : !!str "123", ? !!str "bar" : !!str "456\n", } ... ``` The trailing whitespace ends up causing the conversion of the scalar to int/bool/etc. to fail, causing the issue seen here: https://github.com/llvm/llvm-project/issues/15877 From reading the YAML spec (https://yaml.org/spec/1.2.2/#733-plain-style) it seems like plain scalars should never end with whitespace, so this change trims all trailing whitespace characters from the value (specifically `b-line-feed`, `b-carriage-return`, `s-space`, and `s-tab`). Reviewed By: scott.linder Differential Revision: https://reviews.llvm.org/D137118 --- llvm/lib/Support/YAMLParser.cpp | 7 +++++-- llvm/test/YAMLParser/json.test | 13 +++++++++++++ llvm/unittests/Support/YAMLIOTest.cpp | 9 +++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 llvm/test/YAMLParser/json.test diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index b85b1eb83ef89..6ac2c6aeeb46a 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -2041,8 +2041,11 @@ StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { } return UnquotedValue; } - // Plain or block. - return Value.rtrim(' '); + // Plain. + // Trim whitespace ('b-char' and 's-white'). + // NOTE: Alternatively we could change the scanner to not include whitespace + // here in the first place. 
+ return Value.rtrim("\x0A\x0D\x20\x09"); } StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue diff --git a/llvm/test/YAMLParser/json.test b/llvm/test/YAMLParser/json.test new file mode 100644 index 0000000000000..7d1b24caed987 --- /dev/null +++ b/llvm/test/YAMLParser/json.test @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s | FileCheck %s + +# CHECK: !!map { +# CHECK: ? !!str "foo" +# CHECK: : !!str "123", +# CHECK: ? !!str "bar" +# CHECK: : !!str "456", +# CHECK: } + +{ + "foo": 123, + "bar": 456 +} diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index 2ed79cae31edc..f282d23dc500b 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -96,6 +96,15 @@ TEST(YAMLIO, TestMapRead) { EXPECT_EQ(doc.foo, 3); EXPECT_EQ(doc.bar, 5); } + + { + Input yin("{\"foo\": 3\n, \"bar\": 5}"); + yin >> doc; + + EXPECT_FALSE(yin.error()); + EXPECT_EQ(doc.foo, 3); + EXPECT_EQ(doc.bar, 5); + } } TEST(YAMLIO, TestMalformedMapRead) {