Skip to content

Commit

Permalink
Add detection of trailing garbage in deserialization/consumption (#30)
Browse files Browse the repository at this point in the history
- dict/list consumers gain a `.finish()` method that consumes the rest
  of the data to check that everything is valid *and* that the final `e`
  actually ends and the end (i.e. no trailing garbage).
- bt_deserialize now makes sure the entire string gets consumed and
  throws if it doesn't.
  • Loading branch information
jagerman committed Dec 15, 2023
1 parent f6172d5 commit 24fbdb7
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 1 deletion.
43 changes: 42 additions & 1 deletion oxenc/bt_serialize.h
Expand Up @@ -598,9 +598,15 @@ std::string bt_serialize(const T& val) {
/// int value;
/// bt_deserialize(encoded, value); // Sets value to 42
///
/// Note that this method can set a value even if in fails, in particular when the value was parsed
/// successfully but the parsed string still has remaining content.
///
template <typename T, std::enable_if_t<!std::is_const_v<T>, int> = 0>
void bt_deserialize(std::string_view s, T& val) {
return detail::bt_deserialize<T>{}(s, val);
detail::bt_deserialize<T>{}(s, val);
if (!s.empty())
throw bt_deserialize_invalid{
"Deserialization failed: did not consume the entire encoded string" + std::to_string(s.size())};
}

/// Deserializes the given string_view into a `T`, which is returned.
Expand Down Expand Up @@ -973,6 +979,23 @@ class bt_list_consumer {
else
throw bt_deserialize_invalid_type{"next bt value has unknown type"};
}

/// Finishes reading the list by reading through (and ignoring) any remaining values until it
/// reaches the end of the list, and confirms that the end of the list is in fact the end of the
/// input. Will throw if anything doesn't parse, or if the list terminates but *isn't* at the
/// end of the buffer being parsed.
///
/// It is not required to call this, but not calling it will not notice if there is invalid data
/// later in the list or after the end of the list.
void finish() {
while (!is_finished())
skip_value();

// If we consumed the entire buffer we should have only the terminating 'e' left (and
// `is_finished()` already checked that it is in fact an `e`).
if (data.size() != 1)
throw bt_deserialize_invalid{"Dict finished without consuming the entire buffer"};
}
};

/// Class that allows you to walk through key-value pairs of a bt-encoded dict in memory without
Expand Down Expand Up @@ -1371,6 +1394,24 @@ class bt_dict_consumer : private bt_list_consumer {
return std::nullopt;
return consume<T>();
}

/// Finishes reading the dict by reading through (and ignoring) any remaining keys until it
/// reaches the end of the dict, and confirms that the end of the dict is in fact the end of the
/// input. Will throw if anything doesn't parse, or if the dict terminates but *isn't* at the
/// end of the buffer being parsed.
///
/// It is not required to call this, but not calling it will not notice if there is invalid data
/// later in the dict or after the end of the dict.
void finish() {
while (!is_finished()) {
flush_key();
skip_value();
}
// If we consumed the entire buffer we should have only the terminating 'e' left (and
// `is_finished()` already checked that it is in fact an `e`).
if (data.size() != 1)
throw bt_deserialize_invalid{"Dict finished without consuming the entire buffer"};
}
};

inline bt_dict_consumer bt_list_consumer::consume_dict_consumer() {
Expand Down
28 changes: 28 additions & 0 deletions tests/test_bt.cpp
Expand Up @@ -645,6 +645,34 @@ TEST_CASE("bt append_signature", "[bt][signature]") {
}
}

TEST_CASE("bt trailing garbage detection", "[bt][deserialization][trailing-garbage]") {
REQUIRE_THROWS(bt_deserialize<bt_dict>("de🤔"));
REQUIRE_NOTHROW(bt_deserialize<bt_dict>("de"));
REQUIRE_THROWS(bt_deserialize<int>("i123eIN YOUR DATA READING YOUR INTS!"));
REQUIRE_NOTHROW(bt_deserialize<int>("i123e"));
REQUIRE_THROWS(bt_deserialize<std::string>("2:hibye"));
REQUIRE_NOTHROW(bt_deserialize<std::string>("2:hi"));

int x;
REQUIRE_NOTHROW(bt_deserialize("i456e", x));
CHECK(x == 456);
REQUIRE_THROWS(bt_deserialize("i789ewhoawhoawhooooaa", x));
CHECK(x == 789); // The integer still get sets, even though we throw

bt_dict_consumer dc1{"d1:ai123ee🤔"};
REQUIRE_THROWS(dc1.finish());

bt_dict_consumer dc2{"d1:ai123e1:bdee🤔"};
dc2.required("a");
CHECK(dc2.consume_integer<int>() == 123);
REQUIRE_THROWS(dc2.finish());

bt_dict_consumer dc3{"d1:ai123e1:bdee"};
dc3.required("a");
CHECK(dc3.consume_integer<int>() == 123);
REQUIRE_NOTHROW(dc3.finish());
}

#ifdef OXENC_APPLE_TO_CHARS_WORKAROUND
TEST_CASE("apple to_chars workaround test", "[bt][apple][sucks]") {
char buf[20];
Expand Down

0 comments on commit 24fbdb7

Please sign in to comment.