Skip to content

Commit

Permalink
merge bitcoin#20457: Make Parse{Int,UInt}{32,64} use locale independent std::from_chars(…) (C++17) instead of locale dependent strto{l,ll,ul,ull}
Browse files Browse the repository at this point in the history
  • Loading branch information
practicalswift authored and kwvg committed Oct 21, 2022
1 parent ed79710 commit b306f19
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 74 deletions.
135 changes: 135 additions & 0 deletions src/test/fuzz/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,99 @@
#include <version.h>

#include <cstdint>
#include <cstdlib>
#include <string>
#include <vector>

namespace {
/**
 * Common input validation shared by the legacy strto*-based parsers below.
 *
 * @param str  Candidate numeric string.
 * @return false for an empty string, for a string whose first or last
 *         character satisfies IsSpace(), or for a string that fails
 *         ValidAsCString(); true otherwise.
 */
bool LegacyParsePrechecks(const std::string& str)
{
    // No empty string allowed
    if (str.empty()) {
        return false;
    }
    // No padding allowed: neither the first nor the last character may be whitespace
    const bool leading_space = IsSpace(str[0]);
    if (leading_space || IsSpace(str[str.size() - 1])) {
        return false;
    }
    // No embedded NUL characters allowed
    if (ValidAsCString(str)) {
        return true;
    }
    return false;
}

/**
 * strtol-based reference parser for a signed 32-bit decimal integer.
 *
 * @param str  String to parse; must pass LegacyParsePrechecks().
 * @param out  Optional destination. Once the prechecks pass it is written
 *             unconditionally (with the strtol result narrowed to int32_t),
 *             even when the function goes on to return false.
 * @return true only if the whole string was consumed, strtol did not set
 *         errno, and the value lies within the int32_t range.
 */
bool LegacyParseInt32(const std::string& str, int32_t* out)
{
    if (!LegacyParsePrechecks(str)) {
        return false;
    }
    char* parse_end = nullptr;
    errno = 0; // strtol does not touch errno on success, so clear it up front
    const long int parsed = strtol(str.c_str(), &parse_end, 10);
    if (out != nullptr) {
        *out = static_cast<int32_t>(parsed);
    }
    // Anything left after the number means the input was not a pure integer.
    if (parse_end == nullptr || *parse_end != 0) {
        return false;
    }
    if (errno != 0) {
        return false;
    }
    // strtol yields a *long int*; on platforms where long is wider than 32 bits
    // it can succeed for values that do not fit an int32_t, so range-check here.
    return parsed >= std::numeric_limits<int32_t>::min() &&
           parsed <= std::numeric_limits<int32_t>::max();
}

/**
 * strtoll-based reference parser for a signed 64-bit decimal integer.
 *
 * @param str  String to parse; must pass LegacyParsePrechecks().
 * @param out  Optional destination. Once the prechecks pass it is written
 *             unconditionally, even when the function goes on to return false.
 * @return true only if the whole string was consumed, strtoll did not set
 *         errno, and the value lies within the int64_t range.
 */
bool LegacyParseInt64(const std::string& str, int64_t* out)
{
    if (!LegacyParsePrechecks(str)) {
        return false;
    }
    char* parse_end = nullptr;
    errno = 0; // strtoll does not touch errno on success, so clear it up front
    const long long int parsed = strtoll(str.c_str(), &parse_end, 10);
    if (out != nullptr) {
        *out = static_cast<int64_t>(parsed);
    }
    // Anything left after the number means the input was not a pure integer.
    if (parse_end == nullptr || *parse_end != 0) {
        return false;
    }
    if (errno != 0) {
        return false;
    }
    // strtoll yields a *long long int*; the explicit int64_t range check is kept
    // so the result is validated against the target type rather than the source type.
    return parsed >= std::numeric_limits<int64_t>::min() &&
           parsed <= std::numeric_limits<int64_t>::max();
}

/**
 * strtoul-based reference parser for an unsigned 32-bit decimal integer.
 *
 * @param str  String to parse; must pass LegacyParsePrechecks() and must not
 *             start with '-'.
 * @param out  Optional destination. Once the early rejections pass it is
 *             written unconditionally (narrowed to uint32_t), even when the
 *             function goes on to return false.
 * @return true only if the whole string was consumed, strtoul did not set
 *         errno, and the value does not exceed the uint32_t maximum.
 */
bool LegacyParseUInt32(const std::string& str, uint32_t* out)
{
    if (!LegacyParsePrechecks(str)) {
        return false;
    }
    // Reject negative values: strtoul would otherwise accept them when they fit in the range
    if (str.size() >= 1 && str[0] == '-') {
        return false;
    }
    char* parse_end = nullptr;
    errno = 0; // strtoul does not touch errno on success, so clear it up front
    const unsigned long int parsed = strtoul(str.c_str(), &parse_end, 10);
    if (out != nullptr) {
        *out = static_cast<uint32_t>(parsed);
    }
    // Anything left after the number means the input was not a pure integer.
    if (parse_end == nullptr || *parse_end != 0) {
        return false;
    }
    if (errno != 0) {
        return false;
    }
    // strtoul yields an *unsigned long int*; on platforms where that is wider than
    // 32 bits it can succeed for values that do not fit a uint32_t, so range-check here.
    return parsed <= std::numeric_limits<uint32_t>::max();
}

/**
 * Reference parser for an unsigned 8-bit decimal integer, implemented by
 * parsing as uint32_t via LegacyParseUInt32() and then range-checking.
 *
 * @param str  String to parse.
 * @param out  Optional destination; written only when parsing succeeds.
 * @return true if the string parses as a uint32_t no greater than the
 *         uint8_t maximum, false otherwise.
 */
bool LegacyParseUInt8(const std::string& str, uint8_t* out)
{
    uint32_t wide;
    if (!LegacyParseUInt32(str, &wide)) {
        return false;
    }
    // Parsed fine as 32-bit; now make sure it also fits into 8 bits.
    if (wide > std::numeric_limits<uint8_t>::max()) {
        return false;
    }
    if (out != nullptr) {
        *out = static_cast<uint8_t>(wide);
    }
    return true;
}

/**
 * strtoull-based reference parser for an unsigned 64-bit decimal integer.
 *
 * @param str  String to parse; must pass LegacyParsePrechecks() and must not
 *             start with '-'.
 * @param out  Optional destination. Once the early rejections pass it is
 *             written unconditionally, even when the function goes on to
 *             return false.
 * @return true only if the whole string was consumed, strtoull did not set
 *         errno, and the value does not exceed the uint64_t maximum.
 */
bool LegacyParseUInt64(const std::string& str, uint64_t* out)
{
    if (!LegacyParsePrechecks(str)) {
        return false;
    }
    // Reject negative values: strtoull would otherwise accept them when they fit in the range
    if (str.size() >= 1 && str[0] == '-') {
        return false;
    }
    char* parse_end = nullptr;
    errno = 0; // strtoull does not touch errno on success, so clear it up front
    const unsigned long long int parsed = strtoull(str.c_str(), &parse_end, 10);
    if (out != nullptr) {
        *out = static_cast<uint64_t>(parsed);
    }
    // Anything left after the number means the input was not a pure integer.
    if (parse_end == nullptr || *parse_end != 0) {
        return false;
    }
    if (errno != 0) {
        return false;
    }
    // strtoull yields an *unsigned long long int*; the explicit uint64_t bound is kept
    // so the result is validated against the target type rather than the source type.
    return parsed <= std::numeric_limits<uint64_t>::max();
}
}; // namespace

FUZZ_TARGET(string)
{
FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size());
Expand Down Expand Up @@ -127,4 +217,49 @@ FUZZ_TARGET(string)
const bilingual_str bs2{random_string_2, random_string_1};
(void)(bs1 + bs2);
}
{
    // Differential check: for every integer width, the current Parse* function
    // and its Legacy* counterpart must agree on success/failure, and on the
    // parsed value whenever parsing succeeds. Output values are only compared
    // on success, since they are not guaranteed to be meaningful on failure.

    // int32_t
    int32_t i32;
    int32_t i32_legacy;
    const bool ok_i32 = ParseInt32(random_string_1, &i32);
    const bool ok_i32_legacy = LegacyParseInt32(random_string_1, &i32_legacy);
    assert(ok_i32 == ok_i32_legacy);
    if (ok_i32) {
        assert(i32 == i32_legacy);
    }

    // int64_t
    int64_t i64;
    int64_t i64_legacy;
    const bool ok_i64 = ParseInt64(random_string_1, &i64);
    const bool ok_i64_legacy = LegacyParseInt64(random_string_1, &i64_legacy);
    assert(ok_i64 == ok_i64_legacy);
    if (ok_i64) {
        assert(i64 == i64_legacy);
    }

    // uint32_t
    uint32_t u32;
    uint32_t u32_legacy;
    const bool ok_u32 = ParseUInt32(random_string_1, &u32);
    const bool ok_u32_legacy = LegacyParseUInt32(random_string_1, &u32_legacy);
    assert(ok_u32 == ok_u32_legacy);
    if (ok_u32) {
        assert(u32 == u32_legacy);
    }

    // uint64_t
    uint64_t u64;
    uint64_t u64_legacy;
    const bool ok_u64 = ParseUInt64(random_string_1, &u64);
    const bool ok_u64_legacy = LegacyParseUInt64(random_string_1, &u64_legacy);
    assert(ok_u64 == ok_u64_legacy);
    if (ok_u64) {
        assert(u64 == u64_legacy);
    }

    // uint8_t
    uint8_t u8;
    uint8_t u8_legacy;
    const bool ok_u8 = ParseUInt8(random_string_1, &u8);
    const bool ok_u8_legacy = LegacyParseUInt8(random_string_1, &u8_legacy);
    assert(ok_u8 == ok_u8_legacy);
    if (ok_u8) {
        assert(u8 == u8_legacy);
    }
}
}
75 changes: 75 additions & 0 deletions src/test/util_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,81 @@ BOOST_AUTO_TEST_CASE(test_ParseInt32)
BOOST_CHECK(!ParseInt32("32482348723847471234", nullptr));
}

// Checks ToIntegral<T> for accepted decimal inputs, rejected malformed inputs,
// and the exact lower/upper boundaries of each signed and unsigned width from
// 8 to 64 bits.
BOOST_AUTO_TEST_CASE(test_ToIntegral)
{
// Accepted: plain decimal digits, optionally with leading zeros and/or a
// single leading minus sign (negative zero yields 0).
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("1234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("0").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("01234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000001234").value(), 1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000001234").value(), -1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000000000").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000000000").value(), 0);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1234").value(), -1'234);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1").value(), -1);

// Rejected: leading/trailing whitespace, trailing non-digit characters,
// decimal points, a leading '+', repeated or mixed signs, the empty string,
// non-numeric text, a hex prefix, and magnitudes far outside the int32_t range.
BOOST_CHECK(!ToIntegral<int32_t>(" 1"));
BOOST_CHECK(!ToIntegral<int32_t>("1 "));
BOOST_CHECK(!ToIntegral<int32_t>("1a"));
BOOST_CHECK(!ToIntegral<int32_t>("1.1"));
BOOST_CHECK(!ToIntegral<int32_t>("1.9"));
BOOST_CHECK(!ToIntegral<int32_t>("+01.9"));
BOOST_CHECK(!ToIntegral<int32_t>(" -1"));
BOOST_CHECK(!ToIntegral<int32_t>("-1 "));
BOOST_CHECK(!ToIntegral<int32_t>(" -1 "));
BOOST_CHECK(!ToIntegral<int32_t>("+1"));
BOOST_CHECK(!ToIntegral<int32_t>(" +1"));
BOOST_CHECK(!ToIntegral<int32_t>(" +1 "));
BOOST_CHECK(!ToIntegral<int32_t>("+-1"));
BOOST_CHECK(!ToIntegral<int32_t>("-+1"));
BOOST_CHECK(!ToIntegral<int32_t>("++1"));
BOOST_CHECK(!ToIntegral<int32_t>("--1"));
BOOST_CHECK(!ToIntegral<int32_t>(""));
BOOST_CHECK(!ToIntegral<int32_t>("aap"));
BOOST_CHECK(!ToIntegral<int32_t>("0x1"));
BOOST_CHECK(!ToIntegral<int32_t>("-32482348723847471234"));
BOOST_CHECK(!ToIntegral<int32_t>("32482348723847471234"));

// int64_t boundaries: min-1 rejected, min and max accepted, max+1 rejected.
// The minimum is written as (-max - 1) because the literal 9223372036854775808
// does not fit in a signed 64-bit type before negation is applied.
BOOST_CHECK(!ToIntegral<int64_t>("-9223372036854775809"));
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("-9223372036854775808").value(), -9'223'372'036'854'775'807LL - 1LL);
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("9223372036854775807").value(), 9'223'372'036'854'775'807);
BOOST_CHECK(!ToIntegral<int64_t>("9223372036854775808"));

// uint64_t boundaries: negative rejected, 0 and max accepted, max+1 rejected.
BOOST_CHECK(!ToIntegral<uint64_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("18446744073709551615").value(), 18'446'744'073'709'551'615ULL);
BOOST_CHECK(!ToIntegral<uint64_t>("18446744073709551616"));

// int32_t boundaries. The expected minimum carries an LL suffix so that
// 2147483648 is typed as long long before the unary minus is applied.
BOOST_CHECK(!ToIntegral<int32_t>("-2147483649"));
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-2147483648").value(), -2'147'483'648LL);
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("2147483647").value(), 2'147'483'647);
BOOST_CHECK(!ToIntegral<int32_t>("2147483648"));

// uint32_t boundaries.
BOOST_CHECK(!ToIntegral<uint32_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("4294967295").value(), 4'294'967'295U);
BOOST_CHECK(!ToIntegral<uint32_t>("4294967296"));

// int16_t boundaries.
BOOST_CHECK(!ToIntegral<int16_t>("-32769"));
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("-32768").value(), -32'768);
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("32767").value(), 32'767);
BOOST_CHECK(!ToIntegral<int16_t>("32768"));

// uint16_t boundaries.
BOOST_CHECK(!ToIntegral<uint16_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("65535").value(), 65'535U);
BOOST_CHECK(!ToIntegral<uint16_t>("65536"));

// int8_t boundaries.
BOOST_CHECK(!ToIntegral<int8_t>("-129"));
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("-128").value(), -128);
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("127").value(), 127);
BOOST_CHECK(!ToIntegral<int8_t>("128"));

// uint8_t boundaries.
BOOST_CHECK(!ToIntegral<uint8_t>("-1"));
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("0").value(), 0U);
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("255").value(), 255U);
BOOST_CHECK(!ToIntegral<uint8_t>("256"));
}

BOOST_AUTO_TEST_CASE(test_ParseInt64)
{
int64_t n;
Expand Down
110 changes: 38 additions & 72 deletions src/util/strencodings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <errno.h>
#include <limits>
#include <optional>

static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

Expand Down Expand Up @@ -282,6 +281,32 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid)
return std::string((const char*)vchRet.data(), vchRet.size());
}

[[nodiscard]] static bool ParsePrechecks(const std::string&);

namespace {
/**
 * Shared implementation behind the ParseInt and ParseUInt wrappers for each width.
 *
 * Runs ParsePrechecks() on the input, tolerates a single leading '+', and
 * delegates the numeric conversion to ToIntegral<T>().
 *
 * @tparam T   Integral result type (enforced by static_assert).
 * @param str  String to parse.
 * @param out  Optional destination; written only when parsing succeeds.
 * @return true if str was accepted and converted, false otherwise.
 */
template <typename T>
bool ParseIntegral(const std::string& str, T* out)
{
    static_assert(std::is_integral<T>::value);
    if (!ParsePrechecks(str)) {
        return false;
    }
    const bool leading_plus = !str.empty() && str[0] == '+';
    // Backwards compatibility with strtol/strtoll/strtoul/strtoull: a leading
    // '+' is stripped before conversion, so "+-N" must be refused explicitly,
    // otherwise the remaining "-N" would be handed to ToIntegral on its own.
    if (leading_plus && str.length() >= 2 && str[1] == '-') {
        return false;
    }
    const std::optional<T> parsed = leading_plus ? ToIntegral<T>(str.substr(1)) : ToIntegral<T>(str);
    if (parsed) {
        if (out != nullptr) {
            *out = *parsed;
        }
        return true;
    }
    return false;
}
}; // namespace

[[nodiscard]] static bool ParsePrechecks(const std::string& str)
{
if (str.empty()) // No empty string allowed
Expand All @@ -293,95 +318,36 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid)
return true;
}

bool ParseInt32(const std::string& str, int32_t *out)
bool ParseInt32(const std::string& str, int32_t* out)
{
if (!ParsePrechecks(str))
return false;
char *endp = nullptr;
errno = 0; // strtol will not set errno if valid
long int n = strtol(str.c_str(), &endp, 10);
if(out) *out = (int32_t)n;
// Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n >= std::numeric_limits<int32_t>::min() &&
n <= std::numeric_limits<int32_t>::max();
return ParseIntegral<int32_t>(str, out);
}

bool ParseInt64(const std::string& str, int64_t *out)
bool ParseInt64(const std::string& str, int64_t* out)
{
if (!ParsePrechecks(str))
return false;
char *endp = nullptr;
errno = 0; // strtoll will not set errno if valid
long long int n = strtoll(str.c_str(), &endp, 10);
if(out) *out = (int64_t)n;
// Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *int64_t*.
return endp && *endp == 0 && !errno &&
n >= std::numeric_limits<int64_t>::min() &&
n <= std::numeric_limits<int64_t>::max();
return ParseIntegral<int64_t>(str, out);
}

bool ParseUInt8(const std::string& str, uint8_t *out)
bool ParseUInt8(const std::string& str, uint8_t* out)
{
uint32_t u32;
if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits<uint8_t>::max()) {
return false;
}
if (out != nullptr) {
*out = static_cast<uint8_t>(u32);
}
return true;
return ParseIntegral<uint8_t>(str, out);
}

bool ParseUInt16(const std::string& str, uint16_t* out)
{
uint32_t u32;
if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits<uint16_t>::max()) {
return false;
}
if (out != nullptr) {
*out = static_cast<uint16_t>(u32);
}
return true;
return ParseIntegral<uint16_t>(str, out);
}

bool ParseUInt32(const std::string& str, uint32_t *out)
bool ParseUInt32(const std::string& str, uint32_t* out)
{
if (!ParsePrechecks(str))
return false;
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
return false;
char *endp = nullptr;
errno = 0; // strtoul will not set errno if valid
unsigned long int n = strtoul(str.c_str(), &endp, 10);
if(out) *out = (uint32_t)n;
// Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit
// platforms the size of these types may be different.
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<uint32_t>::max();
return ParseIntegral<uint32_t>(str, out);
}

bool ParseUInt64(const std::string& str, uint64_t *out)
bool ParseUInt64(const std::string& str, uint64_t* out)
{
if (!ParsePrechecks(str))
return false;
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range
return false;
char *endp = nullptr;
errno = 0; // strtoull will not set errno if valid
unsigned long long int n = strtoull(str.c_str(), &endp, 10);
if(out) *out = (uint64_t)n;
// Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow
// we still have to check that the returned value is within the range of an *uint64_t*.
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<uint64_t>::max();
return ParseIntegral<uint64_t>(str, out);
}


bool ParseDouble(const std::string& str, double *out)
{
if (!ParsePrechecks(str))
Expand Down
Loading

0 comments on commit b306f19

Please sign in to comment.