Skip to content

Commit

Permalink
🔨 more work on the number parser
Browse files Browse the repository at this point in the history
  • Loading branch information
nlohmann committed Feb 13, 2017
1 parent b84705d commit 265c5b5
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 41 deletions.
58 changes: 38 additions & 20 deletions src/json.hpp
Expand Up @@ -10957,6 +10957,8 @@ class basic_json
const char* const m_start = nullptr;
const char* const m_end = nullptr;

// floating-point conversion

// overloaded wrappers for strtod/strtof/strtold
// that will be called from parse<floating_point_t>
static void strtof(float& f, const char* str, char** endptr)
Expand Down Expand Up @@ -10984,6 +10986,9 @@ class basic_json
std::array<char, 64> buf;
const size_t len = static_cast<size_t>(m_end - m_start);

// lexer will reject empty numbers
assert(len > 0);

// since dealing with strtod family of functions, we're
// getting the decimal point char from the C locale facilities
// instead of C++'s numpunct facet of the current std::locale
Expand Down Expand Up @@ -11023,10 +11028,9 @@ class basic_json
// this calls appropriate overload depending on T
strtof(value, data, &endptr);

// note that reading past the end is OK, the data may be, for
// example, "123.", where the parsed token only contains
// "123", but strtod will read the dot as well.
const bool ok = (endptr >= (data + len)) and (len > 0);
// parsing was successful iff strtof parsed exactly the number
// of characters determined by the lexer (len)
const bool ok = (endptr == (data + len));

if (ok and (value == 0.0) and (*data == '-'))
{
Expand All @@ -11037,6 +11041,8 @@ class basic_json
return ok;
}

// integral conversion

signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
{
return std::strtoll(m_start, endptr, 10);
Expand Down Expand Up @@ -11087,7 +11093,7 @@ class basic_json
@param[out] result @ref basic_json object to receive the number.
@param[in] token the type of the number token
*/
void get_number(basic_json& result, const token_type token) const
bool get_number(basic_json& result, const token_type token) const
{
assert(m_start != nullptr);
assert(m_start < m_cursor);
Expand All @@ -11105,9 +11111,10 @@ class basic_json
number_unsigned_t val;
if (num_converter.to(val))
{
// parsing successful
result.m_type = value_t::number_unsigned;
result.m_value = val;
return;
return true;
}
break;
}
Expand All @@ -11117,9 +11124,10 @@ class basic_json
number_integer_t val;
if (num_converter.to(val))
{
// parsing successful
result.m_type = value_t::number_integer;
result.m_value = val;
return;
return true;
}
break;
}
Expand All @@ -11133,22 +11141,24 @@ class basic_json
// parse float (either explicitly or because a previous conversion
// failed)
number_float_t val;
if (not num_converter.to(val))
if (num_converter.to(val))
{
// couldn't parse as float_t
result.m_type = value_t::discarded;
return;
}
// parsing successful
result.m_type = value_t::number_float;
result.m_value = val;

result.m_type = value_t::number_float;
result.m_value = val;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}

// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
return true;
}

// couldn't parse number in any format
return false;
}

private:
Expand Down Expand Up @@ -11396,8 +11406,16 @@ class basic_json
case lexer::token_type::value_integer:
case lexer::token_type::value_float:
{
m_lexer.get_number(result, last_token);
const bool ok = m_lexer.get_number(result, last_token);
get_token();

// if number conversion was unsuccessful, then it is
// because the number was directly followed by an
// unexpected character (e.g. "01" where "1" is unexpected)
if (not ok)
{
unexpect(last_token);
}
break;
}

Expand Down
58 changes: 38 additions & 20 deletions src/json.hpp.re2c
Expand Up @@ -10029,6 +10029,8 @@ class basic_json
const char* const m_start = nullptr;
const char* const m_end = nullptr;

// floating-point conversion

// overloaded wrappers for strtod/strtof/strtold
// that will be called from parse<floating_point_t>
static void strtof(float& f, const char* str, char** endptr)
Expand Down Expand Up @@ -10056,6 +10058,9 @@ class basic_json
std::array<char, 64> buf;
const size_t len = static_cast<size_t>(m_end - m_start);

// lexer will reject empty numbers
assert(len > 0);

// since dealing with strtod family of functions, we're
// getting the decimal point char from the C locale facilities
// instead of C++'s numpunct facet of the current std::locale
Expand Down Expand Up @@ -10095,10 +10100,9 @@ class basic_json
// this calls appropriate overload depending on T
strtof(value, data, &endptr);

// note that reading past the end is OK, the data may be, for
// example, "123.", where the parsed token only contains
// "123", but strtod will read the dot as well.
const bool ok = (endptr >= (data + len)) and (len > 0);
// parsing was successful iff strtof parsed exactly the number
// of characters determined by the lexer (len)
const bool ok = (endptr == (data + len));

if (ok and (value == 0.0) and (*data == '-'))
{
Expand All @@ -10109,6 +10113,8 @@ class basic_json
return ok;
}

// integral conversion

signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
{
return std::strtoll(m_start, endptr, 10);
Expand Down Expand Up @@ -10159,7 +10165,7 @@ class basic_json
@param[out] result @ref basic_json object to receive the number.
@param[in] token the type of the number token
*/
void get_number(basic_json& result, const token_type token) const
bool get_number(basic_json& result, const token_type token) const
{
assert(m_start != nullptr);
assert(m_start < m_cursor);
Expand All @@ -10177,9 +10183,10 @@ class basic_json
number_unsigned_t val;
if (num_converter.to(val))
{
// parsing successful
result.m_type = value_t::number_unsigned;
result.m_value = val;
return;
return true;
}
break;
}
Expand All @@ -10189,9 +10196,10 @@ class basic_json
number_integer_t val;
if (num_converter.to(val))
{
// parsing successful
result.m_type = value_t::number_integer;
result.m_value = val;
return;
return true;
}
break;
}
Expand All @@ -10205,22 +10213,24 @@ class basic_json
// parse float (either explicitly or because a previous conversion
// failed)
number_float_t val;
if (not num_converter.to(val))
if (num_converter.to(val))
{
// couldn't parse as float_t
result.m_type = value_t::discarded;
return;
}
// parsing successful
result.m_type = value_t::number_float;
result.m_value = val;

result.m_type = value_t::number_float;
result.m_value = val;
// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
}

// replace infinity and NAN by null
if (not std::isfinite(result.m_value.number_float))
{
result.m_type = value_t::null;
result.m_value = basic_json::json_value();
return true;
}

// couldn't parse number in any format
return false;
}

private:
Expand Down Expand Up @@ -10468,8 +10478,16 @@ class basic_json
case lexer::token_type::value_integer:
case lexer::token_type::value_float:
{
m_lexer.get_number(result, last_token);
const bool ok = m_lexer.get_number(result, last_token);
get_token();

// if number conversion was unsuccessful, then it is
// because the number was directly followed by an
// unexpected character (e.g. "01" where "1" is unexpected)
if (not ok)
{
unexpect(last_token);
}
break;
}

Expand Down
7 changes: 6 additions & 1 deletion test/src/unit-class_parser.cpp
Expand Up @@ -270,6 +270,11 @@ TEST_CASE("parser class")
}
}

SECTION("overflow")
{
CHECK(json::parser("1.18973e+4932").parse() == json());
}

SECTION("invalid numbers")
{
CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument);
Expand All @@ -294,7 +299,7 @@ TEST_CASE("parser class")
CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument);

CHECK_THROWS_WITH(json::parser("01").parse(),
"parse error - unexpected number literal; expected end of input");
"parse error - unexpected number literal");
CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
CHECK_THROWS_WITH(json::parser("1.").parse(),
"parse error - unexpected '.'; expected end of input");
Expand Down

0 comments on commit 265c5b5

Please sign in to comment.