Skip to content

Commit

Permalink
syntax: adjust number literal parsing and string to number conversion
Browse files Browse the repository at this point in the history
 - Recognize new number literal prefixes `0o` and `0O` for octal as well
   as `0b` and `0B` for binary number literals

 - Treat number literals with leading zeros as octal while parsing but
   as decimal ones on implicit number conversions, meaning `012` will
   yield `10` while `+"012"` or `"012" + 0` will yield `12`

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
  • Loading branch information
jow- committed Jun 1, 2022
1 parent 9efbe18 commit d996047
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 11 deletions.
1 change: 1 addition & 0 deletions include/ucode/vallist.h
Expand Up @@ -34,6 +34,7 @@ typedef enum {
} uc_value_type_t;

uc_value_t *uc_number_parse(const char *buf, char **end);
uc_value_t *uc_number_parse_octal(const char *buf, char **end);

bool uc_double_pack(double d, char *buf, bool little_endian);
double uc_double_unpack(const char *buf, bool little_endian);
Expand Down
34 changes: 31 additions & 3 deletions lexer.c
Expand Up @@ -696,10 +696,38 @@ is_numeric_char(uc_lexer_t *lex, char c)
{
char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0;

if ((prev == 'e' || prev == 'E') && (c == '-' || c == '+'))
switch (c|32) {
case '.':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return true;

return prev ? (isxdigit(c) || c == 'x' || c == 'X' || c == '.') : (isdigit(c) || c == '.');
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'o':
case 'x':
/* require previous char, a number literal cannot start with these */
return prev != 0;

case '+':
case '-':
/* sign is only allowed after an exponent char */
return (prev|32) == 'e';
}

return false;
}

static uc_token_t *
Expand All @@ -713,7 +741,7 @@ parse_number(uc_lexer_t *lex)
if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) {
lookbehind_append(lex, "\0", 1);

nv = uc_number_parse(lex->lookbehind, &e);
nv = uc_number_parse_octal(lex->lookbehind, &e);

switch (ucv_type(nv)) {
case UC_DOUBLE:
Expand Down
4 changes: 4 additions & 0 deletions tests/custom/00_syntax/10_numeric_literals
Expand Up @@ -8,6 +8,8 @@ doubles internally.
-- Expect stdout --
Integers literals: 123, 127, 2748, 57082
Float literals: 10, 10.3, 1.23456e-65, 16.0625
Octal literals: 63, 118
Binary literals: 7, 11
Special values: Infinity, Infinity, NaN, NaN
Minimum values: -9223372036854775808, -1.79769e+308
Maximum values: 9223372036854775807, 1.79769e+308
Expand All @@ -18,6 +20,8 @@ Maximum truncation: 18446744073709551615, Infinity
-- Testcase --
Integers literals: {{ 123 }}, {{ 0177 }}, {{ 0xabc }}, {{ 0xDEFA }}
Float literals: {{ 10. }}, {{ 10.3 }}, {{ 123.456e-67 }}, {{ 0x10.1 }}
Octal literals: {{ 0o77 }}, {{ 0O166 }}
Binary literals: {{ 0b111 }}, {{ 0B1011 }}
Special values: {{ Infinity }}, {{ 1 / 0 }}, {{ NaN }}, {{ "x" / 1 }}
Minimum values: {{ -9223372036854775808 }}, {{ -1.7976931348623158e+308 }}
Maximum values: {{ 9223372036854775807 }}, {{ 1.7976931348623158e+308 }}
Expand Down
17 changes: 13 additions & 4 deletions tests/custom/03_stdlib/08_int
@@ -1,8 +1,9 @@
The `int()` function converts the given value into a signed integer
value and returns the resulting number.
value and returns the resulting number. In case the value is of type
string, a second optional base argument may be specified which is
passed to the underlying strtoll(3) implementation.

Returns `NaN` if the given argument is not convertible into a number.

Returns `NaN` if the conversion result is out of range.

-- Testcase --
Expand All @@ -19,7 +20,11 @@ Returns `NaN` if the conversion result is out of range.
int("0xffffffffffffffff"),
int("0177"),
int("+145"),
int("-96")
int("-96"),
int("0177", 8),
int("0x1000", 16),
int("1111", 2),
int("0xffffffffffffffff", 16)
]);
%}
-- End --
Expand All @@ -37,6 +42,10 @@ Returns `NaN` if the conversion result is out of range.
0,
177,
145,
-96
-96,
127,
4096,
15,
"NaN"
]
-- End --
53 changes: 49 additions & 4 deletions vallist.c
Expand Up @@ -45,12 +45,13 @@
#define UC_VALLIST_CHUNK_SIZE 8


uc_value_t *
uc_number_parse(const char *buf, char **end)
static uc_value_t *
uc_number_parse_common(const char *buf, bool octal, char **end)
{
unsigned long long u;
const char *p = buf;
bool neg = false;
int base = 10;
double d;
char *e;

Expand All @@ -61,16 +62,48 @@ uc_number_parse(const char *buf, char **end)
neg = true;
p++;
}
else if (*p == '+') {
p++;
}

if (*p != 0 && !isxdigit(*p))
return NULL;

if (!end)
end = &e;

u = strtoull(p, end, 0);
if (p[0] == '0') {
switch (p[1]|32) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
base = octal ? 8 : 10;
break;

case 'x':
base = 16;
break;

case 'b':
base = 2;
p += 2;
break;

case 'o':
base = 8;
p += 2;
break;
}
}

if (**end == '.' || **end == 'e' || **end == 'E') {
u = strtoull(p, end, base);

if (base >= 10 && (**end == '.' || (**end|32) == 'e')) {
d = strtod(p, end);

if (!isspace(**end) && **end != 0)
Expand All @@ -95,6 +128,18 @@ uc_number_parse(const char *buf, char **end)
return ucv_uint64_new(u);
}

/*
 * Convert the textual number in `buf` into a value.
 *
 * This variant reads a literal that starts with a zero followed by an
 * octal digit (e.g. "012") as a decimal number. The explicit prefixes
 * (`0x`, `0o`, `0b`) are still recognized by the shared parser.
 *
 * `end` is handed through to uc_number_parse_common() unchanged; the
 * shared parser substitutes an internal pointer when it is NULL.
 *
 * Returns whatever uc_number_parse_common() yields for the input.
 */
uc_value_t *
uc_number_parse(const char *buf, char **end)
{
	const bool leading_zero_is_octal = false;

	return uc_number_parse_common(buf, leading_zero_is_octal, end);
}

/*
 * Convert the textual number in `buf` into a value, using literal rules.
 *
 * This variant reads a literal that starts with a zero followed by an
 * octal digit (e.g. "012") as base 8. It is the entry point the lexer
 * calls when turning a number literal token into a value.
 *
 * `end` is handed through to uc_number_parse_common() unchanged; the
 * shared parser substitutes an internal pointer when it is NULL.
 *
 * Returns whatever uc_number_parse_common() yields for the input.
 */
uc_value_t *
uc_number_parse_octal(const char *buf, char **end)
{
	const bool leading_zero_is_octal = true;

	return uc_number_parse_common(buf, leading_zero_is_octal, end);
}

bool
uc_double_pack(double d, char *buf, bool little_endian)
{
Expand Down

0 comments on commit d996047

Please sign in to comment.