Skip to content

Commit

Permalink
Merge branch 'smoke-me/illegal-escape-gh1103'
Browse files Browse the repository at this point in the history
  • Loading branch information
Reini Urban committed Oct 16, 2014
2 parents 3936b50 + 6f1f672 commit 99b936f
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 41 deletions.
3 changes: 3 additions & 0 deletions ChangeLog
@@ -1,5 +1,8 @@
2014-10-21 release 6.9.0
- Core
+ Changed behavior with illegal escape sequences, GH #1103. Do not silently
skip the \ anymore, throw an EXCEPTION_INVALID_CHARACTER error with
"Illegal escape sequence \o in 'string\o'".
+ Add platform encoding functions for darwin, FreeBSD, OpenBSD and NetBSD,
fixing rakudo problems with UTF-8 locales. [GH #1092]
+ Enable trap op (int3) on x86_64 also
Expand Down
96 changes: 71 additions & 25 deletions src/string/api.c
Expand Up @@ -61,16 +61,29 @@ static const STR_VTABLE * string_rep_compatible(

PARROT_DOES_NOT_RETURN
PARROT_COLD
static void throw_illegal_escape(PARROT_INTERP)
__attribute__nonnull__(1);
static void throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
__attribute__nonnull__(1)
__attribute__nonnull__(2);

PARROT_DOES_NOT_RETURN
PARROT_COLD
static void throw_illegal_escape_char(PARROT_INTERP,
const char c,
ARGIN(const STRING *s))
__attribute__nonnull__(1)
__attribute__nonnull__(3);

#define ASSERT_ARGS_string_max_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(a) \
, PARROT_ASSERT_ARG(b))
#define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_throw_illegal_escape_char __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(s))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */

Expand Down Expand Up @@ -2580,9 +2593,12 @@ Parrot_str_escape_truncate(PARROT_INTERP,

/*
=item C<static void throw_illegal_escape(PARROT_INTERP)>
=item C<static void throw_illegal_escape(PARROT_INTERP, const STRING *s)>
=item C<static void throw_illegal_escape_char(PARROT_INTERP, const char c, const
STRING *s)>
Helper function to avoid repeated throw calls.
Helper functions to avoid repeated throw calls.
=cut
Expand All @@ -2591,11 +2607,21 @@ Helper function to avoid repeated throw calls.
PARROT_DOES_NOT_RETURN
PARROT_COLD
static void
throw_illegal_escape(PARROT_INTERP)
throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
{
ASSERT_ARGS(throw_illegal_escape)
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
"Illegal escape sequence");
"Illegal escape sequence in '%Ss'", s);
}

/* Report an unrecognized backslash escape: throws
   EXCEPTION_INVALID_CHARACTER through Parrot_ex_throw_from_c_args with a
   message that shows the offending character c after a backslash and the
   string s in which it was found. Marked PARROT_DOES_NOT_RETURN, so
   callers need no fallthrough handling after the call. */
PARROT_DOES_NOT_RETURN
PARROT_COLD
static void
throw_illegal_escape_char(PARROT_INTERP, const char c, ARGIN(const STRING *s))
{
ASSERT_ARGS(throw_illegal_escape_char)
/* "\\%c" prints a literal backslash followed by c; s is formatted
   with the %Ss directive. */
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
"Illegal escape sequence \\%c in '%Ss'", c, s);
}

/*
Expand Down Expand Up @@ -2648,22 +2674,27 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
if (itersrc.bytepos >= srclen) break;
c = STRING_iter_get_and_advance(interp, src, &itersrc);
switch (c) {
/* Common one char sequences */
case 'a': next = '\a'; break;
case 'b': next = '\b'; break;
case 't': next = '\t'; break;
case 'n': next = '\n'; break;
case 'v': next = '\v'; break;
case 'f': next = '\f'; break;
case 'r': next = '\r'; break;
case 'e': next = '\x1B'; break;
/* Escape character */
case 'c':
/* Allowed escape sequences */
case 'a': next = '\a'; break; /* \x07 Alarm, beep */
case 'b': next = '\b'; break; /* \x08 Backspace */
case 't': next = '\t'; break; /* \x09 horizontal tab */
case 'n': next = '\n'; break; /* \x0a newline */
case 'v': next = '\v'; break; /* \x0b vertical tab */
case 'f': next = '\f'; break; /* \x0c formfeed */
case 'r': next = '\r'; break; /* \x0d carriage return */
case 'e': next = '\x1B'; break; /* \x1b prefix ansi escape */
/* and previously handled in the default case: */
case '\\': next = c; break; /* \x5c */
case '"': next = c; break; /* \x22 */
case '\'': next = c; break; /* \x27 */
case '?': next = c; break; /* \x3f */
/* Escape character */
case 'c':
if (itersrc.bytepos >= srclen) break;
c = STRING_iter_get_and_advance(interp, src, &itersrc);
/* This assumes ascii-alike encoding */
if (c < 'A' || c > 'Z')
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
next = c - 'A' + 1;
break;
case 'x':
Expand All @@ -2678,13 +2709,13 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
if (c == '}')
break;
if (!isxdigit(c))
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
if (digcount == 8)
break;
digbuf[digcount++] = c;
}
if (c != '}')
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
}
else {
/* \xhh 1..2 hex digits */
Expand All @@ -2702,7 +2733,7 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
}
}
if (digcount == 0)
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
digbuf[digcount] = '\0';
next = strtol(digbuf, NULL, 16);
break;
Expand All @@ -2712,7 +2743,7 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
if (itersrc.bytepos >= srclen) break;
c = STRING_iter_get_and_advance(interp, src, &itersrc);
if (!isxdigit(c))
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
digbuf[digcount] = c;
}
digbuf[digcount] = '\0';
Expand All @@ -2724,7 +2755,7 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
if (itersrc.bytepos >= srclen) break;
c = STRING_iter_get_and_advance(interp, src, &itersrc);
if (!isxdigit(c))
throw_illegal_escape(interp);
throw_illegal_escape(interp, src);
digbuf[digcount] = c;
}
digbuf[digcount] = '\0';
Expand All @@ -2747,7 +2778,18 @@ Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
pending = 1;
break;
default:
next = c;
/* Die with Illegal escape sequence since 6.9.0 but allow quoting of
special chars. */
/* The C standard requires such "invalid" escape sequences to be diagnosed
(i.e., the compiler must print an error message). GH #1103 */
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
/* next = c; for a deprecation cycle? */
/* catch improper use of \i, \O, \o */
throw_illegal_escape_char(interp, c, src);
}
else {
next = c; /* ignore the \ in special chars like \[, \}, ... */
}
}
}
STRING_iter_set_and_advance(interp, result, &iterdest, next);
Expand All @@ -2773,6 +2815,10 @@ Unescapes the specified C string. These sequences are covered:
\Uhhhhhhhh 8 hex digits
\a, \b, \t, \n, \v, \f, \r, \e
These sequences are not escaped: C<\\ \" \' \?>
All other escape sequences within C<[a-zA-Z]> are illegal.
=cut
*/
Expand Down
87 changes: 85 additions & 2 deletions t/compilers/imcc/syn/clash.t
@@ -1,13 +1,13 @@
#!perl
# Copyright (C) 2001-2014, Parrot Foundation.
# Copyright (C) 2001-2009,2014, Parrot Foundation.

use strict;
use warnings;
use lib qw( . lib ../lib ../../lib );

use Test::More;
use Parrot::Config;
use Parrot::Test tests => 13;
use Parrot::Test tests => 17;

pir_output_is( <<'CODE', <<'OUT', "if/unless" );
.sub test :main
Expand Down Expand Up @@ -169,6 +169,7 @@ CODE
ok
OUT

# [GH #335]
pir_error_output_like( <<'CODE', <<'OUTPUT', 'new with unknown sub_label_op, no string constant');
.sub test :main
$P1 = new INTVAL
Expand Down Expand Up @@ -232,6 +233,88 @@ CODE
/Multiple declarations of lexical 'foo'/
OUT

# perl6 has a similar issue but there the next testcase failed. RT #116643
# use single-quotes with .lex!
pir_output_is( <<'CODE', <<'OUT', 'legal quoted .lex names', todo => 'GH #1095');
.sub 'main' :main
.lex 'bar\o', $P0 # ok, parsed as "bar\\o"
$P1 = box 'ok 1'
store_lex 'bar\o', $P1 # ok, parsed as "bar\\o"
$P2 = find_lex 'bar\o'
say $P2
.lex "foo\\o", $P3 # wrong, parsed as "foo\\\\o"
$P1 = box 'ok 2'
store_lex "foo\\o", $P1 # wrong, parsed as "foo\\\\o"
$P2 = find_lex "foo\\o"
say $P2
.end
CODE
ok 1
ok 2
OUT

pir_error_output_like( <<'CODE', <<'OUT', 'illegal quoted .lex names');
.sub 'main' :main
.lex "foo\o", $P4 # ok, parsed as "foo\o" (set_lexical)
$P1 = box 'ok 3'
store_lex "foo\o", $P1 # imcc compressed that to "fooo"
$P2 = find_lex "foo\o" # ditto
say $P2
.end
CODE
/Illegal escape sequence \\o in 'foo\\o'/
OUT

pir_output_is( <<'CODE', <<'OUT', 'legal quote with global names');
.sub 'main' :main
$S0 = 'bar\o'
$P1 = box 'ok 1'
set_global $S0, $P1
$P2 = get_global 'bar\o'
say $P2
$S1 = "foo\\o"
$P1 = box 'ok 2'
set_global "foo\\o", $P1 # ok, parsed as "foo\\o"
$P2 = get_global "foo\\o"
say $P2
.end
CODE
ok 1
ok 2
OUT

pir_error_output_like( <<'CODE', <<'OUT', 'illegal quoted global names');
.sub 'main' :main
$S0 = 'bar\o'
$P1 = box 'ok 1'
set_global $S0, $P1
$P2 = get_global 'bar\o'
say $P2
$S1 = "foo\\o"
$P1 = box 'ok 2'
set_global "foo\\o", $P1 # ok, parsed as "foo\\o"
$P2 = get_global "foo\\o"
say $P2
$S2 = "foo\o"
$P1 = box 'ok 3'
$S3 = "fooo"
$P2 = box 'ok 4'
set_global "foo\o", $P1 # wrong, parsed as "fooo"
set_global "fooo", $P2
$P3 = get_global "foo\o"
say $P3
$P3 = get_global "fooo"
say $P3
.end
CODE
/Illegal escape sequence \\o in 'foo\\o'/
OUT

# Local Variables:
# mode: cperl
# cperl-indent-level: 4
Expand Down
24 changes: 12 additions & 12 deletions t/compilers/imcc/syn/const.t
Expand Up @@ -490,31 +490,31 @@ CODE
OUT
pir_output_is(
<<'CODE', <<'OUT', "PIR heredoc: escaped characters, escaped quotes, starting quotes" );
<<'CODE', <<"OUT", "PIR heredoc: escaped characters, escaped quotes, starting quotes" );
.sub test :main
.local string test
test = <<"TEST"
{ \{ \\{
w \w \\w
w \a \\w
" \" \\"
{ \{ \\{
w \w \\w
w \a \\w
" \" \\"
{ \{ \\{
w \w \\w
w \a \\w
TEST
print test
.end
CODE
{ { \{
w w \w
" " \"
{ { \{
w w \w
" " \"
{ { \{
w w \w
{ { \\{
w \a \\w
" " \\"
{ { \\{
w \a \\w
" " \\"
{ { \\{
w \a \\w
OUT
pir_output_is( <<'CODE', <<'OUT', "heredoc not eol 1" );
Expand Down
4 changes: 2 additions & 2 deletions t/op/basic.t
@@ -1,5 +1,5 @@
#!perl
# Copyright (C) 2001-2007, Parrot Foundation.
# Copyright (C) 2001-2014, Parrot Foundation.

use strict;
use warnings;
Expand Down Expand Up @@ -70,7 +70,7 @@ pasm_output_is( <<'CODE', q(Parrot flies), "print string with embedded hex escap
end
CODE

pasm_output_is( <<'CODE', q(Parrot flies), "escaped non-special" );
pasm_error_output_is( <<'CODE', qq(Illegal escape sequence \\i in 'Parrot fl\\ies'\n), "unknown escaped" );
.pcc_sub :main main:
print "Parrot fl\ies"
end
Expand Down

0 comments on commit 99b936f

Please sign in to comment.