Skip to content

Commit

Permalink
[imcc] add mk_string, unify string quote and unescape handling
Browse files Browse the repository at this point in the history
Also for mk_sub_address_fromc, _mk_address, do_loadlib, .lex, mk_const_ident.
This broke the string GC somehow.
  • Loading branch information
Reini Urban committed Oct 15, 2014
1 parent 72d101a commit 513ae44
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 67 deletions.
58 changes: 20 additions & 38 deletions compilers/imcc/imcc.y
Expand Up @@ -469,12 +469,14 @@ mk_pmc_const_named(ARGMOD(imc_info_t *imcc), ARGMOD(IMC_Unit *unit),
ASSERT_ARGS(mk_pmc_const_named)
SymReg *rhs;
SymReg *r[3];
const int ascii = (*constant == '\'' || *constant == '"');
char *unquoted_name = mem_sys_strdup(name + 1);
size_t name_length;
//const int ascii = (*constant == '\'' || *constant == '"');
//char *unquoted_name = mem_sys_strdup(name + 1);
//size_t name_length;
int t = 'S';
char *unquoted_name = mk_string(imcc, name, &t);

name_length = strlen(unquoted_name) - 1;
unquoted_name[name_length] = 0;
//name_length = strlen(unquoted_name) - 1;
//unquoted_name[name_length] = 0;

if (left->type == VTADDRESS) { /* IDENTIFIER */
if (imcc->state->pasm_file) {
Expand All @@ -490,13 +492,13 @@ mk_pmc_const_named(ARGMOD(imc_info_t *imcc), ARGMOD(IMC_Unit *unit),
if ((strncmp(unquoted_name, "Sub", name_length) == 0)
|| (strncmp(unquoted_name, "Coroutine", name_length) == 0)) {
rhs = mk_const(imcc, constant, 'p');
if (!ascii)
if (t == 'U')
rhs->type |= VT_ENCODED;
rhs->usage |= U_FIXUP | U_SUBID_LOOKUP;
}
else if (strncmp(unquoted_name, "LexInfo", name_length) == 0) {
rhs = mk_const(imcc, constant, 'l');
if (!ascii)
if (t == 'U')
rhs->type |= VT_ENCODED;
rhs->usage |= U_FIXUP | U_LEXINFO_LOOKUP;
}
Expand Down Expand Up @@ -749,23 +751,11 @@ mk_sub_address_fromc(ARGMOD(imc_info_t *imcc), ARGIN(const char *name))
ASSERT_ARGS(mk_sub_address_fromc)
/* name is a quoted sub name */
SymReg *r;
char *name_copy;
int t = 'S';
char *unquoted_name = mk_string(imcc, name, &t);

/* interpolate only if the first character is a double-quote */
if (*name == '"') {
STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL);
name_copy = Parrot_str_to_cstring(imcc->interp, unescaped);
}
else if (*name == '\'') {
name_copy = mem_sys_strdup(name+1);
name_copy[strlen(name_copy) - 1] = 0;
}
else {
name_copy = mem_sys_strdup(name);
}

r = mk_sub_address(imcc, name_copy);
mem_sys_free(name_copy);
r = mk_sub_address(imcc, unquoted_name);
mem_sys_free(unquoted_name);

return r;
}
Expand Down Expand Up @@ -1017,7 +1007,8 @@ static void
do_loadlib(ARGMOD(imc_info_t *imcc), ARGIN(const char *lib))
{
ASSERT_ARGS(do_loadlib)
STRING * const s = Parrot_str_unescape(imcc->interp, lib + 1, '"', NULL);
STRING * const s = Parrot_str_new_init(imcc->interp, lib, 0,
Parrot_platform_encoding_ptr, 0);
PMC * const lib_pmc = Parrot_dyn_load_lib(imcc->interp, s, NULL);
if (PMC_IS_NULL(lib_pmc) || !VTABLE_get_bool(imcc->interp, lib_pmc)) {
IMCC_fataly(imcc, EXCEPTION_LIBRARY_ERROR,
Expand Down Expand Up @@ -1161,8 +1152,9 @@ pragma:
hll_def '\n' { $$ = 0; }
| LOADLIB STRINGC '\n'
{
int t = 'S';
$$ = 0;
do_loadlib(imcc, $2);
do_loadlib(imcc, mk_string(imcc, $2, &t));
mem_sys_free($2);
}
;
Expand All @@ -1181,7 +1173,7 @@ annotate_directive:
hll_def:

HLL STRINGC
{
{ /* XXX no '' or bare. first mk_const it, and then get the name from that */
STRING * const hll_name = Parrot_str_unescape(imcc->interp, $2 + 1, '"', NULL);
Parrot_pcc_set_HLL(imcc->interp, CURRENT_CONTEXT(imcc->interp),
Parrot_hll_register_HLL(imcc->interp, hll_name));
Expand Down Expand Up @@ -1254,20 +1246,10 @@ pasm_inst: { clear_state(imcc); }
}
| LEXICAL STRINGC COMMA REG
{
char *name;
SymReg *r = mk_pasm_reg(imcc, $4);
SymReg *n;
if (*$2 == '"') { /* interpolate name with double-quote */
STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL);
name = Parrot_str_to_cstring(imcc->interp, unescaped);
} else { /* only if we insist on keeping ' or " around (there's no need) */
name = mem_sys_strdup($2 + 1);
name[strlen(name) - 1] = 0;
}
n = mk_const(imcc, name, 'S');
SymReg *r = mk_pasm_reg(imcc, $4);
SymReg *n = mk_const(imcc, $2, 'S');
set_lexical(imcc, r, n);
$$ = 0;
mem_sys_free(name);
mem_sys_free($2);
mem_sys_free($4);
}
Expand Down
94 changes: 65 additions & 29 deletions compilers/imcc/symreg.c
Expand Up @@ -725,12 +725,12 @@ mk_const_ident(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t,
IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR,
"global PMC constant not allowed");

r = _mk_symreg(imcc, &imcc->ghash, name, t);
r = _mk_symreg(imcc, &imcc->ghash, mk_string(imcc, name, &t), t);

r->type = VT_CONSTP;
}
else {
r = mk_ident(imcc, name, t, VT_CONSTP);
r = mk_ident(imcc, mk_string(imcc, name, &t), t, VT_CONSTP);

if (t == 'P')
return mk_pmc_const_2(imcc, imcc->cur_unit, r, val);
Expand Down Expand Up @@ -833,58 +833,87 @@ int_overflows(ARGIN(const SymReg *r))

/*
=item C<SymReg * mk_const(imc_info_t * imcc, const char *name, int t)>
=item C<char * mk_string(imc_info_t * imcc, const char *name, int *t)>
Makes a new constant and populates the cache of global symbols.
Creates a fresh char *.
Strips surrounding string quotes and unescapes double-quoted strings.
Must be used in the parser.
=cut
*/

PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
SymReg *
mk_const(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t)
char *
mk_string(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), ARGMOD(int *t))
{
ASSERT_ARGS(mk_const)
SymHash * const h = &imcc->ghash;
int encoded = 0;
SymReg * result;
char *const_name = (char *)name;

if (!h->data)
create_symhash(imcc, h);
ASSERT_ARGS(mk_string)
char *new_name;

if (t != 'U') {
if (*t != 'U') {
if (*name == '"') {
STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL);
new_name = unescaped->strstart;
/* but we need to keep escaped \0. represent it encoded */
if (memchr(unescaped->strstart, 0, unescaped->bufused)) {
if (memchr(new_name, 0, unescaped->bufused)) {
int len = strlen(name);
const_name = (char*)mem_internal_allocate(len + 8 + 1); /* 8 = strlen("fixed_8:") */
strcpy(const_name, "fixed_8:");
strcat(const_name, name);
const_name[len + 8 + 1] = 0;
if (t == 'S') t = 'U';
new_name = (char*)mem_internal_allocate(len + 8 + 1);
strcpy(new_name, "fixed_8:");
strcat(new_name, name);
new_name[len + 8 + 1] = 0;
if (*t == 'S') *t = 'U';
}
}
else if (*name == '\'') {
const_name = mem_sys_strdup(name + 1);
const_name[strlen(const_name) - 1] = 0;
new_name = mem_sys_strdup(name + 1);
new_name[strlen(new_name) - 1] = 0;
}
else {
new_name = mem_sys_strdup(name);
}
}
else {
new_name = mem_sys_strdup(name);
}
/* TODO: resolve encoding aliases here with U, not in string_from_reg.
registers should store encoded strings more efficiently: GH #1097 */

IMCC_debug(imcc, DEBUG_MKCONST, "# mk_string '%s' %c\n",
new_name, *t);
return new_name;
}

/*

=item C<SymReg * mk_const(imc_info_t * imcc, const char *name, int t)>

Makes a new constant and populates the cache of global symbols.

The literal C<name> is first normalized with C<mk_string>, which strips
surrounding string quotes and unescapes double-quoted strings. Because
C<mk_string> receives the address of C<t>, it may change the type from 'S'
to 'U'; the adjusted type is the one passed on to C<_mk_const>.

The global symbol hash is created on first use.

Must be used in the parser.

=cut

*/

PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
SymReg *
mk_const(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t)
{
    ASSERT_ARGS(mk_const)
    SymHash * const h = &imcc->ghash;

    /* Normalize before touching the hash, as before; t may be updated. */
    const char * const const_name = mk_string(imcc, name, &t);

    if (!h->data)
        create_symhash(imcc, h);

    IMCC_debug(imcc, DEBUG_MKCONST, "# mk_const '%s' %c\n",
               const_name, t);

    /* NOTE(review): const_name is a fresh buffer from mk_string and is not
       released here. Whether _mk_const copies or keeps the pointer is not
       visible in this function -- confirm ownership before adding a free. */
    return _mk_const(imcc, h, const_name, t);
}


/*
=item C<static char * add_ns(imc_info_t * imcc, const char *name)>
Expand Down Expand Up @@ -952,17 +981,19 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh),
{
ASSERT_ARGS(_mk_address)
SymReg *r;
int t = 'S';
char *unquoted_name = mk_string(imcc, name, &t);

if (uniq == U_add_all) {
int is_lexical = 0;
r = get_sym_by_name(&imcc->ghash, name);
r = get_sym_by_name(&imcc->ghash, unquoted_name);

if (r && r->usage & U_LEXICAL)
is_lexical = 1;

r = mem_gc_allocate_zeroed_typed(imcc->interp, SymReg);
r->type = VTADDRESS;
r->name = mem_sys_strdup(name);
r->name = unquoted_name;
_store_symreg(imcc, hsh, r);

if (is_lexical)
Expand All @@ -973,8 +1004,8 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh),
char *aux_name = NULL;
const char * const sub_name = (uniq == U_add_uniq_sub)
/* remember to free this name; add_ns malloc()s it */
? (aux_name = add_ns(imcc, name))
: name;
? (aux_name = add_ns(imcc, unquoted_name))
: unquoted_name;

r = _get_sym(hsh, sub_name);

Expand All @@ -984,6 +1015,7 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh),
IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR,
"Label '%s' already defined\n", sub_name);
else if (uniq == U_add_uniq_sub) {
mem_sys_free(unquoted_name);
mem_sys_free(aux_name);
IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR,
"Subroutine '%s' already defined\n", name);
Expand All @@ -997,7 +1029,11 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh),
r->lhs_use_count++;
if (uniq == U_add_uniq_sub)
mem_sys_free(aux_name);
else
mem_sys_free(unquoted_name);
}
else
mem_sys_free(unquoted_name);
}

return r;
Expand Down
16 changes: 16 additions & 0 deletions compilers/imcc/symreg.h
Expand Up @@ -366,6 +366,18 @@ SymReg * mk_pcc_sub(
__attribute__nonnull__(2)
FUNC_MODIFIES(* imcc);

PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
char * mk_string(
ARGMOD(imc_info_t * imcc),
ARGIN(const char *name),
ARGMOD(int *t))
__attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(* imcc)
FUNC_MODIFIES(*t);

PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
SymReg * mk_sub_address(ARGMOD(imc_info_t * imcc), ARGIN(const char *name))
Expand Down Expand Up @@ -498,6 +510,10 @@ char * symreg_to_str(ARGIN(const SymReg *s))
#define ASSERT_ARGS_mk_pcc_sub __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(imcc) \
, PARROT_ASSERT_ARG(name))
#define ASSERT_ARGS_mk_string __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(imcc) \
, PARROT_ASSERT_ARG(name) \
, PARROT_ASSERT_ARG(t))
#define ASSERT_ARGS_mk_sub_address __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(imcc) \
, PARROT_ASSERT_ARG(name))
Expand Down

0 comments on commit 513ae44

Please sign in to comment.