From 513ae44fba59c56ea211c966ac40b26dc95ee969 Mon Sep 17 00:00:00 2001 From: Reini Urban Date: Sat, 11 Oct 2014 10:15:33 +0200 Subject: [PATCH] [imcc] add mk_string, unify string quote and unescape handling Also for mk_sub_address_fromc, _mk_address, do_loadlib, .lex, mk_const_ident. This broke the string GC somehow. --- compilers/imcc/imcc.y | 58 +++++++++---------------- compilers/imcc/symreg.c | 94 ++++++++++++++++++++++++++++------------- compilers/imcc/symreg.h | 16 +++++++ 3 files changed, 101 insertions(+), 67 deletions(-) diff --git a/compilers/imcc/imcc.y b/compilers/imcc/imcc.y index 2cc893903f..9c578dc0b1 100644 --- a/compilers/imcc/imcc.y +++ b/compilers/imcc/imcc.y @@ -469,12 +469,14 @@ mk_pmc_const_named(ARGMOD(imc_info_t *imcc), ARGMOD(IMC_Unit *unit), ASSERT_ARGS(mk_pmc_const_named) SymReg *rhs; SymReg *r[3]; - const int ascii = (*constant == '\'' || *constant == '"'); - char *unquoted_name = mem_sys_strdup(name + 1); - size_t name_length; + //const int ascii = (*constant == '\'' || *constant == '"'); + //char *unquoted_name = mem_sys_strdup(name + 1); + //size_t name_length; + int t = 'S'; + char *unquoted_name = mk_string(imcc, name, &t); - name_length = strlen(unquoted_name) - 1; - unquoted_name[name_length] = 0; + //name_length = strlen(unquoted_name) - 1; + //unquoted_name[name_length] = 0; if (left->type == VTADDRESS) { /* IDENTIFIER */ if (imcc->state->pasm_file) { @@ -490,13 +492,13 @@ mk_pmc_const_named(ARGMOD(imc_info_t *imcc), ARGMOD(IMC_Unit *unit), if ((strncmp(unquoted_name, "Sub", name_length) == 0) || (strncmp(unquoted_name, "Coroutine", name_length) == 0)) { rhs = mk_const(imcc, constant, 'p'); - if (!ascii) + if (t == 'U') rhs->type |= VT_ENCODED; rhs->usage |= U_FIXUP | U_SUBID_LOOKUP; } else if (strncmp(unquoted_name, "LexInfo", name_length) == 0) { rhs = mk_const(imcc, constant, 'l'); - if (!ascii) + if (t == 'U') rhs->type |= VT_ENCODED; rhs->usage |= U_FIXUP | U_LEXINFO_LOOKUP; } @@ -749,23 +751,11 @@ 
mk_sub_address_fromc(ARGMOD(imc_info_t *imcc), ARGIN(const char *name)) ASSERT_ARGS(mk_sub_address_fromc) /* name is a quoted sub name */ SymReg *r; - char *name_copy; + int t = 'S'; + char *unquoted_name = mk_string(imcc, name, &t); - /* interpolate only if the first character is a double-quote */ - if (*name == '"') { - STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL); - name_copy = Parrot_str_to_cstring(imcc->interp, unescaped); - } - else if (*name == '\'') { - name_copy = mem_sys_strdup(name+1); - name_copy[strlen(name_copy) - 1] = 0; - } - else { - name_copy = mem_sys_strdup(name); - } - - r = mk_sub_address(imcc, name_copy); - mem_sys_free(name_copy); + r = mk_sub_address(imcc, unquoted_name); + mem_sys_free(unquoted_name); return r; } @@ -1017,7 +1007,8 @@ static void do_loadlib(ARGMOD(imc_info_t *imcc), ARGIN(const char *lib)) { ASSERT_ARGS(do_loadlib) - STRING * const s = Parrot_str_unescape(imcc->interp, lib + 1, '"', NULL); + STRING * const s = Parrot_str_new_init(imcc->interp, lib, 0, + Parrot_platform_encoding_ptr, 0); PMC * const lib_pmc = Parrot_dyn_load_lib(imcc->interp, s, NULL); if (PMC_IS_NULL(lib_pmc) || !VTABLE_get_bool(imcc->interp, lib_pmc)) { IMCC_fataly(imcc, EXCEPTION_LIBRARY_ERROR, @@ -1161,8 +1152,9 @@ pragma: hll_def '\n' { $$ = 0; } | LOADLIB STRINGC '\n' { + int t = 'S'; $$ = 0; - do_loadlib(imcc, $2); + do_loadlib(imcc, mk_string(imcc, $2, &t)); mem_sys_free($2); } ; @@ -1181,7 +1173,7 @@ annotate_directive: hll_def: HLL STRINGC - { + { /* XXX no '' or bare. 
first mk_const it, and then get the name from that */ STRING * const hll_name = Parrot_str_unescape(imcc->interp, $2 + 1, '"', NULL); Parrot_pcc_set_HLL(imcc->interp, CURRENT_CONTEXT(imcc->interp), Parrot_hll_register_HLL(imcc->interp, hll_name)); @@ -1254,20 +1246,10 @@ pasm_inst: { clear_state(imcc); } } | LEXICAL STRINGC COMMA REG { - char *name; - SymReg *r = mk_pasm_reg(imcc, $4); - SymReg *n; - if (*$2 == '"') { /* interpolate name with double-quote */ - STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL); - name = Parrot_str_to_cstring(imcc->interp, unescaped); - } else { /* only if we insist on keeping ' or " around (there's no need) */ - name = mem_sys_strdup($2 + 1); - name[strlen(name) - 1] = 0; - } - n = mk_const(imcc, name, 'S'); + SymReg *r = mk_pasm_reg(imcc, $4); + SymReg *n = mk_const(imcc, $2, 'S'); set_lexical(imcc, r, n); $$ = 0; - mem_sys_free(name); mem_sys_free($2); mem_sys_free($4); } diff --git a/compilers/imcc/symreg.c b/compilers/imcc/symreg.c index 52fe02dbd7..d1f6a50bf0 100644 --- a/compilers/imcc/symreg.c +++ b/compilers/imcc/symreg.c @@ -725,12 +725,12 @@ mk_const_ident(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t, IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR, "global PMC constant not allowed"); - r = _mk_symreg(imcc, &imcc->ghash, name, t); + r = _mk_symreg(imcc, &imcc->ghash, mk_string(imcc, name, &t), t); r->type = VT_CONSTP; } else { - r = mk_ident(imcc, name, t, VT_CONSTP); + r = mk_ident(imcc, mk_string(imcc, name, &t), t, VT_CONSTP); if (t == 'P') return mk_pmc_const_2(imcc, imcc->cur_unit, r, val); @@ -833,11 +833,10 @@ int_overflows(ARGIN(const SymReg *r)) /* -=item C<SymReg * mk_const(imc_info_t * imcc, const char *name, int t)> +=item C<char * mk_string(imc_info_t * imcc, const char *name, int *t)> -Makes a new constant and populates the cache of global symbols. +Creates a fresh char *. Strips surrounding string quotes and unescapes double-quoted strings. -Must be used in the parser. =cut @@ -845,46 +844,76 @@ Must be used in the parser. 
PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL -SymReg * -mk_const(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t) +char * +mk_string(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), ARGMOD(int *t)) { - ASSERT_ARGS(mk_const) - SymHash * const h = &imcc->ghash; - int encoded = 0; - SymReg * result; - char *const_name = (char *)name; - - if (!h->data) - create_symhash(imcc, h); + ASSERT_ARGS(mk_string) + char *new_name; - if (t != 'U') { + if (*t != 'U') { if (*name == '"') { STRING *unescaped = Parrot_str_unescape(imcc->interp, name+1, '"', NULL); + new_name = unescaped->strstart; /* but we need to keep escaped \0. represent it encoded */ - if (memchr(unescaped->strstart, 0, unescaped->bufused)) { + if (memchr(new_name, 0, unescaped->bufused)) { int len = strlen(name); - const_name = (char*)mem_internal_allocate(len + 8 + 1); /* 8 = strlen("fixed_8:") */ - strcpy(const_name, "fixed_8:"); - strcat(const_name, name); - const_name[len + 8 + 1] = 0; - if (t == 'S') t = 'U'; + new_name = (char*)mem_internal_allocate(len + 8 + 1); + strcpy(new_name, "fixed_8:"); + strcat(new_name, name); + new_name[len + 8 + 1] = 0; + if (*t == 'S') *t = 'U'; } } else if (*name == '\'') { - const_name = mem_sys_strdup(name + 1); - const_name[strlen(const_name) - 1] = 0; + new_name = mem_sys_strdup(name + 1); + new_name[strlen(new_name) - 1] = 0; + } + else { + new_name = mem_sys_strdup(name); } } + else { + new_name = mem_sys_strdup(name); + } /* TODO: resolve encoding aliases here with U, not in string_from_reg. registers should store encoded strings more efficiently: GH #1097 */ + IMCC_debug(imcc, DEBUG_MKCONST, "# mk_string '%s' %c\n", + new_name, *t); + return new_name; +} + +/* + +=item C + +Makes a new constant and populates the cache of global symbols. +Strips surrounding string quotes and unescapes double-quoted strings. +Must be used in the parser. 
+ +=cut + +*/ + +PARROT_WARN_UNUSED_RESULT +PARROT_CANNOT_RETURN_NULL +SymReg * +mk_const(ARGMOD(imc_info_t * imcc), ARGIN(const char *name), int t) +{ + ASSERT_ARGS(mk_const) + SymHash * const h = &imcc->ghash; + int encoded = 0; + SymReg * result; + const char *const_name = mk_string(imcc, name, &t); + + if (!h->data) + create_symhash(imcc, h); IMCC_debug(imcc, DEBUG_MKCONST, "# mk_const '%s' %c\n", const_name, t); result = _mk_const(imcc, h, const_name, t); return result; } - /* =item C @@ -952,17 +981,19 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh), { ASSERT_ARGS(_mk_address) SymReg *r; + int t = 'S'; + char *unquoted_name = mk_string(imcc, name, &t); if (uniq == U_add_all) { int is_lexical = 0; - r = get_sym_by_name(&imcc->ghash, name); + r = get_sym_by_name(&imcc->ghash, unquoted_name); if (r && r->usage & U_LEXICAL) is_lexical = 1; r = mem_gc_allocate_zeroed_typed(imcc->interp, SymReg); r->type = VTADDRESS; - r->name = mem_sys_strdup(name); + r->name = unquoted_name; _store_symreg(imcc, hsh, r); if (is_lexical) @@ -973,8 +1004,8 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh), char *aux_name = NULL; const char * const sub_name = (uniq == U_add_uniq_sub) /* remember to free this name; add_ns malloc()s it */ - ? (aux_name = add_ns(imcc, name)) - : name; + ? 
(aux_name = add_ns(imcc, unquoted_name)) + : unquoted_name; r = _get_sym(hsh, sub_name); @@ -984,6 +1015,7 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh), IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR, "Label '%s' already defined\n", sub_name); else if (uniq == U_add_uniq_sub) { + mem_sys_free(unquoted_name); mem_sys_free(aux_name); IMCC_fataly(imcc, EXCEPTION_SYNTAX_ERROR, "Subroutine '%s' already defined\n", name); @@ -997,7 +1029,11 @@ _mk_address(ARGMOD(imc_info_t * imcc), ARGMOD(SymHash *hsh), r->lhs_use_count++; if (uniq == U_add_uniq_sub) mem_sys_free(aux_name); + else + mem_sys_free(unquoted_name); } + else + mem_sys_free(unquoted_name); } return r; diff --git a/compilers/imcc/symreg.h b/compilers/imcc/symreg.h index 3100e39d2e..a00c0ba95c 100644 --- a/compilers/imcc/symreg.h +++ b/compilers/imcc/symreg.h @@ -366,6 +366,18 @@ SymReg * mk_pcc_sub( __attribute__nonnull__(2) FUNC_MODIFIES(* imcc); +PARROT_WARN_UNUSED_RESULT +PARROT_CANNOT_RETURN_NULL +char * mk_string( + ARGMOD(imc_info_t * imcc), + ARGIN(const char *name), + ARGMOD(int *t)) + __attribute__nonnull__(1) + __attribute__nonnull__(2) + __attribute__nonnull__(3) + FUNC_MODIFIES(* imcc) + FUNC_MODIFIES(*t); + PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL SymReg * mk_sub_address(ARGMOD(imc_info_t * imcc), ARGIN(const char *name)) @@ -498,6 +510,10 @@ char * symreg_to_str(ARGIN(const SymReg *s)) #define ASSERT_ARGS_mk_pcc_sub __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(imcc) \ , PARROT_ASSERT_ARG(name)) +#define ASSERT_ARGS_mk_string __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ + PARROT_ASSERT_ARG(imcc) \ + , PARROT_ASSERT_ARG(name) \ + , PARROT_ASSERT_ARG(t)) #define ASSERT_ARGS_mk_sub_address __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ PARROT_ASSERT_ARG(imcc) \ , PARROT_ASSERT_ARG(name))