diff --git a/quickjs.c b/quickjs.c index f93fcbef0..63786a30f 100644 --- a/quickjs.c +++ b/quickjs.c @@ -211,6 +211,9 @@ typedef enum JSErrorEnum { #define JS_MAX_LOCAL_VARS 65535 #define JS_STACK_SIZE_MAX 65534 #define JS_STRING_LEN_MAX ((1 << 30) - 1) +// 1,024 bytes is about the cutoff point where it starts getting +// more profitable to ref slice than to copy +#define JS_STRING_SLICE_LEN_MAX 1024 // in bytes #define __exception __attribute__((warn_unused_result)) @@ -545,7 +548,12 @@ typedef enum { JS_ATOM_KIND_PRIVATE, } JSAtomKindEnum; -#define JS_ATOM_HASH_MASK ((1 << 30) - 1) +typedef enum { + JS_STRING_KIND_NORMAL, + JS_STRING_KIND_SLICE, +} JSStringKind; + +#define JS_ATOM_HASH_MASK ((1 << 29) - 1) struct JSString { JSRefCountHeader header; /* must come first, 32-bit */ @@ -554,7 +562,8 @@ struct JSString { /* for JS_ATOM_TYPE_SYMBOL: hash = 0, atom_type = 3, for JS_ATOM_TYPE_PRIVATE: hash = 1, atom_type = 3 XXX: could change encoding to have one more bit in hash */ - uint32_t hash : 30; + uint32_t hash : 29; + uint8_t kind : 1; uint8_t atom_type : 2; /* != 0 if atom, JS_ATOM_TYPE_x */ uint32_t hash_next; /* atom_index for JS_ATOM_TYPE_SYMBOL */ JSWeakRefRecord *first_weak_ref; @@ -563,14 +572,39 @@ struct JSString { #endif }; +typedef struct JSStringSlice { + JSString *parent; + uint32_t start; // in characters, not bytes +} JSStringSlice; + static inline uint8_t *str8(JSString *p) { - return (void *)(p + 1); + JSStringSlice *slice; + + switch (p->kind) { + case JS_STRING_KIND_NORMAL: + return (void *)&p[1]; + case JS_STRING_KIND_SLICE: + slice = (void *)&p[1]; + return str8(slice->parent) + slice->start; + } + abort(); + return NULL; } static inline uint16_t *str16(JSString *p) { - return (void *)(p + 1); + JSStringSlice *slice; + + switch (p->kind) { + case JS_STRING_KIND_NORMAL: + return (void *)&p[1]; + case JS_STRING_KIND_SLICE: + slice = (void *)&p[1]; + return str16(slice->parent) + slice->start; + } + abort(); + return NULL; } typedef struct JSClosureVar { @@ -2049,6 +2083,7 @@ static JSString *js_alloc_string_rt(JSRuntime *rt, int max_len, int is_wide_char str->header.ref_count = 1; str->is_wide_char = is_wide_char; str->len = max_len; + str->kind = JS_STRING_KIND_NORMAL; str->atom_type = 0; str->hash = 0; /* optional but costless */ str->hash_next = 0; /* optional */ @@ -2069,18 +2104,28 @@ static JSString *js_alloc_string(JSContext *ctx, int max_len, int is_wide_char) return p; } +static inline void js_free_string0(JSRuntime *rt, JSString *str); + /* same as JS_FreeValueRT() but faster */ static inline void js_free_string(JSRuntime *rt, JSString *str) { - if (--str->header.ref_count <= 0) { - if (str->atom_type) { - JS_FreeAtomStruct(rt, str); - } else { + if (--str->header.ref_count <= 0) + js_free_string0(rt, str); +} + +static inline void js_free_string0(JSRuntime *rt, JSString *str) +{ + if (str->atom_type) { + JS_FreeAtomStruct(rt, str); + } else { #ifdef ENABLE_DUMPS // JS_DUMP_LEAKS - list_del(&str->link); + list_del(&str->link); #endif - js_free_rt(rt, str); + if (str->kind == JS_STRING_KIND_SLICE) { + JSStringSlice *slice = (void *)&str[1]; + js_free_string(rt, slice->parent); // safe, recurses only 1 level } + js_free_rt(rt, str); } } @@ -2962,6 +3007,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type) p->header.ref_count = 1; p->is_wide_char = str->is_wide_char; p->len = str->len; + p->kind = JS_STRING_KIND_NORMAL; #ifdef ENABLE_DUMPS // JS_DUMP_LEAKS list_add_tail(&p->link, &rt->string_list); #endif @@ -2976,6 +3022,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type) p->header.ref_count = 1; p->is_wide_char = 1; /* Hack to represent NULL as a JSString */ p->len = 0; + p->kind = JS_STRING_KIND_NORMAL; #ifdef ENABLE_DUMPS // JS_DUMP_LEAKS list_add_tail(&p->link, &rt->string_list); #endif @@ -3680,13 +3727,37 @@ static JSValue js_new_string_char(JSContext *ctx, uint16_t c) static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end) { - int len = end - start; + JSStringSlice *slice; + JSString *q; + int len; + + len = end - start; if (start == 0 && end == p->len) { return js_dup(JS_MKPTR(JS_TAG_STRING, p)); } if (len <= 0) { return js_empty_string(ctx->rt); } + if (len > (JS_STRING_SLICE_LEN_MAX >> p->is_wide_char)) { + if (p->kind == JS_STRING_KIND_SLICE) { + slice = (void *)&p[1]; + p = slice->parent; + start += slice->start; + } + // allocate as 16 bit wide string to avoid wastage; + // js_alloc_string allocates 1 byte extra for 8 bit strings; + q = js_alloc_string(ctx, sizeof(*slice)/2, /*is_wide_char*/true); + if (!q) + return JS_EXCEPTION; + q->is_wide_char = p->is_wide_char; + q->kind = JS_STRING_KIND_SLICE; + q->len = len; + slice = (void *)&q[1]; + slice->parent = p; + slice->start = start; + p->header.ref_count++; + return JS_MKPTR(JS_TAG_STRING, q); + } if (p->is_wide_char) { JSString *str; int i; @@ -5750,17 +5821,7 @@ static void js_free_value_rt(JSRuntime *rt, JSValue v) switch(tag) { case JS_TAG_STRING: - { - JSString *p = JS_VALUE_GET_STRING(v); - if (p->atom_type) { - JS_FreeAtomStruct(rt, p); - } else { -#ifdef ENABLE_DUMPS // JS_DUMP_LEAKS - list_del(&p->link); -#endif - js_free_rt(rt, p); - } - } + js_free_string0(rt, JS_VALUE_GET_STRING(v)); break; case JS_TAG_OBJECT: case JS_TAG_FUNCTION_BYTECODE: @@ -58038,6 +58099,13 @@ uintptr_t js_std_cmd(int cmd, ...) { *pv = ctx->error_back_trace; ctx->error_back_trace = JS_UNDEFINED; break; + case 3: // GetStringKind + ctx = va_arg(ap, JSContext *); + pv = va_arg(ap, JSValue *); + rv = -1; + if (JS_IsString(*pv)) + rv = JS_VALUE_GET_STRING(*pv)->kind; + break; default: rv = -1; } diff --git a/run-test262.c b/run-test262.c index 5dfeb5008..c937313aa 100644 --- a/run-test262.c +++ b/run-test262.c @@ -1700,15 +1700,32 @@ void update_stats(JSRuntime *rt, const char *filename) { js_mutex_unlock(&stats_mutex); } +static JSValue qjs_black_box(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst argv[], int magic) +{ + return JS_NewInt32(ctx, js_std_cmd(magic, ctx, &argv[0])); +} + +static const JSCFunctionListEntry qjs_methods[] = { + JS_CFUNC_MAGIC_DEF("getStringKind", 1, qjs_black_box, /*GetStringKind*/3), +}; + +static const JSCFunctionListEntry qjs_object = + JS_OBJECT_DEF("qjs", qjs_methods, countof(qjs_methods), JS_PROP_C_W_E); + JSContext *JS_NewCustomContext(JSRuntime *rt) { JSContext *ctx; + JSValue obj; ctx = JS_NewContext(rt); if (ctx && local) { js_init_module_std(ctx, "qjs:std"); js_init_module_os(ctx, "qjs:os"); js_init_module_bjson(ctx, "qjs:bjson"); + obj = JS_GetGlobalObject(ctx); + JS_SetPropertyFunctionList(ctx, obj, &qjs_object, 1); + JS_FreeValue(ctx, obj); } return ctx; } diff --git a/tests/microbench.js b/tests/microbench.js index 7f6df4763..445c6c026 100644 --- a/tests/microbench.js +++ b/tests/microbench.js @@ -797,6 +797,39 @@ function string_build4(n) return n * 100; } +function string_slice1(n) +{ + var i, j, s; + s = "x".repeat(1<<16); + for (i = 0; i < n; i++) { + for (j = 0; j < 1000; j++) + s.slice(-1); // too short for JSStringSlice + } + return n * 1000; +} + +function string_slice2(n) +{ + var i, j, s; + s = "x".repeat(1<<16); + for (i = 0; i < n; i++) { + for (j = 0; j < 1000; j++) + s.slice(-1024); + } + return n * 1000; +} + +function string_slice3(n) +{ + var i, j, s; + s = "x".repeat(1<<16); + for (i = 0; i < n; i++) { + for (j = 0; j < 1000; j++) + s.slice(1); + } + return n * 1000; +} + /* sort bench */ function sort_bench(text) { @@ -1114,6 +1147,9 @@ function main(argc, argv, g) string_build2, //string_build3, //string_build4, + string_slice1, + string_slice2, + string_slice3, sort_bench, int_to_string, int_toString, diff --git a/tests/test_builtin.js b/tests/test_builtin.js index 255886adf..462558e28 100644 --- a/tests/test_builtin.js +++ b/tests/test_builtin.js @@ -378,6 +378,11 @@ function test_string() assert(eval('"\0"'), "\0"); assert("abc".padStart(Infinity, ""), "abc"); + + assert(qjs.getStringKind("xyzzy".slice(1)), + /*JS_STRING_KIND_NORMAL*/0); + assert(qjs.getStringKind("xyzzy".repeat(512).slice(1)), + /*JS_STRING_KIND_SLICE*/1); } function test_math()