From f5303b3dd722eca5dd7a7021b23b91cb0e64fc46 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 29 Apr 2024 05:19:01 -0700 Subject: [PATCH] Make table.concat faster (#1243) table.concat is idiomatic and should be the fastest way to concatenate all table array elements together, but apparently you can beat it by using `string.format`, `string.rep` and `table.unpack`: ```lua string.format(string.rep("%*", #t), table.unpack(t)) ``` ... this just won't do, so we should fix table.concat performance. The deficit comes from two places: - rawgeti overhead followed by other stack accesses, all to extract a string from what is almost always an in-bounds array lookup - addlstring overhead in case separator is empty (extra function calls) This change fixes this by using a fast path for in-bounds array lookup for a string. Note that `table.concat` also supports numbers (these need to be converted to strings which is a little cumbersome and has innate overhead), and out-of-bounds accesses*. In these cases we fall back to the old implementation. To trigger out-of-bounds accesses, you need to skip the past-array-end element (which is nil per array invariant), but this is achievable because table.concat supports offset+length arguments. This should almost never come up in practice but the per-element branches et al are fairly cheap compared to the eventual string copy/alloc anyway. This change makes table.concat ~2x faster when the separator is empty; the table.concat benchmark shows +40% gains but it uses a variety of string separators of different lengths so it doesn't get the full benefit from this change. --------- Co-authored-by: vegorov-rbx <75688451+vegorov-rbx@users.noreply.github.com> --- VM/src/ltablib.cpp | 36 ++++++++++++++++++++++++------------ tests/conformance/tables.lua | 28 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/VM/src/ltablib.cpp b/VM/src/ltablib.cpp index 27c08f117..a57d6cf76 100644 --- a/VM/src/ltablib.cpp +++ b/VM/src/ltablib.cpp @@ -12,6 +12,7 @@ #include "lvm.h" LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFastCrossTableMove, false) +LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauFasterConcat, false) static int foreachi(lua_State* L) { @@ -282,31 +283,42 @@ static int tmove(lua_State* L) return 1; } -static void addfield(lua_State* L, luaL_Strbuf* b, int i) +static void addfield(lua_State* L, luaL_Strbuf* b, int i, Table* t) { - int tt = lua_rawgeti(L, 1, i); - if (tt != LUA_TSTRING && tt != LUA_TNUMBER) - luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i); - luaL_addvalue(b); + if (DFFlag::LuauFasterConcat && t && unsigned(i - 1) < unsigned(t->sizearray) && ttisstring(&t->array[i - 1])) + { + TString* ts = tsvalue(&t->array[i - 1]); + luaL_addlstring(b, getstr(ts), ts->len); + } + else + { + int tt = lua_rawgeti(L, 1, i); + if (tt != LUA_TSTRING && tt != LUA_TNUMBER) + luaL_error(L, "invalid value (%s) at index %d in table for 'concat'", luaL_typename(L, -1), i); + luaL_addvalue(b); + } } static int tconcat(lua_State* L) { - luaL_Strbuf b; size_t lsep; - int i, last; const char* sep = luaL_optlstring(L, 2, "", &lsep); luaL_checktype(L, 1, LUA_TTABLE); - i = luaL_optinteger(L, 3, 1); - last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1)); + int i = luaL_optinteger(L, 3, 1); + int last = luaL_opt(L, luaL_checkinteger, 4, lua_objlen(L, 1)); + + Table* t = DFFlag::LuauFasterConcat ? hvalue(L->base) : NULL; + + luaL_Strbuf b; luaL_buffinit(L, &b); for (; i < last; i++) { - addfield(L, &b, i); - luaL_addlstring(&b, sep, lsep); + addfield(L, &b, i, t); + if (!DFFlag::LuauFasterConcat || lsep != 0) + luaL_addlstring(&b, sep, lsep); } if (i == last) // add last value (if interval was not empty) - addfield(L, &b, i); + addfield(L, &b, i, t); luaL_pushresult(&b); return 1; } diff --git a/tests/conformance/tables.lua b/tests/conformance/tables.lua index 03b463968..75163fd19 100644 --- a/tests/conformance/tables.lua +++ b/tests/conformance/tables.lua @@ -412,6 +412,34 @@ do assert(table.find({[(1)] = true}, true) == 1) end +-- test table.concat +do + -- regular usage + assert(table.concat({}) == "") + assert(table.concat({}, ",") == "") + assert(table.concat({"a", "b", "c"}, ",") == "a,b,c") + assert(table.concat({"a", "b", "c"}, ",", 2) == "b,c") + assert(table.concat({"a", "b", "c"}, ",", 1, 2) == "a,b") + + -- hash elements + local t = {} + t[123] = "a" + t[124] = "b" + + assert(table.concat(t) == "") + assert(table.concat(t, ",", 123, 124) == "a,b") + assert(table.concat(t, ",", 123, 123) == "a") + + -- numeric values + assert(table.concat({1, 2, 3}, ",") == "1,2,3") + assert(table.concat({"a", 2, "c"}, ",") == "a,2,c") + + -- error cases + assert(pcall(table.concat, "") == false) + assert(pcall(table.concat, t, false) == false) + assert(pcall(table.concat, t, ",", 1, 100) == false) +end + -- test indexing with strings that have zeroes embedded in them do local t = {}