From 20f4141b1a4abbb42b61b87cc3b52906acb12f0e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 13:02:35 +0100 Subject: [PATCH 1/7] FFI: Properly unsink non-standard cdata allocations. --- src/lib_ffi.c | 5 +---- src/lj_cdata.c | 9 +++++++++ src/lj_cdata.h | 2 ++ src/lj_snap.c | 5 +++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/lib_ffi.c b/src/lib_ffi.c index b2b2d37ff..7be624b42 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -505,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) } if (sz == CTSIZE_INVALID) lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); - if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) - cd = lj_cdata_new(cts, id, sz); - else - cd = lj_cdata_newv(L, id, sz, ctype_align(info)); + cd = lj_cdata_newx(cts, id, sz, info); setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), o, (MSize)(L->top - o)); /* Initialize cdata. */ diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 5cd2c1140..30d788e4c 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -49,6 +49,15 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) return cd; } +/* Allocate arbitrary C data object. */ +GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) +{ + if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) + return lj_cdata_new(cts, id, sz); + else + return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); +} + /* Free a C data object. */ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) { diff --git a/src/lj_cdata.h b/src/lj_cdata.h index c8975be1c..0891c33c8 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -60,6 +60,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz) LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align); +LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, + CTInfo info); LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, diff --git a/src/lj_snap.c b/src/lj_snap.c index 7c78f8a3a..62515ed0f 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -711,8 +711,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; - CTSize sz = lj_ctype_size(cts, id); - GCcdata *cd = lj_cdata_new(cts, id, sz); + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + GCcdata *cd = lj_cdata_newx(cts, id, sz, info); setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); From a687a60eaac9bd700f821415eaa50393c2fea18a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 13:28:24 +0100 Subject: [PATCH 2/7] DynASM/x64: Add full VREG support. Contributed by Peter Cawley. --- dynasm/dasm_x86.h | 33 +++++++++++--- dynasm/dasm_x86.lua | 107 +++++++++++++++++++++++++++++++------------- 2 files changed, 104 insertions(+), 36 deletions(-) diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 175febe0c..4d2b773b4 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...) dasm_State *D = Dst_REF; dasm_ActList p = D->actionlist + start; dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs, mrm = 4; + int pos = sec->pos, ofs = sec->ofs, mrm = -1; int *b; if (pos >= sec->epos) { @@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...) b[pos++] = n; switch (action) { case DASM_DISP: - if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ case DASM_IMM_D: ofs += 4; break; @@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...) case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; case DASM_SPACE: p++; ofs += n; break; case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ - case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); - if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; + case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { + case 3: n |= b[pos-3]; + case 2: n |= b[pos-2]; + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } + continue; } - mrm = 4; + mrm = -1; } else { int *pl, n; switch (action) { @@ -393,7 +400,21 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_IMM_W: dasmw(n); break; case DASM_VREG: { int t = *p++; - if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3; + unsigned char *ex = cp - (t&7); + if ((n & 8) && t < 0xa0) { + if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + } else if (n & 0x10) { + if (*ex & 0x80) { + *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; + } + while (++ex < cp) ex[-1] = *ex; + if (mark) mark--; + cp--; + } + n &= 7; + if (t >= 0xc0) n <<= 4; + else if (t >= 0x40) n <<= 3; + else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } cp[-1] ^= n; break; } diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 1fa80b55e..8c4bfc614 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -41,7 +41,7 @@ local action_names = { -- int arg, 1 buffer pos: "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", -- action arg (1 byte), int arg, 1 buffer pos (reg/num): - "VREG", "SPACE", -- !x64: VREG support NYI. + "VREG", "SPACE", -- ptrdiff_t arg, 1 buffer pos (address): !x64 "SETLABEL", "REL_A", -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): @@ -83,6 +83,21 @@ local actargs = { 0 } -- Current number of section buffer positions for dasm_put(). local secpos = 1 +-- VREG kind encodings, pre-shifted by 5 bits. +local map_vreg = { + ["modrm.rm.m"] = 0x00, + ["modrm.rm.r"] = 0x20, + ["opcode"] = 0x20, + ["sib.base"] = 0x20, + ["sib.index"] = 0x40, + ["modrm.reg"] = 0x80, + ["vex.v"] = 0xa0, + ["imm.hi"] = 0xc0, +} + +-- Current number of VREG actions contributing to REX/VEX shrinkage. +local vreg_shrink_count = 0 + ------------------------------------------------------------------------------ -- Compute action numbers for action names. @@ -134,6 +149,21 @@ local function waction(action, a, num) if a or num then secpos = secpos + (num or 1) end end +-- Optionally add a VREG action. +local function wvreg(kind, vreg, psz, sk, defer) + if not vreg then return end + waction("VREG", vreg) + local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") + if b < (sk or 0) then + vreg_shrink_count = vreg_shrink_count + 1 + end + if not defer then + b = b + vreg_shrink_count * 8 + vreg_shrink_count = 0 + end + wputxb(b + (psz or 0)) +end + -- Add call to embedded DynASM C code. local function wcall(func, args) wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) @@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) map_reg_valid_index[map_archdef.esp] = false if x64 then map_reg_valid_index[map_archdef.rsp] = false end +if x64 then map_reg_needrex[map_archdef.Rb] = true end map_archdef["Ra"] = "@"..addrsize -- FP registers (internally tword sized, but use "f" as operand size). @@ -463,16 +494,24 @@ local function wputszarg(sz, n) end -- Put multi-byte opcode with operand-size dependent modifications. -local function wputop(sz, op, rex, vex) +local function wputop(sz, op, rex, vex, vregr, vregxb) + local psz, sk = 0, nil if vex then local tail if vex.m == 1 and band(rex, 11) == 0 then - wputb(0xc5) + if x64 and vregxb then + sk = map_vreg["modrm.reg"] + else + wputb(0xc5) tail = shl(bxor(band(rex, 4), 4), 5) - else + psz = 3 + end + end + if not tail then wputb(0xc4) wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) tail = shl(band(rex, 8), 4) + psz = 4 end local reg, vreg = 0, nil if vex.v then @@ -482,12 +521,18 @@ local function wputop(sz, op, rex, vex) end if sz == "y" or vex.l then tail = tail + 4 end wputb(tail + shl(bxor(reg, 15), 3) + vex.p) - if vreg then waction("VREG", vreg); wputxb(4) end + wvreg("vex.v", vreg) rex = 0 if op >= 256 then werror("bad vex opcode") end + else + if rex ~= 0 then + if not x64 then werror("bad operand size") end + elseif (vregr or vregxb) and x64 then + rex = 0x10 + sk = map_vreg["vex.v"] + end end local r - if rex ~= 0 and not x64 then werror("bad operand size") end if sz == "w" then wputb(102) end -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end @@ -496,20 +541,20 @@ local function wputop(sz, op, rex, vex) if rex ~= 0 then local opc3 = band(op, 0xffff00) if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then - wputb(64 + band(rex, 15)); rex = 0 + wputb(64 + band(rex, 15)); rex = 0; psz = 2 end end - wputb(shr(op, 16)); op = band(op, 0xffff) + wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 end if op >= 256 then local b = shr(op, 8) - if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end - wputb(b) - op = band(op, 255) + if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end + wputb(b); op = band(op, 255); psz = psz + 1 end - if rex ~= 0 then wputb(64 + band(rex, 15)) end + if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end if sz == "b" then op = op - 1 end wputb(op) + return psz, sk end -- Put ModRM or SIB formatted byte. @@ -519,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm) end -- Put ModRM/SIB plus optional displacement. -local function wputmrmsib(t, imark, s, vsreg) +local function wputmrmsib(t, imark, s, vsreg, psz, sk) local vreg, vxreg local reg, xreg = t.reg, t.xreg if reg and reg < 0 then reg = 0; vreg = t.vreg end @@ -529,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg) -- Register mode. if sub(t.mode, 1, 1) == "r" then wputmodrm(3, s, reg) - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(0) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.r", vreg, psz+1, sk) return end @@ -544,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg) -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) wputmodrm(0, s, 4) if imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) wputmodrm(t.xsc, xreg, 5) - if vxreg then waction("VREG", vxreg); wputxb(3) end + wvreg("sib.index", vxreg, psz+2, sk) else -- Pure 32 bit displacement. if x64 and tdisp ~= "table" then wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end wputmodrm(0, 4, 5) else riprel = x64 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end end - if vsreg then waction("VREG", vsreg); wputxb(2) end end if riprel then -- Emit rip-relative displacement. if match("UWSiI", imark) then @@ -586,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg) if xreg or band(reg, 7) == 4 then wputmodrm(m or 2, s, 4) -- ModRM. if m == nil or imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. - if vxreg then waction("VREG", vxreg); wputxb(3) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("sib.index", vxreg, psz+2, sk, vreg) + wvreg("sib.base", vreg, psz+2, sk) else wputmodrm(m or 2, s, reg) -- ModRM. if (imark == "I" and (m == 1 or m == 2)) or (m == nil and (vsreg or vreg)) then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.m", vreg, psz+1, sk) end -- Put displacement. @@ -1761,10 +1807,11 @@ local function dopattern(pat, args, sz, op, needrex) if t.xreg and t.xreg > 7 then rex = rex + 2 end if s > 7 then rex = rex + 4 end if needrex then rex = rex + 16 end - wputop(szov, opcode, rex, vex); opcode = nil + local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) + opcode = nil local imark = sub(pat, -1) -- Force a mark (ugly). -- Put ModRM/SIB with regno/last digit as spare. - wputmrmsib(t, imark, s, addin and addin.vreg) + wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) addin = nil elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix local b = band(opcode, 255); opcode = shr(opcode, 8) @@ -1791,8 +1838,8 @@ local function dopattern(pat, args, sz, op, needrex) if szov == "q" and rex == 0 then rex = rex + 8 end if needrex then rex = rex + 16 end if addin and addin.reg == -1 then - wputop(szov, opcode - 7, rex, vex) - waction("VREG", addin.vreg); wputxb(0) + local psz, sk = wputop(szov, opcode - 7, rex, vex, true) + wvreg("opcode", addin.vreg, psz, sk) else if addin and addin.reg > 7 then rex = rex + 1 end wputop(szov, opcode, rex, vex) @@ -1836,7 +1883,7 @@ local function dopattern(pat, args, sz, op, needrex) local reg = a.reg if reg < 0 then wputb(0) - waction("VREG", a.vreg); wputxb(5) + wvreg("imm.hi", a.vreg) else wputb(shl(reg, 4)) end @@ -1988,8 +2035,8 @@ if x64 then rex = a.reg > 7 and 9 or 8 end end - wputop(sz, opcode, rex) - if vreg then waction("VREG", vreg); wputxb(0) end + local psz, sk = wputop(sz, opcode, rex, nil, vreg) + wvreg("opcode", vreg, psz, sk) waction("IMM_D", format("(unsigned int)(%s)", op64)) waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) end From f61148c486545bf22df81f223efd3297d1c66f7b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 16:34:11 +0100 Subject: [PATCH 3/7] ARM: Add external frame unwinding. Thanks to Nick Zavaritsky. --- src/host/buildvm_asm.c | 9 +++++ src/lj_err.c | 92 +++++++++++++++++++++++++++++------------- src/vm_arm.dasc | 11 +++++ 3 files changed, 84 insertions(+), 28 deletions(-) diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 9b7ae53a2..9b1194259 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -261,11 +261,20 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND /* This should really be moved into buildvm_arm.dasc. */ +#if LJ_ARCH_HASFPU + fprintf(ctx->fp, + ".fnstart\n" + ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n" + ".vsave {d8-d15}\n" + ".save {r4}\n" + ".pad #28\n"); +#else fprintf(ctx->fp, ".fnstart\n" ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" ".pad #28\n"); #endif +#endif #if LJ_TARGET_MIPS fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif diff --git a/src/lj_err.c b/src/lj_err.c index 9ac0c988f..d641735e9 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -190,13 +190,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) ** since various OS, distros and compilers mess up the header installation. */ -typedef struct _Unwind_Exception -{ - uint64_t exclass; - void (*excleanup)(int, struct _Unwind_Exception *); - uintptr_t p1, p2; -} __attribute__((__aligned__)) _Unwind_Exception; - typedef struct _Unwind_Context _Unwind_Context; #define _URC_OK 0 @@ -206,8 +199,20 @@ typedef struct _Unwind_Context _Unwind_Context; #define _URC_CONTINUE_UNWIND 8 #define _URC_FAILURE 9 +#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ +#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) +#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) +#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) + #if !LJ_TARGET_ARM +typedef struct _Unwind_Exception +{ + uint64_t exclass; + void (*excleanup)(int, struct _Unwind_Exception *); + uintptr_t p1, p2; +} __attribute__((__aligned__)) _Unwind_Exception; + extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); @@ -219,11 +224,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *); #define _UA_HANDLER_FRAME 4 #define _UA_FORCE_UNWIND 8 -#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ -#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) -#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) -#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) - /* DWARF2 personality handler referenced from interpreter .eh_frame. */ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) @@ -302,10 +302,23 @@ static void err_raise_ext(int errcode) } #endif -#else +#else /* LJ_TARGET_ARM */ + +#define _US_VIRTUAL_UNWIND_FRAME 0 +#define _US_UNWIND_FRAME_STARTING 1 +#define _US_ACTION_MASK 3 +#define _US_FORCE_UNWIND 8 + +typedef struct _Unwind_Control_Block _Unwind_Control_Block; +typedef struct _Unwind_Context _Unwind_Context; + +struct _Unwind_Control_Block { + uint64_t exclass; + uint32_t misc[20]; +}; -extern void _Unwind_DeleteException(void *); -extern int __gnu_unwind_frame (void *, _Unwind_Context *); +extern int _Unwind_RaiseException(_Unwind_Control_Block *); +extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); @@ -321,34 +334,57 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) _Unwind_VRS_Set(ctx, 0, r, 0, &v); } -#define _US_VIRTUAL_UNWIND_FRAME 0 -#define _US_UNWIND_FRAME_STARTING 1 -#define _US_ACTION_MASK 3 -#define _US_FORCE_UNWIND 8 +extern void lj_vm_unwind_ext(void); /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ -LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) +LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, + _Unwind_Context *ctx) { void *cf = (void *)_Unwind_GetGR(ctx, 13); lua_State *L = cframe_L(cf); - if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + int errcode; + + switch ((state & _US_ACTION_MASK)) { + case _US_VIRTUAL_UNWIND_FRAME: + if ((state & _US_FORCE_UNWIND)) break; return _URC_HANDLER_FOUND; - } - if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { - _Unwind_DeleteException(ucb); - _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); - _Unwind_SetGR(ctx, 0, (uint32_t)L); - _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); + case _US_UNWIND_FRAME_STARTING: + if (LJ_UEXCLASS_CHECK(ucb->exclass)) { + errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); + } else { + errcode = LUA_ERRRUN; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + } + cf = err_unwind(L, cf, errcode); + if ((state & _US_FORCE_UNWIND) || cf == NULL) break; + _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); + _Unwind_SetGR(ctx, 0, (uint32_t)ucb); + _Unwind_SetGR(ctx, 1, (uint32_t)errcode); + _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? + (uint32_t)lj_vm_unwind_ff_eh : + (uint32_t)lj_vm_unwind_c_eh); return _URC_INSTALL_CONTEXT; + default: + return _URC_FAILURE; } if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) return _URC_FAILURE; return _URC_CONTINUE_UNWIND; } +#if LJ_UNWIND_EXT +static __thread _Unwind_Control_Block static_uex; + +static void err_raise_ext(int errcode) +{ + memset(&static_uex, 0, sizeof(static_uex)); + static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); + _Unwind_RaiseException(&static_uex); +} #endif +#endif /* LJ_TARGET_ARM */ + #elif LJ_TARGET_X64 && LJ_ABI_WIN /* diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index af722f9ea..acc0853bb 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -372,6 +372,17 @@ static void build_subroutines(BuildCtx *ctx) | str CARG1, [BASE, #-4] // Prepend false to error message. | st_vmstate CARG2 | b ->vm_returnc + | + |->vm_unwind_ext: // Complete external unwind. +#if !LJ_NO_UNWIND + | push {r0, r1, r2, lr} + | bl extern _Unwind_Complete + | ldr r0, [sp] + | bl extern _Unwind_DeleteException + | pop {r0, r1, r2, lr} + | mov r0, r1 + | bx r2 +#endif | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- From cfae3846f87bc93b3d5fdac04c61c09fc4adb1aa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 16:40:39 +0100 Subject: [PATCH 4/7] DynASM/x86: Add AVX AES instructions. Contributed by Peter Cawley. --- dynasm/dasm_x86.lua | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 8c4bfc614..60f5211a3 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -1567,6 +1567,12 @@ local map_op = { -- AVX, AVX2 integer ops -- In general, xmm requires AVX, ymm requires AVX2. + vaesdec_3 = "rrmo:660F38VDErM", + vaesdeclast_3 = "rrmo:660F38VDFrM", + vaesenc_3 = "rrmo:660F38VDCrM", + vaesenclast_3 = "rrmo:660F38VDDrM", + vaesimc_2 = "rmo:660F38uDBrM", + vaeskeygenassist_3 = "rmio:660F3AuDFrMU", vlddqu_2 = "rxoy:F20FuF0rM", vmaskmovdqu_2 = "rro:660FuF7rM", vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", From 5ce6399ed95cbf69f7ef90e2d9969ca1d4462660 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 16:42:37 +0100 Subject: [PATCH 5/7] x86: Improve disassembly of AVX shift instructions. Contributed by Peter Cawley. --- src/jit/dis_x86.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 49bbcad0d..7f86d965d 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -158,8 +158,8 @@ local map_opc2 = { "||punpcklqdqXrvm","||punpckhqdqXrvm", "movPrVSm","movqMrm|movdquXrm|movdqaXrm", --7x -"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", -"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", +"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", +"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", nil,nil, From 52ebe02c277fef4ca62e6cab09c0d7ec9ed4d169 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 16:46:31 +0100 Subject: [PATCH 6/7] x86: Disassemble AES instructions. Contributed by Peter Cawley. --- src/jit/dis_x86.lua | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 7f86d965d..a7c05ed6d 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -239,6 +239,9 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", --8x [0x8c] = "||pmaskmovXrvVSm", [0x8e] = "||pmaskmovVSmXvr", +--Dx +[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", +[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", --Fx [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", }, @@ -262,12 +265,14 @@ nil,nil,nil,nil, [0x40] = "||dppsXrvmu", [0x41] = "||dppdXrvmu", [0x42] = "||mpsadbwXrvmu", +[0x44] = "||pclmulqdqXrvmu", [0x46] = "||perm2i128Xrvmu", [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", [0x4c] = "||pblendvbXrvmb", --6x [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", +[0xdf] = "||aeskeygenassistXrmu", }, } From 22e7b00ddbc2adcc351f9d3656956c5937fc8ee8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Dec 2015 17:06:48 +0100 Subject: [PATCH 7/7] DynASM/x64: Fix for full VREG support. Thanks to Peter Cawley. --- dynasm/dasm_x86.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 4d2b773b4..be9c289f0 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -403,6 +403,7 @@ int dasm_encode(Dst_DECL, void *buffer) unsigned char *ex = cp - (t&7); if ((n & 8) && t < 0xa0) { if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + n &= 7; } else if (n & 0x10) { if (*ex & 0x80) { *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; @@ -410,8 +411,8 @@ int dasm_encode(Dst_DECL, void *buffer) while (++ex < cp) ex[-1] = *ex; if (mark) mark--; cp--; + n &= 7; } - n &= 7; if (t >= 0xc0) n <<= 4; else if (t >= 0x40) n <<= 3; else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }