From 2763a421d6219c8cb2bbd39246de619dc796bab6 Mon Sep 17 00:00:00 2001 From: Guy Menanteau Date: Tue, 11 Jun 2019 11:11:47 +0000 Subject: [PATCH] Patch for PPC64 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a patch for PPC64 support based on https://github.com/LuaJIT/LuaJIT/pull/140. https://bugzilla.redhat.com/show_bug.cgi?id=1591701 This patch has been rebased to match FPU support Author: Guy Menanteau Signed-Off-By: Marcin Kościelnicki [ppc] Fix access beyond list in ipairs The load into TMP2 was incorrectly put into ENDIAN_LE, which made the subsequent check invalid. [ppc] Fix typo [ppc] Load BASEP4 as much as possible BASEP4 doesn't seem to get initialized all the time, especially when BASE is updated because of which programs can crash at random on ppc32. Err on the conservative side and set BASEP4 every time BASE_LO (or BASE_HI for LE) are accessed. This eventually needs to be tuned optimally. [ppc] Revert LE code for assert [ppc] Fix off by one in assert It ended up reading the first argument twice. 
Fix BC_POW on ppc64le --- dynasm/dasm_ppc.lua | 5 + src/Makefile | 11 +- src/host/buildvm_asm.c | 9 +- src/lj_arch.h | 15 +- src/lj_ccall.c | 166 ++++- src/lj_ccall.h | 13 + src/lj_ccallback.c | 68 +- src/lj_ctype.h | 2 +- src/lj_def.h | 4 + src/lj_frame.h | 9 + src/lj_target_ppc.h | 14 + src/vm_ppc.dasc | 1377 +++++++++++++++++++++++++++------------- 12 files changed, 1238 insertions(+), 455 deletions(-) diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua index 20634e134..572c9317b 100644 --- a/dynasm/dasm_ppc.lua +++ b/dynasm/dasm_ppc.lua @@ -257,9 +257,11 @@ map_op = { addic_3 = "30000000RRI", ["addic._3"] = "34000000RRI", addi_3 = "38000000RR0I", + addil_3 = "38000000RR0J", li_2 = "38000000RI", la_2 = "38000000RD", addis_3 = "3c000000RR0I", + addisl_3 = "3c000000RR0J", lis_2 = "3c000000RI", lus_2 = "3c000000RU", bc_3 = "40000000AAK", @@ -842,6 +844,9 @@ map_op = { srdi_3 = op_alias("rldicl_4", function(p) p[4] = p[3]; p[3] = "64-("..p[3]..")" end), + ["srdi._3"] = op_alias("rldicl._4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), clrldi_3 = op_alias("rldicl_4", function(p) p[4] = p[3]; p[3] = "0" end), diff --git a/src/Makefile b/src/Makefile index c343bceb0..e43ba18fb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -463,7 +463,16 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D GPR64 endif ifeq (PS3,$(TARGET_SYS)) - DASM_AFLAGS+= -D PPE -D TOC + DASM_AFLAGS+= -D PPE + endif + ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D OPD + endif + ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D OPDENV + endif + ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ELFV2 endif endif endif diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index c8fdcbc75..17998c89f 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -188,7 +188,11 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, #else #define TOCPREFIX "" #endif - if 
((ins >> 26) == 16) { + if ((ins >> 26) == 14) { + fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 15) { + fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 16) { fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); } else if ((ins >> 26) == 18) { @@ -290,6 +294,9 @@ void emit_asm(BuildCtx *ctx) int i, rel; fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); +#if LJ_ARCH_PPC_ELFV2 + fprintf(ctx->fp, "\t.abiversion 2\n"); +#endif fprintf(ctx->fp, "\t.text\n"); emit_asm_align(ctx, 4); diff --git a/src/lj_arch.h b/src/lj_arch.h index 5f1be8c7d..295f52f99 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -299,8 +299,18 @@ #if LJ_TARGET_CONSOLE #define LJ_ARCH_PPC32ON64 1 #define LJ_ARCH_NOFFI 1 +#if LJ_TARGET_PS3 +#define LJ_ARCH_PPC_OPD 1 +#endif #elif LJ_ARCH_BITS == 64 -#error "No support for PPC64" +#define LJ_ARCH_PPC32ON64 1 +#define LJ_ARCH_NOJIT 1 /* NYI */ +#if _CALL_ELF == 2 +#define LJ_ARCH_PPC_ELFV2 1 +#else +#define LJ_ARCH_PPC_OPD 1 +#define LJ_ARCH_PPC_OPDENV 1 +#endif #endif #if _ARCH_PWR7 @@ -482,9 +492,6 @@ #error "No support for ILP32 model on ARM64" #endif #elif LJ_TARGET_PPC -#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) -#error "No support for little-endian PPC32" -#endif #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #endif diff --git a/src/lj_ccall.c b/src/lj_ccall.c index ab3d3c4e3..b724ae57e 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -370,21 +370,97 @@ #elif LJ_TARGET_PPC /* -- PPC calling conventions --------------------------------------------- */ +#if LJ_ARCH_BITS == 64 + +#if LJ_ARCH_PPC_ELFV2 + +#define CCALL_HANDLE_STRUCTRET \ + if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \ + cc->retref = 1; /* Return by reference. 
*/ \ + cc->gpr[ngpr++] = (GPRArg)dp; \ + } + +#define CCALL_HANDLE_STRUCTRET2 \ + int isfp = ccall_classify_fp(cts, ctr); \ + int i; \ + if (isfp == FTYPE_FLOAT) { \ + for (i = 0; i < ctr->size / 4; i++) \ + ((float *)dp)[i] = cc->fpr[i]; \ + } else if (isfp == FTYPE_DOUBLE) { \ + for (i = 0; i < ctr->size / 8; i++) \ + ((double *)dp)[i] = cc->fpr[i]; \ + } else { \ + if (ctr->size < 8 && LJ_BE) { \ + sp += 8 - ctr->size; \ + } \ + memcpy(dp, sp, ctr->size); \ + } + +#else + #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ cc->gpr[ngpr++] = (GPRArg)dp; +#endif + #define CCALL_HANDLE_COMPLEXRET \ /* Complex values are returned in 2 or 4 GPRs. */ \ cc->retref = 0; +#define CCALL_HANDLE_STRUCTARG + #define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0]; \ + ((float *)dp)[1] = cc->fpr[1]; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0]; \ + ((double *)dp)[1] = cc->fpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + isfp = 1; \ + if (d->size == sizeof(float) * 2) { \ + d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \ + isf32 = 1; \ + } + +#define CCALL_HANDLE_REGARG \ + if (isfp && d->size == sizeof(float)) { \ + d = ctype_get(cts, CTID_DOUBLE); \ + isf32 = 1; \ + } \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + if (ngpr > maxgpr) { \ + nsp += ngpr - 8; \ + ngpr = 8; \ + if (nsp > CCALL_MAXSTACK) { \ + goto err_nyi; \ + } \ + } \ + goto done; \ + } + +#else + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 2 or 4 GPRs. */ \ + cc->retref = 0; #define CCALL_HANDLE_STRUCTARG \ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ sz = CTSIZE_PTR; /* Pass all structs by reference. 
*/ +#define CCALL_HANDLE_COMPLEXRET2 \ + memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ + #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ @@ -419,6 +495,8 @@ } #endif +#endif + #if !LJ_ABI_SOFTFP #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ @@ -846,6 +924,50 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) #endif +/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */ + +#if LJ_ARCH_PPC_ELFV2 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +static unsigned int ccall_classify_fp(CTState *cts, CType *ct) { + if (ctype_isfp(ct->info)) { + if (ct->size == sizeof(float)) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_iscomplex(ct->info)) { + if (ct->size == sizeof(float) * 2) + return FTYPE_FLOAT; + else + return FTYPE_DOUBLE; + } else if (ctype_isstruct(ct->info)) { + int res = -1; + int sz = ct->size; + while (ct->sib) { + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct)); + if (res == -1) + res = sub; + if (sub != -1 && sub != res) + return 0; + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + return 0; + } + } + if (res > 0 && sz > res * 4 * 8) + return 0; + return res; + } else { + return 0; + } +} + +#endif + /* -- MIPS64 ABI struct classification ---------------------------- */ #if LJ_TARGET_MIPS64 @@ -1020,6 +1142,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize sz; MSize n, isfp = 0, isva = 0; void *dp, *rp = NULL; +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + int isf32 = 0; +#endif #if LJ_TARGET_S390X uint32_t onstack = 0; @@ -1083,7 +1208,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(void **)dp = rp; dp = rp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE + if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) { + dp = (char *)dp + (CTSIZE_PTR - sz); + } 
+#endif lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (isfp) { + int i; + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + if (isf32) { + int i; + for (i = 0; i < d->size / 8; i++) + ((float *)dp)[i*2] = ((double *)dp)[i]; + } +#endif +#if LJ_ARCH_PPC_ELFV2 + if (ctype_isstruct(d->info)) { + isfp = ccall_classify_fp(cts, d); + int i; + if (isfp == FTYPE_FLOAT) { + for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((float *)dp)[i]; + } else if (isfp == FTYPE_DOUBLE) { + for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) + cc->fpr[nfpr++] = ((double *)dp)[i]; + } + } +#endif /* Extend passed integers to 32 bits at least. */ if (ctype_isinteger_or_bool(d->info) && d->size < 4) { if (d->info & CTF_UNSIGNED) @@ -1097,6 +1252,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) + && d->size <= 4) { + if (d->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 93f8b91a0..68fc27b9a 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -86,10 +86,23 @@ typedef union FPRArg { #elif LJ_TARGET_PPC #define CCALL_NARG_GPR 8 +#if LJ_ARCH_BITS == 64 +#define CCALL_NARG_FPR 13 +#if LJ_ARCH_PPC_ELFV2 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 8 +#define CCALL_SPS_EXTRA 14 +#else +#define CCALL_NRET_GPR 1 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 16 +#endif +#else #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 
0 : 8) #define CCALL_NRET_GPR 4 /* For complex double. */ #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) #define CCALL_SPS_EXTRA 4 +#endif #define CCALL_SPS_FREE 0 typedef intptr_t GPRArg; diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 300280185..9556732a1 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD + +#define CALLBACK_SLOT2OFS(slot) (24*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24) +#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) + +#elif LJ_ARCH_PPC_ELFV2 + +#define CALLBACK_SLOT2OFS(slot) (4*(slot)) +#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4) +#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10) + +#else + #define CALLBACK_MCODE_HEAD 24 +#endif + #elif LJ_TARGET_MIPS32 #define CALLBACK_MCODE_HEAD 20 @@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page) lua_assert(p - page <= CALLBACK_MCODE_SIZE); } #elif LJ_TARGET_PPC +#if LJ_ARCH_PPC_OPD +register void *vm_toc __asm__("r2"); +static void callback_mcode_init(global_State *g, uint64_t *page) +{ + uint64_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = (uint64_t)target; + *p++ = (uint64_t)vm_toc; + *p++ = (uint64_t)g | ((uint64_t)slot << 47); + } + lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8); +} +#else static void callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; +#if LJ_ARCH_PPC_ELFV2 + // Needs to be in sync with lj_vm_ffi_callback. 
+ lua_assert(CALLBACK_MCODE_SIZE == 4096); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2); + p++; + } + *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff); + *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff); + *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ + *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff); + *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff); + *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1); + *p++ = PPCI_BCTR; +#else *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); + *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16); *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); + *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff); *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); *p++ = PPCI_BCTR; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; + *p++ = PPCI_LI | PPCF_T(RID_R12) | slot; *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); +#endif + lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4); } +#endif #elif LJ_TARGET_MIPS static void callback_mcode_init(global_State *g, uint32_t *page) { @@ -662,6 +713,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? 
(int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 + if (ctr->size <= 4 && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) { + if (ctr->info & CTF_UNSIGNED) + *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; + else + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } +#endif #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 73cefef86..e59a703aa 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -153,7 +153,7 @@ typedef struct CType { /* Simplify target-specific configuration. Checked in lj_ccall.h. */ #define CCALL_MAX_GPR 8 -#define CCALL_MAX_FPR 8 +#define CCALL_MAX_FPR 14 typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; diff --git a/src/lj_def.h b/src/lj_def.h index ac729e4bc..d0ef808c0 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -71,7 +71,11 @@ typedef unsigned int uintptr_t; #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#if defined(__powerpc64__) && _CALL_ELF != 2 +#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */ +#else #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ +#endif /* Minimum table/buffer sizes. */ #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ diff --git a/src/lj_frame.h b/src/lj_frame.h index f78c8fcc2..17ea66e29 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. 
*/ #define CFRAME_OFS_MULTRES 408 #define CFRAME_SIZE 384 #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_ARCH_PPC_ELFV2 +#define CFRAME_OFS_ERRF 360 +#define CFRAME_OFS_NRES 356 +#define CFRAME_OFS_PREV 336 +#define CFRAME_OFS_L 352 +#define CFRAME_OFS_PC 348 +#define CFRAME_OFS_MULTRES 344 +#define CFRAME_SIZE 368 +#define CFRAME_SHIFT_MULTRES 3 #elif LJ_ARCH_PPC32ON64 #define CFRAME_OFS_ERRF 472 #define CFRAME_OFS_NRES 468 diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index c7d4c229f..e93016543 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -30,8 +30,13 @@ enum { /* Calling conventions. */ RID_RET = RID_R3, +#if LJ_LE + RID_RETHI = RID_R4, + RID_RETLO = RID_R3, +#else RID_RETHI = RID_R3, RID_RETLO = RID_R4, +#endif RID_FPRET = RID_F1, /* These definitions must match with the *.dasc file(s): */ @@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) #define PPCF_C(r) ((r) << 6) #define PPCF_MB(n) ((n) << 6) #define PPCF_ME(n) ((n) << 1) +#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) +#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) #define PPCF_Y 0x00200000 #define PPCF_DOT 0x00000001 @@ -200,6 +207,13 @@ typedef enum PPCIns { PPCI_RLWINM = 0x54000000, PPCI_RLWIMI = 0x50000000, + PPCI_RLDICL = 0x78000000, + PPCI_RLDICR = 0x78000004, + PPCI_RLDIC = 0x78000008, + PPCI_RLDIMI = 0x7800000c, + PPCI_RLDCL = 0x78000010, + PPCI_RLDCR = 0x78000012, + PPCI_B = 0x48000000, PPCI_BL = 0x48000001, PPCI_BC = 0x40800000, diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 4299e266d..0d2cae8b0 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -21,35 +21,40 @@ |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). -|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). 
+|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3). |// Function pointers are really a struct: code, TOC, env (optional). -|// TOCENV Function pointers have an environment pointer, too (not on PS3). +|// OPDENV Function pointers have an environment pointer, too (not on PS3). +|// ELFV2 The 64-bit ELF V2 ABI is in use. |// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). |// Must avoid (slow) micro-coded instructions. | |.if P64 -|.define TOC, 1 -|.define TOCENV, 1 |.macro lpx, a, b, c; ldx a, b, c; .endmacro |.macro lp, a, b; ld a, b; .endmacro |.macro stp, a, b; std a, b; .endmacro +|.macro stpx, a, b, c; stdx a, b, c; .endmacro |.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif +|.define PSIZE, 8 |.else |.macro lpx, a, b, c; lwzx a, b, c; .endmacro |.macro lp, a, b; lwz a, b; .endmacro |.macro stp, a, b; stw a, b; .endmacro +|.macro stpx, a, b, c; stwx a, b, c; .endmacro |.define decode_OPP, decode_OP4 +|.define PSIZE, 4 |.endif | |// Convenience macros for TOC handling. -|.if TOC +|.if OPD or ELFV2 |// Linker needs a TOC patch area for every external call relocation. -|.macro blex, target; bl extern target@plt; nop; .endmacro +|.macro blex, target; bl extern target; nop; .endmacro |.macro .toc, a, b; a, b; .endmacro +|.else +|.macro blex, target; bl extern target@plt; .endmacro +|.macro .toc, a, b; .endmacro +|.endif +|.if OPD +|.macro .opd, a, b; a, b; .endmacro |.if P64 |.define TOC_OFS, 8 |.define ENV_OFS, 16 @@ -57,13 +62,13 @@ |.define TOC_OFS, 4 |.define ENV_OFS, 8 |.endif -|.else // No TOC. -|.macro blex, target; bl extern target@plt; .endmacro -|.macro .toc, a, b; .endmacro +|.else // No OPD. 
+|.macro .opd, a, b; .endmacro |.endif -|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro +|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro | |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro +|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro | |.macro andix., y, a, i |.if PPE @@ -74,29 +79,6 @@ |.endif |.endmacro | -|.macro clrso, reg -|.if PPE -| li reg, 0 -| mtxer reg -|.else -| mcrxr cr0 -|.endif -|.endmacro -| -|.macro checkov, reg, noov -|.if PPE -| mfxer reg -| add reg, reg, reg -| cmpwi reg, 0 -| li reg, 0 -| mtxer reg -| bgey noov -|.else -| mcrxr cr0 -| bley noov -|.endif -|.endmacro -| |//----------------------------------------------------------------------- | |// Fixed register assignments for the interpreter. @@ -122,6 +104,7 @@ |.define LREG, r18 // Register holding lua_State (also in SAVE_L). |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. |.define JGL, r31 // On-trace: global_State + 32768. +|.define BASEP4, r26 // Equal to BASE + 4 | |// Constants for type-comparisons, stores and conversions. C callee-save. |.define TISNUM, r22 @@ -158,6 +141,12 @@ |.if FPU |.define FARG1, f1 |.define FARG2, f2 +|.define FARG3, f3 +|.define FARG4, f4 +|.define FARG5, f5 +|.define FARG6, f6 +|.define FARG7, f7 +|.define FARG8, f8 |.endif | |.define CRET1, r3 @@ -165,6 +154,7 @@ | |.define TOCREG, r2 // TOC register (only used by C code). |.define ENVREG, r11 // Environment pointer (nested C functions). +|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD) | |// Stack layout while in interpreter. Must match with lj_frame.h. |.if GPR64 @@ -198,6 +188,49 @@ |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI | +|.elif ELFV2 +| +|// 392(sp) // \ 32 bit C frame info. +|.define SAVE_LR, 384(sp) +|.define SAVE_CR, 376(sp) // 64 bit CR save. +|.define CFRAME_SPACE, 368 // Delta for sp. 
+|// Back chain for sp: 368(sp) <-- sp entering interpreter +|.define SAVE_ERRF, 360(sp) // | +|.define SAVE_NRES, 356(sp) // | +|.define SAVE_L, 352(sp) // > Parameter save area. +|.define SAVE_PC, 348(sp) // | +|.define SAVE_MULTRES, 344(sp) // | +|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain. +|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves. +|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves. +|.if ENDIAN_LE +|.define TMPD_HI, 44(sp) +|.define TMPD_LO, 40(sp) +|.define TONUM_HI, 36(sp) +|.define TONUM_LO, 32(sp) +|.else +|.define TMPD_LO, 44(sp) +|.define TMPD_HI, 40(sp) +|.define TONUM_LO, 36(sp) +|.define TONUM_HI, 32(sp) +|.endif +|.define SAVE_TOC, 24(sp) // TOC save area. +|// Next frame lr: 16(sp) +|// Next frame cr: 8(sp) +|// Back chain for sp: 0(sp) <-- sp while in interpreter +| +|.if ENDIAN_LE +|.define TMPD_BLO, 32(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else +|.define TMPD_BLO, 39(sp) +|.define TMPD, TMPD_HI +|.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 32 +| |.else | |// 508(sp) // \ 32 bit C frame info. @@ -208,23 +241,39 @@ |.define SAVE_MULTRES, 456(sp) // | |.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. |.define SAVE_LR, 416(sp) +|.define SAVE_CR, 408(sp) // 64 bit CR save. |.define CFRAME_SPACE, 400 // Delta for sp. |// Back chain for sp: 400(sp) <-- sp entering interpreter |.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. |.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. |// 48(sp) // Callee parameter save area (ABI mandated). |.define SAVE_TOC, 40(sp) // TOC save area. +|.if ENDIAN_LE +|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated). +|.define TMPD_LO, 32(sp) // / +|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated). +|.define TONUM_LO, 24(sp) // / +|.else |.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). |.define TMPD_HI, 32(sp) // / |.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). 
|.define TONUM_HI, 24(sp) // / +|.endif |// Next frame lr: 16(sp) -|.define SAVE_CR, 8(sp) // 64 bit CR save. +|// Next frame cr: 8(sp) |// Back chain for sp: 0(sp) <-- sp while in interpreter | +|.if ENDIAN_LE +|.define TMPD_BLO, 32(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else |.define TMPD_BLO, 39(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 112 | |.endif |.else @@ -249,10 +298,17 @@ |.define SAVE_MULTRES, 28(sp) |.define UNUSED1, 24(sp) |.if FPU +|.if ENDIAN_LE +|.define TMPD_HI, 20(sp) +|.define TMPD_LO, 16(sp) +|.define TONUM_HI, 12(sp) +|.define TONUM_LO, 8(sp) +|.else |.define TMPD_LO, 20(sp) |.define TMPD_HI, 16(sp) |.define TONUM_LO, 12(sp) |.define TONUM_HI, 8(sp) +|.endif |.else |.define SFSAVE_4, 20(sp) |.define SFSAVE_3, 16(sp) @@ -263,10 +319,22 @@ |// Back chain for sp: 0(sp) <-- sp while in interpreter | |.if FPU +|.if ENDIAN_LE +|.define TMPD_BLO, 16(sp) +|.define TMPD, TMPD_LO +|.define TONUM_D, TONUM_LO +|.else |.define TMPD_BLO, 23(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI |.endif +|.else +|.define TMPD_BLO, 23(sp) +|.define TMPD, TMPD_HI +|.define TONUM_D, TONUM_HI +|.endif +| +|.define EXIT_OFFSET, 16 | |.endif | @@ -383,8 +451,35 @@ |//----------------------------------------------------------------------- | |// Access to frame relative to BASE. 
+|.if ENDIAN_LE +|.define FRAME_PC, -4 +|.define FRAME_FUNC, -8 +|.define FRAME_CONTPC, -12 +|.define FRAME_CONTRET, -16 +|.define WORD_LO, 0 +|.define WORD_HI, 4 +|.define WORD_BLO, 0 +|.define BASE_LO, BASE +|.define BASE_HI, BASEP4 +|.macro lwzux2, hi, lo, base, idx +| lwzux lo, base, idx +| lwz hi, 4(base) +|.endmacro +|.else |.define FRAME_PC, -8 |.define FRAME_FUNC, -4 +|.define FRAME_CONTPC, -16 +|.define FRAME_CONTRET, -12 +|.define WORD_LO, 4 +|.define WORD_HI, 0 +|.define WORD_BLO, 7 +|.define BASE_LO, BASEP4 +|.define BASE_HI, BASE +|.macro lwzux2, hi, lo, base, idx +| lwzux hi, base, idx +| lwz lo, 4(base) +|.endmacro +|.endif | |// Instruction decode. |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro @@ -445,6 +540,7 @@ |// Call decode and dispatch. |.macro ins_callt | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| addi BASEP4, BASE, 4 | lwz PC, LFUNC:RB->pc | lwz INS, 0(PC) | addi PC, PC, 4 @@ -537,7 +633,12 @@ static void build_subroutines(BuildCtx *ctx) | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | mr BASE, TMP2 // Restore caller base. | // Prepending may overwrite the pcall frame, so do it at the end. - | stwu TMP1, FRAME_PC(RA) // Prepend true to results. + | .if ENDIAN_LE + | addi RA, RA, -8 + | stw TMP1, WORD_HI(RA) // Prepend true to results. + | .else + | stwu TMP1, -8(RA) // Prepend true to results. + | .endif | |->vm_returnc: | addi RD, RD, 8 // RD = (nresults+1)*8. @@ -603,7 +704,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP1, L->maxstack | cmplw BASE, TMP1 | bge >8 - | stw TISNIL, 0(BASE) + | stw TISNIL, WORD_HI(BASE) | addi RD, RD, 8 | addi BASE, BASE, 8 | b <2 @@ -654,7 +755,12 @@ static void build_subroutines(BuildCtx *ctx) |->vm_unwind_ff_eh: // Landing pad for external unwinder. | lwz L, SAVE_L | .toc ld TOCREG, SAVE_TOC + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. 
+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp BASE, L->base | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lwz DISPATCH, L->glref // Setup pointer to dispatch table. @@ -669,7 +775,7 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) // Results start at BASE-8. | .FPU stw TMP3, TMPD | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. + | stw TMP1, WORD_HI(RA) // Prepend false to error message. | li RD, 16 // 2 results: false + error message. | st_vmstate | .FPU lfs TONUM, TMPD @@ -730,7 +836,12 @@ static void build_subroutines(BuildCtx *ctx) | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mr RA, BASE | lp BASE, L->base + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp TMP1, L->top | lwz PC, FRAME_PC(BASE) | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). @@ -780,7 +891,12 @@ static void build_subroutines(BuildCtx *ctx) |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | stw L, DISPATCH_GL(cur_L)(DISPATCH) | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp TMP1, L->top | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
| add PC, PC, BASE @@ -800,8 +916,8 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_call_dispatch: | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lwz TMP0, FRAME_PC(BASE) - | lwz LFUNC:RB, FRAME_FUNC(BASE) + | lwz TMP0, WORD_HI-8(BASE) + | lwz LFUNC:RB, WORD_LO-8(BASE) | checkfunc TMP0; bne ->vmeta_call | |->vm_call_dispatch_f: @@ -820,7 +936,9 @@ static void build_subroutines(BuildCtx *ctx) | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | lp TMP1, L->cframe | addi DISPATCH, DISPATCH, GG_G2DISP - | .toc lp CARG4, 0(CARG4) + | .opd lp TOCREG, TOC_OFS(CARG4) + | .opdenv lp ENVREG, ENV_OFS(CARG4) + | .opd lp CARG4, 0(CARG4) | li TMP2, 0 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | stw TMP2, SAVE_ERRF // No error function. @@ -828,7 +946,9 @@ static void build_subroutines(BuildCtx *ctx) | stp sp, L->cframe // Add our C frame to cframe chain. | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mtctr CARG4 + | .elfv2 mr FUNCREG, CARG4 | bctrl // (lua_State *L, lua_CFunction func, void *ud) + | .toc lp TOCREG, SAVE_TOC |.if PPE | mr BASE, CRET1 | cmpwi CRET1, 0 @@ -850,20 +970,27 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_dispatch: | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. + | lwz TMP0, FRAME_CONTRET(BASE) // Continuation. | mr RB, BASE | mr BASE, TMP2 // Restore caller BASE. | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) |.if FFI | cmplwi TMP0, 1 |.endif - | lwz PC, -16(RB) // Restore PC from [cont|PC]. - | subi TMP2, RD, 8 + | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC]. + | addi BASEP4, BASE, 4 + | addi TMP2, RD, WORD_HI-8 | lwz TMP1, LFUNC:TMP1->pc | stwx TISNIL, RA, TMP2 // Ensure one valid arg. 
+ |.if P64 + | ld TMP3, 0(DISPATCH) + |.endif |.if FFI | ble >1 |.endif + |.if P64 + | add TMP0, TMP0, TMP3 + |.endif | lwz KBASE, PC2PROTO(k)(TMP1) | // BASE = base, RA = resultptr, RB = meta base | mtctr TMP0 @@ -914,20 +1041,20 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TSTR | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | b >1 | |->vmeta_tgets: | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) + | stw TAB:RB, WORD_LO(CARG2) | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) + | stw TMP0, WORD_HI(CARG2) | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) + | stw STR:RC, WORD_LO(CARG3) + | stw TMP1, WORD_HI(CARG3) | b >1 | |->vmeta_tgetb: // TMP0 = index @@ -938,8 +1065,8 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | add CARG2, BASE, RB |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) + | stw TISNUM, WORD_HI(CARG3) + | stw TMP0, WORD_LO(CARG3) |.else | stfd f0, 0(CARG3) |.endif @@ -977,7 +1104,7 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, L->top = new base, stack = cont/func/t/k | subfic TMP1, BASE, FRAME_CONT | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] + | stw PC, FRAME_CONTPC(BASE) // [cont|PC] | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | li NARGS8:RC, 16 // 2 args for func(t, k). 
@@ -996,7 +1123,10 @@ static void build_subroutines(BuildCtx *ctx) |.endif | b ->BC_TGETR_Z |1: - | stwx TISNIL, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, RA | b ->cont_nop | |//----------------------------------------------------------------------- @@ -1005,20 +1135,20 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TSTR | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | b >1 | |->vmeta_tsets: | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) + | stw TAB:RB, WORD_LO(CARG2) | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) + | stw TMP0, WORD_HI(CARG2) | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) + | stw STR:RC, WORD_LO(CARG3) + | stw TMP1, WORD_HI(CARG3) | b >1 | |->vmeta_tsetb: // TMP0 = index @@ -1029,8 +1159,8 @@ static void build_subroutines(BuildCtx *ctx) | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | add CARG2, BASE, RB |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) + | stw TISNUM, WORD_HI(CARG3) + | stw TMP0, WORD_LO(CARG3) |.else | stfd f0, 0(CARG3) |.endif @@ -1069,7 +1199,7 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | subfic TMP1, BASE, FRAME_CONT | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] + | stw PC, FRAME_CONTPC(BASE) // [cont|PC] | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
| li NARGS8:RC, 24 // 3 args for func(t, k, v) @@ -1099,17 +1229,9 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_comp: | mr CARG1, L | subi PC, PC, 4 - |.if DUALNUM - | mr CARG2, RA - |.else | add CARG2, BASE, RA - |.endif | stw PC, SAVE_PC - |.if DUALNUM - | mr CARG3, RD - |.else | add CARG3, BASE, RD - |.endif | stp BASE, L->base | decode_OP1 CARG4, INS | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) @@ -1146,7 +1268,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->cont_nop | |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) + | lwz TMP0, WORD_HI(RA) | .gpr64 extsw TMP0, TMP0 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. | subfe CRET1, CRET1, CRET1 @@ -1154,7 +1276,7 @@ static void build_subroutines(BuildCtx *ctx) | b <4 | |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) + | lwz TMP0, WORD_HI(RA) | .gpr64 extsw TMP0, TMP0 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. | subfe CRET1, CRET1, CRET1 @@ -1206,8 +1328,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vmeta_unm: - | mr CARG3, RD - | mr CARG4, RD + | add CARG3, BASE, RD + | add CARG4, BASE, RD | b >1 | |->vmeta_arith_vn: @@ -1242,7 +1364,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_binop: | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] + | stw PC, FRAME_CONTPC(CRET1) // [cont|PC] | mr TMP2, BASE | addi PC, TMP1, FRAME_CONT | mr BASE, CRET1 @@ -1253,7 +1375,7 @@ static void build_subroutines(BuildCtx *ctx) #if LJ_52 | mr SAVE0, CARG1 #endif - | mr CARG2, RD + | add CARG2, BASE, RD | stp BASE, L->base | mr CARG1, L | stw PC, SAVE_PC @@ -1330,25 +1452,25 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_1, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz CARG1, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz CARG1, WORD_LO(BASE) | blt ->fff_fallback |.endmacro | |.macro .ffunc_2, name |->ff_ .. 
name: | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz CARG4, 8(BASE) - | lwz CARG1, 4(BASE) - | lwz CARG2, 12(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz CARG4, WORD_HI+8(BASE) + | lwz CARG1, WORD_LO(BASE) + | lwz CARG2, WORD_LO+8(BASE) | blt ->fff_fallback |.endmacro | |.macro .ffunc_n, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else @@ -1361,15 +1483,15 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_nn, name |->ff_ .. name: | cmplwi NARGS8:RC, 16 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG3, 8(BASE) + | lwz CARG3, WORD_HI+8(BASE) | lfd FARG2, 8(BASE) |.else - | lwz CARG2, 4(BASE) - | lwz CARG3, 8(BASE) - | lwz CARG4, 12(BASE) + | lwz CARG2, WORD_LO(BASE) + | lwz CARG3, WORD_HI+8(BASE) + | lwz CARG4, WORD_LO+8(BASE) |.endif | blt ->fff_fallback | checknum CARG1; bge ->fff_fallback @@ -1392,17 +1514,17 @@ static void build_subroutines(BuildCtx *ctx) | cmplw cr1, CARG3, TMP1 | lwz PC, FRAME_PC(BASE) | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) + | stw CARG3, WORD_HI(RA) | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | addi TMP1, BASE, 8 | add TMP2, RA, NARGS8:RC - | stw CARG1, 4(RA) + | stw CARG1, WORD_LO(RA) | beq ->fff_res // Done if exactly 1 argument. 
|1: | cmplw TMP1, TMP2 |.if FPU | lfd f0, 0(TMP1) - | stfd f0, 0(TMP1) + | stfd f0, -8(TMP1) |.else | lwz CARG1, 0(TMP1) | lwz CARG2, 4(TMP1) @@ -1415,14 +1537,28 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc type | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) | blt ->fff_fallback | .gpr64 extsw CARG1, CARG1 + |.if P64 + | li TMP0, LJ_TNUMX + | srawi TMP3, CARG1, 15 + | subfc TMP1, TMP0, CARG1 + |.else | subfc TMP0, TISNUM, CARG1 - | subfe TMP2, CARG1, CARG1 + |.endif + | subfe TMP2, CARG1, CARG1 + |.if P64 + | cmpwi TMP3, -2 + | orc TMP1, TMP2, TMP1 + | subf TMP1, TMP0, TMP1 + | beq >1 + |.else | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 + | subf TMP1, TISNUM, TMP1 + |.endif | slwi TMP1, TMP1, 3 + |2: |.if FPU | la TMP2, CFUNC:RB->upvalue | lfdx FARG1, TMP2, TMP1 @@ -1432,6 +1568,11 @@ static void build_subroutines(BuildCtx *ctx) | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo |.endif | b ->fff_resn + |.if P64 + |1: + | li TMP1, ~LJ_TLIGHTUD<<3 + | b <2 + |.endif | |//-- Base library: getters and setters --------------------------------- | @@ -1454,10 +1595,10 @@ static void build_subroutines(BuildCtx *ctx) | sub TMP1, TMP0, TMP1 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |3: // Rearranged logic, because we expect _not_ to find the key. 
- | lwz CARG4, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) + | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) + | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) | checkstr CARG4; bne >4 | cmpw TMP0, STR:RC; beq >5 |4: @@ -1475,14 +1616,33 @@ static void build_subroutines(BuildCtx *ctx) |6: | cmpwi CARG3, LJ_TUDATA; beq <1 | .gpr64 extsw CARG3, CARG3 + |.if P64 + | li TMP0, LJ_TNUMX + | srawi TMP3, CARG3, 15 + | subfc TMP1, TMP0, CARG3 + |.else | subfc TMP0, TISNUM, CARG3 + |.endif | subfe TMP2, CARG3, CARG3 + |.if P64 + | cmpwi TMP3, -2 + | orc TMP1, TMP2, TMP1 + | subf TMP1, TMP0, TMP1 + | beq >7 + |.else | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 + | subf TMP1, TISNUM, TMP1 + |.endif | slwi TMP1, TMP1, 2 + |8: | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) | lwzx TAB:CARG1, TMP2, TMP1 | b <2 + |.if P64 + |7: + | li TMP1, ~LJ_TLIGHTUD<<2 + | b <8 + |.endif | |.ffunc_2 setmetatable | // Fast path: no mt for table yet and not clearing the mt. @@ -1500,8 +1660,8 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc rawget | cmplwi NARGS8:RC, 16 - | lwz CARG4, 0(BASE) - | lwz TAB:CARG2, 4(BASE) + | lwz CARG4, WORD_HI(BASE) + | lwz TAB:CARG2, WORD_LO(BASE) | blt ->fff_fallback | checktab CARG4; bne ->fff_fallback | la CARG3, 8(BASE) @@ -1521,11 +1681,11 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc tonumber | // Only handles the number case inline (without a base argument). | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else - | lwz CARG2, 4(BASE) + | lwz CARG2, WORD_LO(BASE) |.endif | bne ->fff_fallback // Exactly one argument. 
| checknum CARG1; bgt ->fff_fallback @@ -1560,10 +1720,15 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc next | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) + | lwz CARG1, WORD_HI(BASE) + | lwz TAB:CARG2, WORD_LO(BASE) | blt ->fff_fallback + |.if ENDIAN_LE + | add TMP1, BASE, NARGS8:RC + | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil. + |.else | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. + |.endif | checktab CARG1 | lwz PC, FRAME_PC(BASE) | bne ->fff_fallback @@ -1620,7 +1785,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | la RA, -8(BASE) #endif - | stw TISNIL, 8(BASE) + | stw TISNIL, 8+WORD_HI(BASE) | li RD, (3+1)*8 |.if FPU | stfd f0, 0(RA) @@ -1632,11 +1797,11 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc ipairs_aux | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz TAB:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz TAB:CARG1, WORD_LO(BASE) + | lwz CARG4, 8+WORD_HI(BASE) |.if DUALNUM - | lwz TMP2, 12(BASE) + | lwz TMP2, 8+WORD_LO(BASE) |.else | lfd FARG2, 8(BASE) |.endif @@ -1665,20 +1830,20 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) | cmplw TMP0, TMP2 |.if DUALNUM - | stw TISNUM, 0(RA) + | stw TISNUM, WORD_HI(RA) | slwi TMP3, TMP2, 3 - | stw TMP2, 4(RA) + | stw TMP2, WORD_LO(RA) |.else | slwi TMP3, TMP2, 3 | stfd FARG2, 0(RA) |.endif | ble >2 // Not in array part? 
|.if FPU - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 + | lfdux f0, TMP1, TMP3 + | lwz TMP2, WORD_HI(TMP1) |.else | lwzux TMP2, TMP1, TMP3 - | lwz TMP3, 4(TMP1) + | lwz TMP3, WORD_LO(TMP1) // Low word of the slot (was 4(TMP1)) |.endif |1: | checknil TMP2 @@ -1703,7 +1868,7 @@ static void build_subroutines(BuildCtx *ctx) | cmplwi CRET1, 0 | li RD, (0+1)*8 | beq ->fff_res - | lwz TMP2, 0(CRET1) + | lwz TMP2, WORD_HI(CRET1) |.if FPU | lfd f0, 0(CRET1) |.else @@ -1736,11 +1901,11 @@ static void build_subroutines(BuildCtx *ctx) | la RA, -8(BASE) #endif |.if DUALNUM - | stw TISNUM, 8(BASE) + | stw TISNUM, 8+WORD_HI(BASE) |.else - | stw ZERO, 8(BASE) + | stw ZERO, 8+WORD_HI(BASE) |.endif - | stw ZERO, 12(BASE) + | stw ZERO, 8+WORD_LO(BASE) | li RD, (3+1)*8 |.if FPU | stfd f0, 0(RA) @@ -1766,7 +1931,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc xpcall | cmplwi NARGS8:RC, 16 - | lwz CARG3, 8(BASE) + | lwz CARG3, 8+WORD_HI(BASE) |.if FPU | lfd FARG2, 8(BASE) | lfd FARG1, 0(BASE) @@ -1897,7 +2062,7 @@ static void build_subroutines(BuildCtx *ctx) |.if resume | li TMP1, LJ_TTRUE | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. + | stw TMP1, WORD_HI-8(BASE) // Prepend true to results. | addi RD, RD, 16 |.else | mr RA, BASE @@ -1922,7 +2087,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. + | stw TMP1, WORD_HI-8(BASE) // Prepend false to results. | la RA, -8(BASE) |.if FPU | stfd f0, 0(BASE) // Copy error message. @@ -1980,8 +2145,8 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resi: | lwz PC, FRAME_PC(BASE) | la RA, -8(BASE) - | stw TISNUM, -8(BASE) - | stw CRET1, -4(BASE) + | stw TISNUM, WORD_HI-8(BASE) + | stw CRET1, WORD_LO-8(BASE) | b ->fff_res1 |1: | lus CARG3, 0x41e0 // 2^31. @@ -1996,9 +2161,9 @@ static void build_subroutines(BuildCtx *ctx) |->fff_restv: | // CARG3/CARG1 = TValue result.
| lwz PC, FRAME_PC(BASE) - | stw CARG3, -8(BASE) + | stw CARG3, WORD_HI-8(BASE) | la RA, -8(BASE) - | stw CARG1, -4(BASE) + | stw CARG1, WORD_LO-8(BASE) |->fff_res1: | // RA = results, PC = return. | li RD, (1+1)*8 @@ -2016,10 +2181,11 @@ static void build_subroutines(BuildCtx *ctx) | ins_next1 | // Adjust BASE. KBASE is assumed to be set for the calling frame. | sub BASE, RA, TMP0 + | addi BASEP4, BASE, 4 | ins_next2 | |6: // Fill up results with nil. - | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, RA, TMP1 | b <5 @@ -2137,7 +2303,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc math_log | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) + | lwz CARG1, WORD_HI(BASE) | bne ->fff_fallback // Need exactly 1 argument. | checknum CARG1; bge ->fff_fallback |.if FPU @@ -2166,20 +2332,20 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM |.ffunc math_ldexp | cmplwi NARGS8:RC, 16 - | lwz TMP0, 0(BASE) + | lwz TMP0, WORD_HI(BASE) |.if FPU | lfd FARG1, 0(BASE) |.else - | lwz CARG1, 0(BASE) - | lwz CARG2, 4(BASE) + | lwz CARG1, WORD_HI(BASE) + | lwz CARG2, WORD_LO(BASE) |.endif - | lwz TMP1, 8(BASE) + | lwz TMP1, WORD_HI+8(BASE) |.if GPR64 - | lwz CARG2, 12(BASE) + | lwz CARG2, WORD_LO+8(BASE) |.elif FPU - | lwz CARG1, 12(BASE) + | lwz CARG1, WORD_LO+8(BASE) |.else - | lwz CARG3, 12(BASE) + | lwz CARG3, WORD_LO+8(BASE) |.endif | blt ->fff_fallback | checknum TMP0; bge ->fff_fallback @@ -2218,8 +2384,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | li RD, (2+1)*8 |.if DUALNUM - | stw TISNUM, 8(RA) - | stw TMP1, 12(RA) + | stw TISNUM, WORD_HI+8(RA) + | stw TMP1, WORD_LO+8(RA) |.else | stfd FARG2, 8(RA) |.endif @@ -2253,9 +2419,9 @@ static void build_subroutines(BuildCtx *ctx) | add SAVE1, BASE, NARGS8:RC | bne >4 |1: // Handle integers. 
- | lwz CARG4, 0(SAVE0) + | lwz CARG4, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 - | lwz CARG2, 4(SAVE0) + | lwz CARG2, WORD_LO(SAVE0) | bge cr1, ->fff_resi | checknum CARG4 | xoris TMP0, CARG1, 0x8000 @@ -2296,7 +2462,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | bge ->fff_fallback |5: // Handle numbers. - | lwz CARG3, 0(SAVE0) + | lwz CARG3, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 |.if FPU | lfd FARG2, 0(SAVE0) @@ -2335,7 +2501,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | b <5 |7: // Convert integer to number and continue above. - | lwz CARG3, 4(SAVE0) + | lwz CARG3, WORD_LO(SAVE0) | bne ->fff_fallback |.if FPU | tonum_i FARG2, CARG3 @@ -2347,7 +2513,12 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc_n name | li TMP1, 8 |1: + |.if ENDIAN_LE + | add CARG2, BASE, TMP1 + | lwz CARG2, WORD_HI(CARG2) + |.else | lwzx CARG2, BASE, TMP1 + |.endif | lfdx FARG2, BASE, TMP1 | cmplw cr1, TMP1, NARGS8:RC | checknum CARG2 @@ -2371,8 +2542,8 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_byte // Only handle the 1-arg case here. | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz STR:CARG1, WORD_LO(BASE) | bne ->fff_fallback // Need exactly 1 argument. | checkstr CARG3 | bne ->fff_fallback @@ -2403,12 +2574,12 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) + | lwz CARG3, WORD_HI(BASE) |.if DUALNUM - | lwz TMP0, 4(BASE) + | lwz TMP0, WORD_LO(BASE) | bne ->fff_fallback // Exactly 1 argument. | checknum CARG3; bne ->fff_fallback - | la CARG2, 7(BASE) + | la CARG2, WORD_BLO(BASE) |.else | lfd FARG1, 0(BASE) | bne ->fff_fallback // Exactly 1 argument. 
@@ -2432,16 +2603,16 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc string_sub | ffgccheck | cmplwi NARGS8:RC, 16 - | lwz CARG3, 16(BASE) + | lwz CARG3, WORD_HI+16(BASE) |.if not DUALNUM | lfd f0, 16(BASE) |.endif - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) + | lwz TMP0, WORD_HI(BASE) + | lwz STR:CARG1, WORD_LO(BASE) | blt ->fff_fallback - | lwz CARG2, 8(BASE) + | lwz CARG2, WORD_HI+8(BASE) |.if DUALNUM - | lwz TMP1, 12(BASE) + | lwz TMP1, WORD_LO+8(BASE) |.else | lfd f1, 8(BASE) |.endif @@ -2449,7 +2620,7 @@ static void build_subroutines(BuildCtx *ctx) | beq >1 |.if DUALNUM | checknum CARG3 - | lwz TMP2, 20(BASE) + | lwz TMP2, WORD_LO+16(BASE) | bne ->fff_fallback |1: | checknum CARG2; bne ->fff_fallback @@ -2505,8 +2676,8 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc string_ .. name | ffgccheck | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG2, 4(BASE) + | lwz CARG3, WORD_HI(BASE) + | lwz STR:CARG2, WORD_LO(BASE) | blt ->fff_fallback | checkstr CARG3 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) @@ -2544,10 +2715,10 @@ static void build_subroutines(BuildCtx *ctx) | addi SAVE0, BASE, 8 | add SAVE1, BASE, NARGS8:RC |1: - | lwz CARG4, 0(SAVE0) + | lwz CARG4, WORD_HI(SAVE0) | cmplw cr1, SAVE0, SAVE1 |.if DUALNUM - | lwz CARG2, 4(SAVE0) + | lwz CARG2, WORD_LO(SAVE0) |.else | lfd FARG1, 0(SAVE0) |.endif @@ -2714,20 +2885,23 @@ static void build_subroutines(BuildCtx *ctx) | |->fff_fallback: // Call fast function fallback handler. | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lp TMP3, CFUNC:RB->f + | lp FUNCREG, CFUNC:RB->f | add TMP1, BASE, NARGS8:RC | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. | addi TMP0, TMP1, 8*LUA_MINSTACK | lwz TMP2, L->maxstack | stw PC, SAVE_PC // Redundant (but a defined value). 
- | .toc lp TMP3, 0(TMP3) + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | .opd lp FUNCREG, 0(FUNCREG) | cmplw TMP0, TMP2 | stp BASE, L->base | stp TMP1, L->top | mr CARG1, L | bgt >5 // Need to grow stack. - | mtctr TMP3 + | mtctr FUNCREG | bctrl // (lua_State *L) + | .toc lp TOCREG, SAVE_TOC | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | lp BASE, L->base | cmpwi CRET1, 0 @@ -2829,6 +3003,7 @@ static void build_subroutines(BuildCtx *ctx) |3: | lp BASE, L->base |4: // Re-dispatch to static ins. + | addi BASEP4, BASE, 4 | lwz INS, -4(PC) | decode_OPP TMP1, INS | decode_RB8 RB, INS @@ -2842,7 +3017,7 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_hook: // Continue from hook yield. | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. + | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins. | b <4 | |->vm_hotloop: // Hot loop counter underflow. @@ -2884,6 +3059,7 @@ static void build_subroutines(BuildCtx *ctx) | lp BASE, L->base | lp TMP0, L->top | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. + | addi BASEP4, BASE, 4 | sub NARGS8:RC, TMP0, BASE | add RA, BASE, RA | lwz LFUNC:RB, FRAME_FUNC(BASE) @@ -2895,7 +3071,7 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | // RA = resultptr, RB = meta base | lwz INS, -4(PC) - | lwz TRACE:TMP2, -20(RB) // Save previous trace. + | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace. | addic. TMP1, MULTRES, -8 | decode_RA8 RC, INS // Call base. 
| beq >2 @@ -2941,10 +3117,16 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG2, PC | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | lp BASE, L->base + | addi BASEP4, BASE, 4 | b ->cont_nop | |9: + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + | stwx TISNIL, BASEP4, RC + |.else | stwx TISNIL, BASE, RC + |.endif | addi RC, RC, 8 | b <3 |.endif @@ -2959,6 +3141,7 @@ static void build_subroutines(BuildCtx *ctx) | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | lp BASE, L->base | subi PC, PC, 4 + | addi BASEP4, BASE, 4 | b ->cont_nop #endif | @@ -2968,40 +3151,73 @@ static void build_subroutines(BuildCtx *ctx) | |.macro savex_, a, b, c, d |.if FPU - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) + | stfd f..a, EXIT_OFFSET+a*8(sp) + | stfd f..b, EXIT_OFFSET+b*8(sp) + | stfd f..c, EXIT_OFFSET+c*8(sp) + | stfd f..d, EXIT_OFFSET+d*8(sp) |.endif |.endmacro | + |.macro saver, a + | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp) + |.endmacro + | |->vm_exit_handler: |.if JIT - | addi sp, sp, -(16+32*8+32*4) - | stmw r2, 16+32*8+2*4(sp) + | addi sp, sp, -(EXIT_OFFSET+32*8+32*PSIZE) + | saver 3 // CARG1 + | saver 4 // CARG2 + | saver 5 // CARG3 + | saver 17 // DISPATCH | addi DISPATCH, JGL, -GG_DISP2G-32768 | li CARG2, ~LJ_VMST_EXIT - | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. + | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain. | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) + | saver 2 + | saver 6 + | saver 7 + | saver 8 + | saver 9 + | saver 10 + | saver 11 + | saver 12 + | saver 13 | savex_ 0,1,2,3 - | stw CARG1, 0(sp) // Store extended stack chain. - | clrso TMP1 + | stp CARG1, 0(sp) // Store extended stack chain. + | savex_ 4,5,6,7 - | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. + | saver 14 + | saver 15 + | saver 16 + | saver 18 + | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp. 
| savex_ 8,9,10,11 - | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. + | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP. | savex_ 12,13,14,15 | mflr CARG3 | li TMP1, 0 | savex_ 16,17,18,19 - | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. + | stp TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP (whole PSIZE-wide slot). | savex_ 20,21,22,23 | lhz CARG4, 2(CARG3) // Load trace number. | savex_ 24,25,26,27 | lwz L, DISPATCH_GL(cur_L)(DISPATCH) | savex_ 28,29,30,31 + | saver 19 + | saver 20 + | saver 21 + | saver 22 + | saver 23 + | saver 24 + | saver 25 + | saver 26 + | saver 27 + | saver 28 + | saver 29 + | saver 30 + | saver 31 | sub CARG3, TMP0, CARG3 // Compute exit number. - | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) + | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH) | srwi CARG3, CARG3, 2 | stp L, DISPATCH_J(L)(DISPATCH) | subi CARG3, CARG3, 2 @@ -3010,11 +3226,11 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) | addi CARG1, DISPATCH, GG_DISP2J | stw CARG3, DISPATCH_J(exitno)(DISPATCH) - | addi CARG2, sp, 16 + | addi CARG2, sp, EXIT_OFFSET | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | // Returns MULTRES (unscaled) or negated error code. | lp TMP1, L->cframe - | lwz TMP2, 0(sp) + | lp TMP2, 0(sp) | lp BASE, L->base |.if GPR64 | rldicr sp, TMP1, 0, 61 @@ -3022,7 +3238,7 @@ static void build_subroutines(BuildCtx *ctx) | rlwinm sp, TMP1, 0, 0, 29 |.endif | lwz PC, SAVE_PC // Get SAVE_PC. - | stw TMP2, 0(sp) + | stp TMP2, 0(sp) | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). | b >1 |.endif @@ -3043,7 +3259,12 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) | lwz KBASE, PC2PROTO(k)(TMP1) | // Setup type comparison constants. + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM + |.endif | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| .FPU stw TMP3, TMPD | li ZERO, 0 @@ -3063,14 +3284,14 @@ static void build_subroutines(BuildCtx *ctx) | decode_RA8 RA, INS | lpx TMP0, DISPATCH, TMP1 | mtctr TMP0 - | cmplwi TMP1, BC_FUNCF*4 // Function header? + | cmplwi TMP1, BC_FUNCF*PSIZE // Function header? | bge >2 | decode_RB8 RB, INS | decode_RD8 RD, INS | decode_RC8 RC, INS | bctr |2: - | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? + | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function? | blt >3 | // Check frame below fast function. | lwz TMP1, FRAME_PC(BASE) @@ -3080,7 +3301,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP2, -4(TMP1) | decode_RA8 TMP0, TMP2 | sub TMP1, BASE, TMP0 - | lwz LFUNC:TMP2, -12(TMP1) + | lwz LFUNC:TMP2, WORD_LO-16(TMP1) | lwz TMP1, LFUNC:TMP2->pc | lwz KBASE, PC2PROTO(k)(TMP1) |3: @@ -3129,6 +3350,8 @@ static void build_subroutines(BuildCtx *ctx) | sfi2d CARG3, CARG4 | |->vm_modi: + | li TMP1, 0 + | mtxer TMP1 | divwo. TMP0, CARG1, CARG2 | bso >1 |.if GPR64 @@ -3147,7 +3370,8 @@ static void build_subroutines(BuildCtx *ctx) | cmpwi CARG2, 0 | li CARG1, 0 | beqlr - | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. + | // Clear SO for -2147483648 % -1 and return 0. + | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so | blr | |//----------------------------------------------------------------------- @@ -3160,10 +3384,18 @@ static void build_subroutines(BuildCtx *ctx) |->vm_cachesync: |.if JIT or FFI | // Compute start of first cache line and number of cache lines. + | .if GPR64 + | rldicr CARG1, CARG1, 0, 58 + | .else | rlwinm CARG1, CARG1, 0, 0, 26 + | .endif | sub CARG2, CARG2, CARG1 | addi CARG2, CARG2, 31 + | .if GPR64 + | srdi. CARG2, CARG2, 5 + | .else | rlwinm. 
CARG2, CARG2, 27, 5, 31 + | .endif | beqlr | mtctr CARG2 | mr CARG3, CARG1 @@ -3185,39 +3417,70 @@ static void build_subroutines(BuildCtx *ctx) |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- | - |// Handler for callback functions. Callback slot number in r11, g in r12. + |// Handler for callback functions. + |// 32-bit: Callback slot number in r12, g in r11. + |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2. + |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11, + |// vm_ffi_callback in r2. |->vm_ffi_callback: |.if FFI |.type CTSTATE, CTState, PC + | .if OPD + | rldicl r12, r11, 17, 47 + | rldicl r11, r11, 0, 17 + | .endif + | .if ELFV2 + | rlwinm r12, r12, 30, 22, 31 + | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha + | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l + | .endif | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] + | lwz CTSTATE, GL:r11->ctype_state + | addi DISPATCH, r11, GG_G2DISP + | stw r12, CTSTATE->cb.slot + | stp r3, CTSTATE->cb.gpr[0] | .FPU stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] + | stp r4, CTSTATE->cb.gpr[1] | .FPU stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] + | stp r5, CTSTATE->cb.gpr[2] | .FPU stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] + | stp r6, CTSTATE->cb.gpr[3] | .FPU stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] + | stp r7, CTSTATE->cb.gpr[4] | .FPU stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] + | stp r8, CTSTATE->cb.gpr[5] | .FPU stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] + | stp r9, CTSTATE->cb.gpr[6] | .FPU stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] + | stp r10, CTSTATE->cb.gpr[7] | .FPU stfd f8, CTSTATE->cb.fpr[7] + | .if GPR64 + | stfd f9, CTSTATE->cb.fpr[8] + | 
stfd f10, CTSTATE->cb.fpr[9] + | stfd f11, CTSTATE->cb.fpr[10] + | stfd f12, CTSTATE->cb.fpr[11] + | stfd f13, CTSTATE->cb.fpr[12] + | .endif + | .if ELFV2 + | addi TMP0, sp, CFRAME_SPACE+96 + | .elif GPR64 + | addi TMP0, sp, CFRAME_SPACE+112 + | .else | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack + | .endif + | stp TMP0, CTSTATE->cb.stack | mr CARG1, CTSTATE | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | mr CARG2, sp | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | // Returns lua_State *. | lp BASE, L:CRET1->base + |.if P64 + | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants. + | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff + |.else | li TISNUM, LJ_TISNUM // Setup type comparison constants. + |.endif | lp RC, L:CRET1->top | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | li ZERO, 0 @@ -3246,9 +3509,21 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, CTSTATE | mr CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] + | lp CRET1, CTSTATE->cb.gpr[0] | .FPU lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] + | lp CRET2, CTSTATE->cb.gpr[1] + | .if GPR64 + | lfd FARG2, CTSTATE->cb.fpr[1] + | .else + | lp CARG3, CTSTATE->cb.gpr[2] + | lp CARG4, CTSTATE->cb.gpr[3] + | .endif + | .elfv2 lfd f3, CTSTATE->cb.fpr[2] + | .elfv2 lfd f4, CTSTATE->cb.fpr[3] + | .elfv2 lfd f5, CTSTATE->cb.fpr[4] + | .elfv2 lfd f6, CTSTATE->cb.fpr[5] + | .elfv2 lfd f7, CTSTATE->cb.fpr[6] + | .elfv2 lfd f8, CTSTATE->cb.fpr[7] | b ->vm_leave_unw |.endif | @@ -3261,23 +3536,46 @@ static void build_subroutines(BuildCtx *ctx) | lbz CARG2, CCSTATE->nsp | lbz CARG3, CCSTATE->nfpr | neg TMP1, TMP1 + | .if GPR64 + | std TMP0, 16(sp) + | .else | stw TMP0, 4(sp) + | .endif | cmpwi cr1, CARG3, 0 | mr TMP2, sp | addic. CARG2, CARG2, -1 + | .if GPR64 + | stdux sp, sp, TMP1 + | .else | stwux sp, sp, TMP1 + | .endif | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 
- | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) + | .if GPR64 + | std r14, -8(TMP2) + | std CCSTATE, -16(TMP2) + | .else + | stw r14, -4(TMP2) + | stw CCSTATE, -8(TMP2) + | .endif | mr r14, TMP2 | la TMP1, CCSTATE->stack + | .if GPR64 + | sldi CARG2, CARG2, 3 + | .else | slwi CARG2, CARG2, 2 + | .endif | blty >2 - | la TMP2, 8(sp) + | .if ELFV2 + | la TMP2, 96(sp) + | .elif GPR64 + | la TMP2, 112(sp) + | .else + | la TMP2, 8(sp) + | .endif |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 + | lpx TMP0, TMP1, CARG2 + | stpx TMP0, TMP2, CARG2 + | addic. CARG2, CARG2, -PSIZE | bge <1 |2: | bney cr1, >3 @@ -3289,28 +3587,55 @@ static void build_subroutines(BuildCtx *ctx) | .FPU lfd f6, CCSTATE->fpr[5] | .FPU lfd f7, CCSTATE->fpr[6] | .FPU lfd f8, CCSTATE->fpr[7] + | .if GPR64 + | .FPU lfd f9, CCSTATE->fpr[8] + | .FPU lfd f10, CCSTATE->fpr[9] + | .FPU lfd f11, CCSTATE->fpr[10] + | .FPU lfd f12, CCSTATE->fpr[11] + | .FPU lfd f13, CCSTATE->fpr[12] + | .endif |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | .toc std TOCREG, SAVE_TOC + | lp FUNCREG, CCSTATE->func + | lp CARG2, CCSTATE->gpr[1] + | lp CARG3, CCSTATE->gpr[2] + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) + | .opd lp FUNCREG, 0(FUNCREG) + | lp CARG4, CCSTATE->gpr[3] + | lp CARG5, CCSTATE->gpr[4] + | mtctr FUNCREG + | lp r8, CCSTATE->gpr[5] + | lp r9, CCSTATE->gpr[6] + | lp r10, CCSTATE->gpr[7] + | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
| bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) + | .toc lp TOCREG, SAVE_TOC + | .if GPR64 + | ld CCSTATE:TMP1, -16(r14) + | ld TMP2, -8(r14) + | ld TMP0, 16(r14) + | .else + | lwz CCSTATE:TMP1, -8(r14) + | lwz TMP2, -4(r14) | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] + | .endif + | stp CARG1, CCSTATE:TMP1->gpr[0] | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] + | stp CARG2, CCSTATE:TMP1->gpr[1] + | .if GPR64 + | stfd FARG2, CCSTATE:TMP1->fpr[1] + | .endif + | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2] + | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3] + | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4] + | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5] + | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6] + | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7] | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] + | stp CARG3, CCSTATE:TMP1->gpr[2] | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] + | stp CARG4, CCSTATE:TMP1->gpr[3] | mr r14, TMP2 | blr |.endif @@ -3333,14 +3658,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, BASE + | lwzx CARG2, BASE_LO, RA + | lwzx CARG3, BASE_HI, RD | lwz TMP2, -4(PC) | checknum cr0, CARG1 - | lwz CARG4, 4(RD) + | lwzx CARG4, BASE_LO, RD | decode_RD4 TMP2, TMP2 | checknum cr1, CARG3 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) @@ -3364,7 +3690,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bgt cr0, ->vmeta_comp | // RA is a number. - | .FPU lfd f0, 0(RA) + | .FPU lfdx f0, BASE, RA | bgt cr1, ->vmeta_comp | blt cr1, >4 | // RA is a number, RD is an integer. 
@@ -3384,7 +3710,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bl ->vm_sfi2d_1 |.endif |4: - | .FPU lfd f1, 0(RD) + | .FPU lfdx f1, BASE, RD |5: |.if FPU | fcmpu cr0, f0, f1 @@ -3405,10 +3731,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | b <1 |.else - | lwzx TMP0, BASE, RA + | lwzx TMP0, BASE_HI, RA | addi PC, PC, 4 | lfdx f0, BASE, RA - | lwzx TMP1, BASE, RD + | lwzx TMP1, BASE_HI, RD | checknum cr0, TMP0 | lwz TMP2, -4(PC) | lfdx f1, BASE, RD @@ -3438,16 +3764,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQV: case BC_ISNEV: vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, BASE + | lwzx CARG2, BASE_LO, RA + | .if ENDIAN_LE + | lwzx CARG3, BASE_HI, RD + | .else + | lwzux CARG3, RD, BASE_HI + | .endif | checknum cr0, CARG1 | lwz SAVE0, -4(PC) | checknum cr1, CARG3 | decode_RD4 SAVE0, SAVE0 - | lwz CARG4, 4(RD) + | .if ENDIAN_LE + | lwzux CARG4, RD, BASE_LO + | .else + | lwz CARG4, WORD_LO(RD) + | .endif | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { @@ -3456,11 +3791,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble cr7, ->BC_ISNEN_Z } |.else - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | lwz SAVE0, 0(PC) - | lfd f0, 0(RA) + | lfdx f0, BASE, RA | addi PC, PC, 4 - | lwzux CARG3, RD, BASE + | lwzx CARG3, BASE_HI, RD | checknum cr0, CARG1 | decode_RD4 SAVE0, SAVE0 | lfd f1, 0(RD) @@ -3481,8 +3816,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |5: // Either or both types are not numbers. 
|.if not DUALNUM - | lwz CARG2, 4(RA) - | lwz CARG4, 4(RD) + | lwzx CARG2, BASE_LO, RA + | lwzx CARG4, BASE_LO, RD |.endif |.if FFI | cmpwi cr7, CARG1, LJ_TCDATA @@ -3498,10 +3833,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if FFI | beq cr7, ->vmeta_equal_cd |.endif + |.if P64 + | cmplwi cr7, TMP2, ~LJ_TUDATA // Avoid 64 bit lightuserdata. + |.endif | cmplw cr5, CARG2, CARG4 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. + |.if P64 + | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt + |.endif | mr SAVE1, PC | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. @@ -3541,9 +3882,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQS: case BC_ISNES: vk = op == BC_ISEQS; | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | lwzux TMP0, RA, BASE + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RA | srwi RD, RD, 1 - | lwz STR:TMP3, 4(RA) + | lwzx STR:TMP3, BASE_LO, RA | lwz TMP2, 0(PC) | subfic RD, RD, -4 | addi PC, PC, 4 @@ -3574,16 +3916,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQN: case BC_ISNEN: vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target + | addi BASEP4, BASE, 4 |.if DUALNUM - | lwzux CARG1, RA, BASE + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux CARG3, RD, KBASE + | lwzx CARG2, BASE_LO, RA + | lwzux2 CARG3, CARG4, RD, KBASE | checknum cr0, CARG1 | lwz SAVE0, -4(PC) | checknum cr1, CARG3 | decode_RD4 SAVE0, SAVE0 - | lwz CARG4, 4(RD) | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { |->BC_ISEQN_Z: @@ -3600,7 +3942,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { |->BC_ISNEN_Z: // Dummy label. 
} - | lwzx CARG1, BASE, RA + | lwzx CARG1, BASE_HI, RA | addi PC, PC, 4 | lfdx f0, BASE, RA | lwz SAVE0, -4(PC) @@ -3638,7 +3980,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bge cr0, <3 | // RA is a number. - | .FPU lfd f0, 0(RA) + | .FPU lfdx f0, BASE, RA | blt cr1, >1 | // RA is a number, RD is an integer. |.if FPU @@ -3670,7 +4012,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISEQP: case BC_ISNEP: vk = op == BC_ISEQP; | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RA | srwi TMP1, RD, 3 | lwz TMP2, 0(PC) | not TMP1, TMP1 @@ -3700,7 +4043,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | lwzx TMP0, BASE, RD + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RD | lwz INS, 0(PC) | addi PC, PC, 4 if (op == BC_IST || op == BC_ISF) { @@ -3745,7 +4089,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISTYPE: | // RA = src*8, RD = -type*8 - | lwzx TMP0, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx TMP0, BASE_HI, RA | srwi TMP1, RD, 3 | ins_next1 |.if not PPE and not GPR64 @@ -3759,7 +4106,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ISNUM: | // RA = src*8, RD = -(TISNUM-1)*8 - | lwzx TMP0, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx TMP0, BASE_HI, RA | ins_next1 | checknum TMP0 | bge ->vmeta_istype @@ -3784,18 +4134,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_NOT: | // RA = dst*8, RD = src*8 + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif | ins_next1 - | lwzx TMP0, BASE, RD + | lwzx TMP0, BASE_HI, RD | .gpr64 extsw TMP0, TMP0 | subfic TMP1, TMP0, LJ_TTRUE | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA + | stwx TMP0, BASE_HI, RA | ins_next2 
break; case BC_UNM: | // RA = dst*8, RD = src*8 - | lwzux TMP1, RD, BASE - | lwz TMP0, 4(RD) + | addi BASEP4, BASE, 4 + | lwzx TMP1, BASE_HI, RD + | lwzx TMP0, BASE_LO, RD + |.if DUALNUM and not GPR64 + | mtxer ZERO + |.endif | checknum TMP1 |.if DUALNUM | bne >5 @@ -3807,18 +4164,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | nego. TMP0, TMP0 | bso >4 - |1: |.endif | ins_next1 - | stwux TISNUM, RA, BASE - | stw TMP0, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx TMP0, BASE_LO, RA |3: | ins_next2 |4: - |.if not GPR64 - | // Potential overflow. - | checkov TMP1, <1 // Ignore unrelated overflow. - |.endif | lus TMP1, 0x41e0 // 2^31. | li TMP0, 0 | b >7 @@ -3828,8 +4180,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | xoris TMP1, TMP1, 0x8000 |7: | ins_next1 - | stwux TMP1, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP1, BASE_HI, RA + | stwx TMP0, BASE_LO, RA |.if DUALNUM | b <3 |.else @@ -3838,15 +4190,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_LEN: | // RA = dst*8, RD = src*8 - | lwzux TMP0, RD, BASE - | lwz CARG1, 4(RD) + | addi BASEP4, BASE, 4 + | lwzx TMP0, BASE_HI, RD + | lwzx CARG1, BASE_LO, RD | checkstr TMP0; bne >2 | lwz CRET1, STR:CARG1->len |1: |.if DUALNUM | ins_next1 - | stwux TISNUM, RA, BASE - | stw CRET1, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx CRET1, BASE_LO, RA |.else | tonum_u f0, CRET1 // Result is a non-negative integer. 
| ins_next1 @@ -3878,12 +4231,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.macro ins_arithpre | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + | addi BASEP4, BASE, 4 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzx CARG1, BASE, RB + | .if ENDIAN_LE and DUALNUM + | addi CARG3, RC, 4 + | .endif + | lwzx CARG1, BASE_HI, RB | .if DUALNUM - | lwzx CARG3, KBASE, RC + | .if ENDIAN_LE + | lwzx CARG3, KBASE, CARG3 + | .else + | lwzx CARG3, KBASE, RC + | .endif | .endif | .if FPU | lfdx f14, BASE, RB @@ -3904,9 +4265,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .endif || break; ||case 1: - | lwzx CARG1, BASE, RB + | .if ENDIAN_LE and DUALNUM + | addi CARG3, RC, 4 + | .endif + | lwzx CARG1, BASE_HI, RB | .if DUALNUM - | lwzx CARG3, KBASE, RC + | .if ENDIAN_LE + | lwzx CARG3, KBASE, CARG3 + | .else + | lwzx CARG3, KBASE, RC + | .endif | .endif | .if FPU | lfdx f15, BASE, RB @@ -3927,8 +4295,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .endif || break; ||default: - | lwzx CARG1, BASE, RB - | lwzx CARG3, BASE, RC + | lwzx CARG1, BASE_HI, RB + | lwzx CARG3, BASE_HI, RC | .if FPU | lfdx f14, BASE, RB | lfdx f15, BASE, RC @@ -4011,50 +4379,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |.macro ins_arithdn, intins, fpins, fpcall | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + | addi BASEP4, BASE, 4 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzux CARG1, RB, BASE - | lwzux CARG3, RC, KBASE + | .if ENDIAN_LE + | lwzx TMP1, RB, BASE_HI + | lwzux CARG2, RC, KBASE + | lwz TMP2, 4(RC) + | checknum cr0, TMP1 + | lwzux CARG1, RB, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, KBASE | lwz CARG2, 4(RB) - | checknum cr0, CARG1 - | lwz CARG4, 4(RC) - | checknum cr1, CARG3 + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 + | .endif || break; ||case 1: - | lwzux CARG3, 
RB, BASE - | lwzux CARG1, RC, KBASE - | lwz CARG4, 4(RB) - | checknum cr0, CARG3 - | lwz CARG2, 4(RC) - | checknum cr1, CARG1 + | .if ENDIAN_LE + | lwzux CARG1, RC, KBASE + | lwzx TMP1, RB, BASE_HI + | lwz TMP2, 4(RC) + | checknum cr0, TMP1 + | lwzux CARG2, RB, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG3, RB, BASE + | lwzux CARG1, RC, KBASE + | lwz CARG4, 4(RB) + | checknum cr0, CARG3 + | lwz CARG2, 4(RC) + | checknum cr1, CARG1 + | .endif || break; ||default: - | lwzux CARG1, RB, BASE - | lwzux CARG3, RC, BASE - | lwz CARG2, 4(RB) - | checknum cr0, CARG1 - | lwz CARG4, 4(RC) - | checknum cr1, CARG3 + | .if ENDIAN_LE + | lwzx TMP1, RB, BASE_HI + | lwzx TMP2, RC, BASE_HI + | lwzux CARG1, RB, BASE + | checknum cr0, TMP1 + | lwzux CARG2, RC, BASE + | checknum cr1, TMP2 + | .else + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, BASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 + | .endif || break; ||} | bne >5 | bne cr1, >5 - |.if "intins" == "intmod" - | mr CARG1, CARG2 - | mr CARG2, CARG4 - |.endif + |.if ENDIAN_LE + | intins CARG1, CARG1, CARG2 + |.else + | .if "intins" == "intmod" + | mr CARG1, CARG2 + | mr CARG2, CARG4 + | .endif | intins CARG1, CARG2, CARG4 - | bso >4 - |1: + |.endif + | ins_arithfallback bso | ins_next1 - | stwux TISNUM, RA, BASE - | stw CARG1, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx CARG1, BASE_LO, RA |2: | ins_next2 - |4: // Overflow. - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b |5: // FP variant. |.if FPU ||if (vk == 1) { @@ -4137,8 +4533,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_POW: | // NYI: (partial) integer arithmetic. 
- | lwzx CARG1, BASE, RB - | lwzx CARG3, BASE, RC + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | lwzx CARG1, BASE_HI, RB + | lwzx CARG3, BASE_HI, RC |.if FPU | lfdx FARG1, BASE, RB | lfdx FARG2, BASE, RC @@ -4177,6 +4576,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Returns NULL (finished) or TValue * (metamethod). | cmplwi CRET1, 0 | lp BASE, L->base + | addi BASEP4, BASE, 4 | bne ->vmeta_binop | ins_next1 |.if FPU @@ -4195,42 +4595,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSTR: | // RA = dst*8, RD = str_const*8 (~) + | addi BASEP4, BASE, 4 | srwi TMP1, RD, 1 | subfic TMP1, TMP1, -4 | ins_next1 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 | li TMP2, LJ_TSTR - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP2, BASE_HI, RA + | stwx TMP0, BASE_LO, RA | ins_next2 break; case BC_KCDATA: |.if FFI | // RA = dst*8, RD = cdata_const*8 (~) + | addi BASEP4, BASE, 4 | srwi TMP1, RD, 1 | subfic TMP1, TMP1, -4 | ins_next1 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 | li TMP2, LJ_TCDATA - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) + | stwx TMP2, BASE_HI, RA + | stwx TMP0, BASE_LO, RA | ins_next2 |.endif break; case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8 + | addi BASEP4, BASE, 4 |.if DUALNUM | slwi RD, RD, 13 | srawi RD, RD, 16 | ins_next1 - | stwux TISNUM, RA, BASE - | stw RD, 4(RA) + | stwx TISNUM, BASE_HI, RA + | stwx RD, BASE_LO, RA | ins_next2 |.else | // The soft-float approach is faster. | slwi RD, RD, 13 | srawi TMP1, RD, 31 | xor TMP2, TMP1, RD + | .gpr64 extsw RD, RD | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) | cntlzw TMP3, TMP2 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 @@ -4242,8 +4646,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RD, RD, TMP1 // hi = hi + exponent-1 | and RD, RD, TMP0 // hi = x == 0 ? 
0 : hi | ins_next1 - | stwux RD, RA, BASE - | stw ZERO, 4(RA) + | stwx RD, BASE_HI, RA + | stwx ZERO, BASE_LO, RA | ins_next2 |.endif break; @@ -4263,18 +4667,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_KPRI: | // RA = dst*8, RD = primitive_type*8 (~) + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif | srwi TMP1, RD, 3 | not TMP0, TMP1 | ins_next1 - | stwx TMP0, BASE, RA + | stwx TMP0, BASE_HI, RA | ins_next2 break; case BC_KNIL: | // RA = base*8, RD = end*8 - | stwx TISNIL, BASE, RA + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, RA | addi RA, RA, 8 |1: - | stwx TISNIL, BASE, RA + | stwx TISNIL, BASE_HI, RA | cmpw RA, RD | addi RA, RA, 8 | blt <1 @@ -4318,7 +4728,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG2, UPVAL:RB->v | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) + | lwz TMP2, WORD_HI(RD) |.if FPU | stfd f0, 0(CARG2) |.else @@ -4326,7 +4736,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw CARG3, 4(CARG2) |.endif | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) + | lwz TMP1, WORD_LO(RD) | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | subi TMP2, TMP2, (LJ_TNUMX+1) | bne >2 // Upvalue is closed and black? 
@@ -4359,8 +4769,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP3, STR:TMP1->marked | lbz TMP2, UPVAL:RB->closed | li TMP0, LJ_TSTR - | stw STR:TMP1, 4(CARG2) - | stw TMP0, 0(CARG2) + | stw STR:TMP1, WORD_LO(CARG2) + | stw TMP0, WORD_HI(CARG2) | bne >2 |1: | ins_next @@ -4407,7 +4817,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwzx UPVAL:RB, LFUNC:RB, RA | ins_next1 | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) + | stw TMP0, WORD_HI(TMP1) | ins_next2 break; @@ -4422,6 +4832,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add CARG2, BASE, RA | bl extern lj_func_closeuv // (lua_State *L, TValue *level) | lp BASE, L->base + | addi BASEP4, BASE, 4 |1: | ins_next break; @@ -4440,8 +4851,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Returns GCfuncL *. | lp BASE, L->base | li TMP0, LJ_TFUNC - | stwux TMP0, RA, BASE - | stw LFUNC:CRET1, 4(RA) + | addi BASEP4, BASE, 4 + | stwx TMP0, BASE_HI, RA + | stwx LFUNC:CRET1, BASE_LO, RA | ins_next break; @@ -4474,8 +4886,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | lp BASE, L->base | li TMP0, LJ_TTAB - | stwux TMP0, RA, BASE - | stw TAB:CRET1, 4(RA) + | addi BASEP4, BASE, 4 + | stwx TMP0, BASE_HI, RA + | stwx TAB:CRET1, BASE_LO, RA | ins_next if (op == BC_TNEW) { |3: @@ -4508,13 +4921,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_TGETV: | // RA = dst*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB + | lwzx CARG2, BASE_HI, RC + | lwzx TAB:RB, BASE_LO, RB |.if DUALNUM - | lwz RC, 4(RC) + | lwzx RC, BASE_LO, RC |.else - | lfd f0, 0(RC) + | lfdx f0, BASE, RC |.endif | checktab CARG1 | checknum cr1, CARG2 @@ -4541,9 +4955,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP2, TMP2, 3 |.endif | ble ->vmeta_tgetv // Integer key and in array part? 
- | lwzx TMP0, TMP1, TMP2 |.if FPU - | lfdx f14, TMP1, TMP2 + | .if ENDIAN_LE + | lfdux f14, TMP1, TMP2 + | lwz TMP0, WORD_HI(TMP1) + | .else + | lwzx TMP0, TMP1, TMP2 + | lfdx f14, TMP1, TMP2 + | .endif |.else | lwzux SAVE0, TMP1, TMP2 | lwz SAVE1, 4(TMP1) @@ -4571,21 +4990,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: | checkstr CARG2; bne ->vmeta_tgetv |.if not DUALNUM - | lwz STR:RC, 4(RC) + | lwzx STR:RC, BASE_LO, RC |.endif | b ->BC_TGETS_Z // String key? break; case BC_TGETS: | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | subfic TMP1, TMP1, -4 | checktab CARG1 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 | bne ->vmeta_tgets1 |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | addi BASEP4, BASE, 4 | lwz TMP0, TAB:RB->hmask | lwz TMP1, STR:RC->hash | lwz NODE:TMP2, TAB:RB->node @@ -4595,16 +5016,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sub TMP1, TMP0, TMP1 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) + | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) + | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2) | checkstr CARG1; bne >4 | cmpw TMP0, STR:RC; bne >4 | checknil CARG2; beq >5 // Key found, but nil value? |3: - | stwux CARG2, RA, BASE - | stw TMP1, 4(RA) + | stwx CARG2, BASE_HI, RA + | stwx TMP1, BASE_LO, RA | ins_next | |4: // Follow hash chain. 
@@ -4625,16 +5046,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TGETB: | // RA = dst*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | checktab CARG1; bne ->vmeta_tgetb | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array | cmplw TMP0, TMP1; bge ->vmeta_tgetb |.if FPU - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC + | .if ENDIAN_LE + | lfdux f0, TMP2, RC + | lwz TMP1, WORD_HI(TMP2) + | .else + | lwzx TMP1, TMP2, RC + | lfdx f0, TMP2, RC + | .endif |.else | lwzux TMP1, TMP2, RC | lwz TMP3, 4(TMP2) @@ -4661,12 +5088,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TGETR: | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG1, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx TAB:CARG1, BASE_LO, RB |.if DUALNUM - | add RC, BASE, RC | lwz TMP0, TAB:CARG1->asize - | lwz CARG2, 4(RC) + | lwzx CARG2, BASE_LO, RC | lwz TMP1, TAB:CARG1->array |.else | lfdx f0, BASE, RC @@ -4696,13 +5122,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_TSETV: | // RA = src*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB + | lwzx CARG2, BASE_HI, RC + | lwzx TAB:RB, BASE_LO, RB |.if DUALNUM - | lwz RC, 4(RC) + | lwzx RC, BASE_LO, RC |.else - | lfd f0, 0(RC) + | lfdx f0, BASE, RC |.endif | checktab CARG1 | checknum cr1, CARG2 @@ -4729,7 +5156,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP0, TMP2, 3 |.endif | ble ->vmeta_tsetv // Integer key and in array part? 
+ | .if ENDIAN_LE + | addi TMP2, TMP1, 4 + | lwzx TMP2, TMP2, TMP0 + | .else | lwzx TMP2, TMP1, TMP0 + | .endif | lbz TMP3, TAB:RB->marked |.if FPU | lfdx f14, BASE, RA @@ -4763,7 +5195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: | checkstr CARG2; bne ->vmeta_tsetv |.if not DUALNUM - | lwz STR:RC, 4(RC) + | lwzx STR:RC, BASE_LO, RC |.endif | b ->BC_TSETS_Z // String key? | @@ -4773,9 +5205,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETS: | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | subfic TMP1, TMP1, -4 | checktab CARG1 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 @@ -4800,9 +5233,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP3, TAB:RB->marked | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val + | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2) + | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2) + | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2) | lwz NODE:TMP1, NODE:TMP2->next | checkstr CARG1; bne >5 | cmpw TMP0, STR:RC; bne >5 @@ -4847,9 +5280,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. |6: | li TMP0, LJ_TSTR - | stw STR:RC, 4(CARG3) + | stw STR:RC, WORD_LO(CARG3) | mr CARG2, TAB:RB - | stw TMP0, 0(CARG3) + | stw TMP0, WORD_HI(CARG3) | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | // Returns TValue *. | lp BASE, L->base @@ -4859,6 +5292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw SAVE0, 0(CRET1) | stw SAVE1, 4(CRET1) |.endif + | addi BASEP4, BASE, 4 | b <3 // No 2nd write barrier needed. | |7: // Possible table write barrier for the value. Skip valiswhite check. 
@@ -4867,9 +5301,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETB: | // RA = src*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE + | addi BASEP4, BASE, 4 + | lwzx CARG1, BASE_HI, RB | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) + | lwzx TAB:RB, BASE_LO, RB | checktab CARG1; bne ->vmeta_tsetb | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array @@ -4883,7 +5318,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz SAVE1, 4(CARG2) |.endif | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC + | .if ENDIAN_LE + | addi TMP1, TMP2, 4 + | lwzx TMP1, TMP1, RC + | .else + | lwzx TMP1, TMP2, RC + | .endif | checknil TMP1; beq >5 |1: | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) @@ -4912,13 +5352,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_TSETR: | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG2, 4(RB) + | addi BASEP4, BASE, 4 + | lwzx TAB:CARG2, BASE_LO, RB |.if DUALNUM - | add RC, BASE, RC | lbz TMP3, TAB:CARG2->marked | lwz TMP0, TAB:CARG2->asize - | lwz CARG3, 4(RC) + | lwzx CARG3, BASE_LO, RC | lwz TMP1, TAB:CARG2->array |.else | lfdx f0, BASE, RC @@ -4959,9 +5398,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RA, BASE, RA |1: | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. + | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table. | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. + | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word. | srwi CARG3, TMP0, 3 | beq >4 // Nothing to copy? 
| add CARG3, CARG3, TMP3 @@ -5020,8 +5459,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_CALL: | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 | mr TMP2, BASE - | lwzux TMP0, BASE, RA - | lwz LFUNC:RB, 4(BASE) + | lwzux2 TMP0, LFUNC:RB, BASE, RA | subi NARGS8:RC, NARGS8:RC, 8 | addi BASE, BASE, 8 | checkfunc TMP0; bne ->vmeta_call @@ -5035,8 +5473,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_CALLT: | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | lwzux TMP0, RA, BASE - | lwz LFUNC:RB, 4(RA) + | lwzux2 TMP0, LFUNC:RB, RA, BASE | subi NARGS8:RC, NARGS8:RC, 8 | lwz TMP1, FRAME_PC(BASE) | checkfunc TMP0 @@ -5099,8 +5536,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) | mr TMP2, BASE | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) + | lwz TMP1, WORD_HI-24(BASE) + | lwz LFUNC:RB, WORD_LO-24(BASE) |.if FPU | lfd f1, -8(BASE) | lfd f0, -16(BASE) @@ -5110,8 +5547,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG3, -16(BASE) | lwz CARG4, -12(BASE) |.endif - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) + | stw TMP1, WORD_HI(BASE) // Copy callable. + | stw LFUNC:RB, WORD_LO(BASE) | checkfunc TMP1 | li NARGS8:RC, 16 // Iterators get 2 arguments. |.if FPU @@ -5133,8 +5570,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // NYI: add hotloop, record BC_ITERN. |.endif | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. + | lwz TAB:RB, WORD_LO-16(RA) + | lwz RC, WORD_LO-8(RA) // Get index from control var. | lwz TMP0, TAB:RB->asize | lwz TMP1, TAB:RB->array | addi PC, PC, 4 @@ -5142,10 +5579,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmplw RC, TMP0 | slwi TMP3, RC, 3 | bge >5 // Index points after array part? 
- | lwzx TMP2, TMP1, TMP3 |.if FPU - | lfdx f0, TMP1, TMP3 + | lfdux f0, TMP3, TMP1 + | lwz TMP2, WORD_HI(TMP3) |.else + | lwzx TMP2, TMP1, TMP3 | lwzux CARG1, TMP3, TMP1 | lwz CARG2, 4(TMP3) |.endif @@ -5153,8 +5591,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz INS, -4(PC) | beq >4 |.if DUALNUM - | stw RC, 4(RA) - | stw TISNUM, 0(RA) + | stw RC, WORD_LO(RA) + | stw TISNUM, WORD_HI(RA) |.else | tonum_u f1, RC |.endif @@ -5167,7 +5605,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stw CARG2, 12(RA) |.endif | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. + | stw RC, WORD_LO-8(RA) // Update control var. | add PC, TMP1, TMP3 |.if not DUALNUM | stfd f1, 0(RA) @@ -5189,15 +5627,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgty <3 | slwi RB, RC, 3 | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 |.if FPU - | lfdx f0, TMP2, TMP3 + | lfdux f0, TMP3, TMP2 + | lwz RB, WORD_HI(TMP3) |.else | add CARG3, TMP2, TMP3 | lwz CARG1, 0(CARG3) | lwz CARG2, 4(CARG3) - |.endif | add NODE:TMP3, TMP2, TMP3 + |.endif | checknil RB | lwz INS, -4(PC) | beq >7 @@ -5224,7 +5662,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | addi RC, RC, 1 | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. + | stw RC, WORD_LO-8(RA) // Update control var. | b <3 | |7: // Skip holes in hash part. 
@@ -5235,10 +5673,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISNEXT: | // RA = base*8, RD = target (points to ITERN) | add RA, BASE, RA - | lwz TMP0, -24(RA) - | lwz CFUNC:TMP1, -20(RA) - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) + | lwz TMP0, WORD_HI-24(RA) + | lwz CFUNC:TMP1, WORD_LO-24(RA) + | lwz TMP2, WORD_HI-16(RA) + | lwz TMP3, WORD_HI-8(RA) | cmpwi cr0, TMP2, LJ_TTAB | cmpwi cr1, TMP0, LJ_TFUNC | cmpwi cr6, TMP3, LJ_TNIL @@ -5252,17 +5690,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne cr0, >5 | lus TMP1, 0xfffe | ori TMP1, TMP1, 0x7fff - | stw ZERO, -4(RA) // Initialize control var. - | stw TMP1, -8(RA) + | stw ZERO, WORD_LO-8(RA) // Initialize control var. + | stw TMP1, WORD_HI-8(RA) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) |1: | ins_next |5: // Despecialize bytecode if any of the checks fail. | li TMP0, BC_JMP | li TMP1, BC_ITERC + | .if ENDIAN_LE + | stb TMP0, -4(PC) + | .else | stb TMP0, -1(PC) + | .endif | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + | .if ENDIAN_LE + | stb TMP1, 0(PC) + | .else | stb TMP1, 3(PC) + | .endif | b <1 break; @@ -5306,7 +5752,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addi RA, RA, 8 | blt cr1, <1 // More vararg slots? |2: // Fill up remainder with nil. - | stw TISNIL, 0(RA) + | stw TISNIL, WORD_HI(RA) | cmplw RA, TMP2 | addi RA, RA, 8 | blt <2 @@ -5353,6 +5799,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RA, BASE, RA | add RC, BASE, SAVE0 | subi TMP3, BASE, 8 + | addi BASEP4, BASE, 4 | b <6 break; @@ -5425,13 +5872,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgt >6 | sub BASE, TMP2, RA | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | addi BASEP4, BASE, 4 | ins_next1 | lwz TMP1, LFUNC:TMP1->pc | lwz KBASE, PC2PROTO(k)(TMP1) | ins_next2 | |6: // Fill up results with nil. 
- | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, TMP2, TMP1 | b <5 @@ -5474,13 +5922,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bgt >6 | sub BASE, TMP2, RA | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | addi BASEP4, BASE, 4 | ins_next1 | lwz TMP1, LFUNC:TMP1->pc | lwz KBASE, PC2PROTO(k)(TMP1) | ins_next2 | |6: // Fill up results with nil. - | subi TMP1, RD, 8 + | addi TMP1, RD, WORD_HI-8 | addi RD, RD, 8 | stwx TISNIL, TMP2, TMP1 | b <5 @@ -5506,11 +5955,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = (op == BC_IFORL || op == BC_JFORL); |.if DUALNUM | // Integer loop. - | lwzux TMP1, RA, BASE - | lwz CARG1, FORL_IDX*8+4(RA) + | lwzux2 TMP1, CARG1, RA, BASE + if (vk) { + | mtxer ZERO + } | cmplw cr0, TMP1, TISNUM if (vk) { - | lwz CARG3, FORL_STEP*8+4(RA) + | lwz CARG3, FORL_STEP*8+WORD_LO(RA) | bne >9 |.if GPR64 | // Need to check overflow for (a<<32) + (b<<32). @@ -5522,15 +5973,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addo. 
CARG1, CARG1, CARG3 |.endif | cmpwi cr6, CARG3, 0 - | lwz CARG2, FORL_STOP*8+4(RA) - | bso >6 + | lwz CARG2, FORL_STOP*8+WORD_LO(RA) + | bso >2 |4: - | stw CARG1, FORL_IDX*8+4(RA) + | stw CARG1, FORL_IDX*8+WORD_LO(RA) } else { - | lwz SAVE0, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) + | lwz SAVE0, FORL_STEP*8+WORD_HI(RA) + | lwz CARG3, FORL_STEP*8+WORD_LO(RA) + | lwz TMP2, FORL_STOP*8+WORD_HI(RA) + | lwz CARG2, FORL_STOP*8+WORD_LO(RA) | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq @@ -5541,11 +5992,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | blt cr6, >5 | cmpw CARG1, CARG2 |1: - | stw TISNUM, FORL_EXT*8(RA) + | stw TISNUM, FORL_EXT*8+WORD_HI(RA) if (op != BC_JFORL) { | srwi RD, RD, 1 } - | stw CARG1, FORL_EXT*8+4(RA) + | stw CARG1, FORL_EXT*8+WORD_LO(RA) if (op != BC_JFORL) { | add RD, PC, RD } @@ -5565,11 +6016,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Invert check for negative step. | cmpw CARG2, CARG1 | b <1 - if (vk) { - |6: // Potential overflow. - | checkov TMP0, <4 // Ignore unrelated overflow. - | b <2 - } |.endif if (vk) { |.if DUALNUM @@ -5599,12 +6045,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz CARG3, FORL_STOP*8(RA) | lwz CARG4, FORL_STOP*8+4(RA) |.endif - | lwz SAVE0, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8+WORD_HI(RA) } else { |.if DUALNUM |9: // FP loop. |.else + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + | lwzx TMP1, RA, BASE_LO + | add RA, RA, BASE + |.else | lwzux TMP1, RA, BASE + |.endif | lwz SAVE0, FORL_STEP*8(RA) | lwz TMP2, FORL_STOP*8(RA) | cmplw cr0, TMP1, TISNUM @@ -5707,17 +6159,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) #endif case BC_IITERL: | // RA = base*8, RD = target - | lwzux TMP1, RA, BASE - | lwz TMP2, 4(RA) + | lwzux2 TMP1, TMP2, RA, BASE | checknil TMP1; beq >1 // Stop if iterator returned nil. 
if (op == BC_JITERL) { - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) + | stw TMP1, WORD_HI-8(RA) + | stw TMP2, WORD_LO-8(RA) | b =>BC_JLOOP } else { | branch_RD // Otherwise save control var + branch. - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) + | stw TMP1, WORD_HI-8(RA) + | stw TMP2, WORD_LO-8(RA) } |1: | ins_next @@ -5746,7 +6197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Traces on PPC don't store the trace number, so use 0. | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) | lwzx TRACE:TMP2, TMP1, RD - | clrso TMP1 + | mtxer ZERO | lp TMP2, TRACE:TMP2->mcode | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) | mtctr TMP2 @@ -5798,7 +6249,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | |3: // Clear missing parameters. - | stwx TISNIL, BASE, NARGS8:RC + |.if ENDIAN_LE + | addi BASEP4, BASE, 4 + |.endif + | stwx TISNIL, BASE_HI, NARGS8:RC | addi NARGS8:RC, NARGS8:RC, 8 | b <2 break; @@ -5815,11 +6269,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP2, L->maxstack | add TMP1, BASE, RC | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. + | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC. | addi TMP3, RC, 8+FRAME_VARG | lwz KBASE, -4+PC2PROTO(k)(PC) | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. + | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG. | bge ->vm_growstack_l | lbz TMP2, -4+PC2PROTO(numparams)(PC) | mr RA, BASE @@ -5830,18 +6284,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq >3 |1: | cmplw RA, RC // Less args than parameters? - | lwz TMP0, 0(RA) - | lwz TMP3, 4(RA) + | lwz TMP0, WORD_HI(RA) + | lwz TMP3, WORD_LO(RA) | bge >4 - | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). + | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC). | addi RA, RA, 8 |2: | addic. 
TMP2, TMP2, -1 - | stw TMP0, 8(TMP1) - | stw TMP3, 12(TMP1) + | stw TMP0, WORD_HI+8(TMP1) + | stw TMP3, WORD_LO+8(TMP1) | addi TMP1, TMP1, 8 | bne <1 |3: + | addi BASEP4, BASE, 4 | ins_next2 | |4: // Clear missing parameters. @@ -5853,35 +6308,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCCW: | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 if (op == BC_FUNCC) { - | lp RD, CFUNC:RB->f + | lp FUNCREG, CFUNC:RB->f } else { - | lp RD, DISPATCH_GL(wrapf)(DISPATCH) + | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH) } | add TMP1, RA, NARGS8:RC | lwz TMP2, L->maxstack - | .toc lp TMP3, 0(RD) + | .opd lp TMP3, 0(FUNCREG) | add RC, BASE, NARGS8:RC | stp BASE, L->base | cmplw TMP1, TMP2 | stp RC, L->top | li_vmstate C - |.if TOC + |.if OPD | mtctr TMP3 |.else - | mtctr RD + | mtctr FUNCREG |.endif if (op == BC_FUNCCW) { | lp CARG2, CFUNC:RB->f } | mr CARG1, L | bgt ->vm_growstack_c // Need to grow stack. - | .toc lp TOCREG, TOC_OFS(RD) - | .tocenv lp ENVREG, ENV_OFS(RD) + | .opd lp TOCREG, TOC_OFS(FUNCREG) + | .opdenv lp ENVREG, ENV_OFS(FUNCREG) | st_vmstate | bctrl // (lua_State *L [, lua_CFunction f]) + | .toc lp TOCREG, SAVE_TOC | // Returns nresults. 
| lp BASE, L->base - | .toc ld TOCREG, SAVE_TOC | slwi RD, CRET1, 3 | lp TMP1, L->top | li_vmstate INTERP @@ -5932,7 +6387,11 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x1\n" "\t.string \"\"\n" "\t.uleb128 0x1\n" +#if LJ_ARCH_PPC32ON64 + "\t.sleb128 -8\n" +#else "\t.sleb128 -4\n" +#endif "\t.byte 65\n" "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" "\t.align 2\n" @@ -5945,14 +6404,24 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long .Lbegin\n" "\t.long %d\n" "\t.byte 0xe\n\t.uleb128 %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" + "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", +#endif fcofs, CFRAME_SIZE); for (i = 14; i <= 31; i++) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n" "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); +#if LJ_ARCH_PPC32ON64 + 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) +#else + 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) +#endif + ); fprintf(ctx->fp, "\t.align 2\n" ".LEFDE0:\n\n"); @@ -5968,8 +6437,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call\n" #endif "\t.long %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); @@ -5984,7 +6457,11 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x1\n" "\t.string \"zPR\"\n" "\t.uleb128 0x1\n" +#if LJ_ARCH_PPC32ON64 + "\t.sleb128 -8\n" +#else "\t.sleb128 -4\n" +#endif "\t.byte 65\n" "\t.uleb128 6\n" /* augmentation length */ "\t.byte 0x1b\n" /* pcrel|sdata4 */ @@ -6002,14 +6479,24 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 %d\n" +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 
65\n\t.sleb128 -2\n" + "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n", +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", +#endif fcofs, CFRAME_SIZE); for (i = 14; i <= 31; i++) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n" "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); +#if LJ_ARCH_PPC32ON64 + 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i) +#else + 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i) +#endif + ); fprintf(ctx->fp, "\t.align 2\n" ".LEFDE2:\n\n"); @@ -6037,8 +6524,12 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long lj_vm_ffi_call-.\n" "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ +#if LJ_ARCH_PPC32ON64 + "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" +#else "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" +#endif + "\t.byte 0x8e\n\t.uleb128 1\n" "\t.byte 0xd\n\t.uleb128 0xe\n" "\t.align 2\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);