Skip to content

Commit

Permalink
FFI: Unify stack setup for C calls in interpreter.
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Pall committed Aug 29, 2023
1 parent 7cc53f0 commit cf903ed
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 53 deletions.
57 changes: 31 additions & 26 deletions src/lj_ccall.c
Expand Up @@ -20,12 +20,15 @@
#if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */

/* Store arg, converted to GPRArg, at byte offset nsp into cc->stack and then
** advance nsp by CTSIZE_PTR. Here nsp is a byte offset, not a slot index.
** Expands to a single comma expression, so it can be used as the body of an
** unbraced if/else. Requires cc and nsp to be in scope at the point of use;
** arg is evaluated exactly once. No bounds check is performed here.
*/
#define CCALL_PUSH(arg) \
*(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR

#if LJ_ABI_WIN

#define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \
if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
if (cc->retref) CCALL_PUSH(dp);

#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET

Expand All @@ -40,7 +43,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp; \
CCALL_PUSH(dp); \
} else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \
}
Expand All @@ -56,7 +59,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp;
CCALL_PUSH(dp);

#endif

Expand All @@ -67,7 +70,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
cc->stack[nsp++] = (GPRArg)dp; \
CCALL_PUSH(dp); \
}

#endif
Expand Down Expand Up @@ -278,8 +281,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
Expand Down Expand Up @@ -471,8 +474,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
Expand Down Expand Up @@ -565,8 +568,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
Expand Down Expand Up @@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
cc->nsp = nsp + n;
memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
if (nsp + sz > CCALL_SIZE_STACK)
return 1; /* Too many arguments. */
cc->nsp = nsp + sz;
memcpy((uint8_t *)cc->stack + nsp, dp, sz);
}
return 0; /* Ok. */
}
Expand Down Expand Up @@ -1022,22 +1026,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} else {
sz = CTSIZE_PTR;
}
sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */

CCALL_HANDLE_REGARG /* Handle register arguments. */

/* Otherwise pass argument on stack. */
if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
nsp = (nsp + align) & ~align; /* Align argument on stack. */
if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
MSize align = (1u << ctype_align(d->info)) - 1;
if (rp)
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
}
if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
dp = ((uint8_t *)cc->stack) + nsp;
nsp += n * CTSIZE_PTR;
if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
}
dp = &cc->stack[nsp];
nsp += n;
isva = 0;

done:
Expand Down Expand Up @@ -1099,10 +1104,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
if (nsp > CCALL_SPS_FREE)
cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
return gcsteps;
}

Expand Down
7 changes: 4 additions & 3 deletions src/lj_ccall.h
Expand Up @@ -152,14 +152,15 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);

/* Limit used by the slot-counting checks (nsp + n > CCALL_MAXSTACK). */
#define CCALL_MAXSTACK 32
/* Stack argument capacity: CCALL_NUM_STACK is the number of stack[] slots,
** CCALL_SIZE_STACK is the same capacity in bytes at CTSIZE_PTR bytes per
** slot. The byte-based nsp checks compare against CCALL_SIZE_STACK.
** NOTE(review): presumably 31 rather than 32 so that the byte count still
** fits the uint8_t nsp field when CTSIZE_PTR is 8 -- confirm.
*/
#define CCALL_NUM_STACK 31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)

/* -- C call state -------------------------------------------------------- */

typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */
uint8_t nsp; /* Number of stack slots. */
uint8_t nsp; /* Number of bytes on stack. */
uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */
Expand All @@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
} CCallState;

/* -- C call handling ----------------------------------------------------- */
Expand Down
8 changes: 4 additions & 4 deletions src/vm_arm.dasc
Expand Up @@ -2571,16 +2571,16 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| mov r11, sp
| sub sp, sp, CARG1 // Readjust stack.
| subs CARG2, CARG2, #1
| subs CARG2, CARG2, #4
|.if HFABI
| vldm RB, {d0-d7}
|.endif
| ldr RB, CCSTATE->func
| bmi >2
|1: // Copy stack slots.
| ldr CARG4, [CARG3, CARG2, lsl #2]
| str CARG4, [sp, CARG2, lsl #2]
| subs CARG2, CARG2, #1
| ldr CARG4, [CARG3, CARG2]
| str CARG4, [sp, CARG2]
| subs CARG2, CARG2, #4
| bpl <1
|2:
| ldrd CARG12, CCSTATE->gpr[0]
Expand Down
8 changes: 4 additions & 4 deletions src/vm_arm64.dasc
Expand Up @@ -2222,14 +2222,14 @@ static void build_subroutines(BuildCtx *ctx)
| ldr TMP0w, CCSTATE:x0->spadj
| ldrb TMP1w, CCSTATE->nsp
| add TMP2, CCSTATE, #offsetof(CCallState, stack)
| subs TMP1, TMP1, #1
| subs TMP1, TMP1, #8
| ldr TMP3, CCSTATE->func
| sub sp, sp, TMP0
| bmi >2
|1: // Copy stack slots
| ldr TMP0, [TMP2, TMP1, lsl #3]
| str TMP0, [sp, TMP1, lsl #3]
| subs TMP1, TMP1, #1
| ldr TMP0, [TMP2, TMP1]
| str TMP0, [sp, TMP1]
| subs TMP1, TMP1, #8
| bpl <1
|2:
| ldp x0, x1, CCSTATE->gpr[0]
Expand Down
1 change: 0 additions & 1 deletion src/vm_mips.dasc
Expand Up @@ -2951,7 +2951,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp
| subu sp, sp, TMP1
| sw ra, -4(TMP2)
| sll CARG2, CARG2, 2
| sw r16, -8(TMP2)
| sw CCSTATE, -12(TMP2)
| move r16, TMP2
Expand Down
1 change: 0 additions & 1 deletion src/vm_mips64.dasc
Expand Up @@ -3065,7 +3065,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp
| dsubu sp, sp, TMP1
| sd ra, -8(TMP2)
| sll CARG2, CARG2, 3
| sd r16, -16(TMP2)
| sd CCSTATE, -24(TMP2)
| move r16, TMP2
Expand Down
3 changes: 1 addition & 2 deletions src/vm_ppc.dasc
Expand Up @@ -3269,14 +3269,13 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP0, 4(sp)
| cmpwi cr1, CARG3, 0
| mr TMP2, sp
| addic. CARG2, CARG2, -1
| addic. CARG2, CARG2, -4
| stwux sp, sp, TMP1
| crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
| stw r14, -4(TMP2)
| stw CCSTATE, -8(TMP2)
| mr r14, TMP2
| la TMP1, CCSTATE->stack
| slwi CARG2, CARG2, 2
| blty >2
| la TMP2, 8(sp)
|1:
Expand Down
8 changes: 4 additions & 4 deletions src/vm_x64.dasc
Expand Up @@ -2755,12 +2755,12 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp
| sub ecx, 1
| sub ecx, 8
| js >2
|1:
| mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
| mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
| sub ecx, 1
| mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
| mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
| sub ecx, 8
| jns <1
|2:
|
Expand Down
22 changes: 14 additions & 8 deletions src/vm_x86.dasc
Expand Up @@ -3314,19 +3314,25 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp
| sub ecx, 1
|.if X64
| sub ecx, 8
| js >2
|1:
|.if X64
| mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
| mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
| mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
| mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
| sub ecx, 8
| jns <1
|2:
|.else
| mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
| mov [esp+ecx*4], eax
|.endif
| sub ecx, 1
| sub ecx, 4
| js >2
|1:
| mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
| mov [esp+ecx], eax
| sub ecx, 4
| jns <1
|2:
|.endif
|
|.if X64
| movzx eax, byte CCSTATE->nfpr
Expand Down

0 comments on commit cf903ed

Please sign in to comment.