Skip to content

Commit

Permalink
Save and resume full 128-bits of xmm argument registers in trampoline…
Browse files Browse the repository at this point in the history
…. Add definition of MonoContextSimdReg on amd64 for Windows platform and use for size/offset calculations.
  • Loading branch information
joncham committed Oct 19, 2017
1 parent c50b4ef commit ce596ec
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 5 deletions.
29 changes: 29 additions & 0 deletions mono/arch/amd64/amd64-codegen.h
Expand Up @@ -105,6 +105,10 @@ typedef enum
/*
 * Bitmask of the general-purpose registers that carry integer arguments
 * in this branch: RCX, RDX, R8 and R9.  (This matches the Microsoft x64
 * convention; the #if selecting this branch is outside this excerpt.)
 */
#define AMD64_ARGUMENT_REGS \
	((1 << AMD64_RCX) | (1 << AMD64_RDX) | (1 << AMD64_R8) | (1 << AMD64_R9))

/* Non-zero when register number `reg` is a member of AMD64_ARGUMENT_REGS. */
#define AMD64_IS_ARGUMENT_REG(reg) ((1 << (reg)) & AMD64_ARGUMENT_REGS)

/*
 * Bitmask of the XMM registers treated as argument registers.
 * xmm0-xmm3 are used by the standard calling convention; xmm4-xmm5 are
 * additionally used by __vectorcall (not currently used).
 */
#define AMD64_ARGUMENT_XREGS \
	((1 << AMD64_XMM0) | (1 << AMD64_XMM1) | (1 << AMD64_XMM2) | \
	 (1 << AMD64_XMM3) | (1 << AMD64_XMM4) | (1 << AMD64_XMM5))

/* Non-zero when XMM register number `reg` is a member of AMD64_ARGUMENT_XREGS. */
#define AMD64_IS_ARGUMENT_XREG(reg) ((1 << (reg)) & AMD64_ARGUMENT_XREGS)

/*
 * Bitmask of the general-purpose registers a callee must preserve in this
 * branch: RBX, RBP, RSI, RDI and R12-R15.
 */
#define AMD64_CALLEE_SAVED_REGS \
	((1 << AMD64_RBX) | (1 << AMD64_RBP) | (1 << AMD64_RSI) | (1 << AMD64_RDI) | \
	 (1 << AMD64_R12) | (1 << AMD64_R13) | (1 << AMD64_R14) | (1 << AMD64_R15))

/* Non-zero when register number `reg` is a member of AMD64_CALLEE_SAVED_REGS. */
#define AMD64_IS_CALLEE_SAVED_REG(reg) ((1 << (reg)) & AMD64_CALLEE_SAVED_REGS)
#else
Expand All @@ -114,6 +118,9 @@ typedef enum
/*
 * Bitmask of the general-purpose registers that carry integer arguments
 * in the #else branch: RDI, RSI, RDX, RCX, R8 and R9.  (This matches the
 * System V AMD64 convention; the selecting #if is outside this excerpt.)
 */
#define AMD64_ARGUMENT_REGS \
	((1 << AMD64_RDI) | (1 << AMD64_RSI) | (1 << AMD64_RDX) | \
	 (1 << AMD64_RCX) | (1 << AMD64_R8) | (1 << AMD64_R9))

/* Non-zero when register number `reg` is a member of AMD64_ARGUMENT_REGS. */
#define AMD64_IS_ARGUMENT_REG(reg) ((1 << (reg)) & AMD64_ARGUMENT_REGS)

/*
 * Bitmask of the XMM registers treated as argument registers in the
 * #else branch: xmm0 through xmm7.
 */
#define AMD64_ARGUMENT_XREGS \
	((1 << AMD64_XMM0) | (1 << AMD64_XMM1) | (1 << AMD64_XMM2) | (1 << AMD64_XMM3) | \
	 (1 << AMD64_XMM4) | (1 << AMD64_XMM5) | (1 << AMD64_XMM6) | (1 << AMD64_XMM7))

/* Non-zero when XMM register number `reg` is a member of AMD64_ARGUMENT_XREGS. */
#define AMD64_IS_ARGUMENT_XREG(reg) ((1 << (reg)) & AMD64_ARGUMENT_XREGS)

/*
 * Bitmask of the general-purpose registers a callee must preserve in the
 * #else branch: RBX, RBP and R12-R15.
 */
#define AMD64_CALLEE_SAVED_REGS \
	((1 << AMD64_RBX) | (1 << AMD64_RBP) | \
	 (1 << AMD64_R12) | (1 << AMD64_R13) | (1 << AMD64_R14) | (1 << AMD64_R15))

/* Non-zero when register number `reg` is a member of AMD64_CALLEE_SAVED_REGS. */
#define AMD64_IS_CALLEE_SAVED_REG(reg) ((1 << (reg)) & AMD64_CALLEE_SAVED_REGS)
#endif
Expand Down Expand Up @@ -537,6 +544,17 @@ typedef union {
amd64_codegen_post(inst); \
} while (0)

/*
 * amd64_movdqu_reg_membase:
 *   inst:    code emission pointer; advanced past the bytes written.
 *   reg:     XMM register number that receives the value.
 *   basereg: general-purpose register holding the base address.
 *   disp:    displacement added to basereg.
 *
 * Emits the byte sequence 0xf3, <REX via amd64_emit_rex>, 0x0f, 0x6f,
 * <ModRM/displacement via x86_membase_emit>, i.e. the F3 0F 6F /r form
 * (MOVDQU xmm, m128: a full 128-bit load with no alignment requirement).
 *
 * `reg` and `basereg` are expanded more than once, so callers should pass
 * expressions without side effects.
 */
#define amd64_movdqu_reg_membase(inst,reg,basereg,disp) \
do { \
amd64_codegen_pre(inst); \
x86_prefix((inst), 0xf3); /* 0xf3 prefix is written first, ahead of the REX handling */ \
amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \
*(inst)++ = (unsigned char)0x0f; \
*(inst)++ = (unsigned char)0x6f; /* 0x6f: load direction (memory -> register) */ \
x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); /* only the low 3 bits of each register number go here; presumably the high bit is covered by amd64_emit_rex */ \
amd64_codegen_post(inst); \
} while (0)

#define amd64_movsd_reg_membase(inst,reg,basereg,disp) \
do { \
amd64_codegen_pre(inst); \
Expand All @@ -559,6 +577,17 @@ typedef union {
amd64_codegen_post(inst); \
} while (0)

/*
 * amd64_movdqu_membase_reg:
 *   inst:    code emission pointer; advanced past the bytes written.
 *   basereg: general-purpose register holding the base address.
 *   disp:    displacement added to basereg.
 *   reg:     XMM register number whose value is stored.
 *
 * Emits the byte sequence 0xf3, <REX via amd64_emit_rex>, 0x0f, 0x7f,
 * <ModRM/displacement via x86_membase_emit>, i.e. the F3 0F 7F /r form
 * (MOVDQU m128, xmm: a full 128-bit store with no alignment requirement).
 *
 * `reg` and `basereg` are expanded more than once, so callers should pass
 * expressions without side effects.
 */
#define amd64_movdqu_membase_reg(inst,basereg,disp,reg) \
do { \
amd64_codegen_pre(inst); \
x86_prefix((inst), 0xf3); /* 0xf3 prefix is written first, ahead of the REX handling */ \
amd64_emit_rex(inst, 0, (reg), 0, (basereg)); \
*(inst)++ = (unsigned char)0x0f; \
*(inst)++ = (unsigned char)0x7f; /* 0x7f: store direction (register -> memory) */ \
x86_membase_emit ((inst), (reg) & 0x7, (basereg) & 0x7, (disp)); /* only the low 3 bits of each register number go here; presumably the high bit is covered by amd64_emit_rex */ \
amd64_codegen_post(inst); \
} while (0)

#define amd64_movsd_membase_reg(inst,basereg,disp,reg) \
do { \
amd64_codegen_pre(inst); \
Expand Down
10 changes: 6 additions & 4 deletions mono/mini/tramp-amd64.c
Expand Up @@ -360,8 +360,9 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
/* cfa = rbp + cfa_offset */
mono_add_unwind_op_offset (unwind_ops, code, buf, i, - cfa_offset + saved_regs_offset + (i * sizeof (mgreg_t)));
}
for (i = 0; i < 8; ++i)
amd64_movsd_membase_reg (code, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t)), i);
for (i = 0; i < AMD64_XMM_NREG; ++i)
if (AMD64_IS_ARGUMENT_XREG (i))
amd64_movdqu_membase_reg (code, AMD64_RBP, saved_fpregs_offset + (i * sizeof(MonoContextSimdReg)), i);

/* Check that the stack is aligned */
amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof (mgreg_t));
Expand Down Expand Up @@ -543,8 +544,9 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
for (i = 0; i < AMD64_NREG; ++i)
if (AMD64_IS_ARGUMENT_REG (i) || i == AMD64_R10 || i == AMD64_RAX)
amd64_mov_reg_membase (code, i, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), sizeof(mgreg_t));
for (i = 0; i < 8; ++i)
amd64_movsd_reg_membase (code, i, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t)));
for (i = 0; i < AMD64_XMM_NREG; ++i)
if (AMD64_IS_ARGUMENT_XREG (i))
amd64_movdqu_reg_membase (code, i, AMD64_RBP, saved_fpregs_offset + (i * sizeof(MonoContextSimdReg)));

/* Restore stack */
#if TARGET_WIN32
Expand Down
5 changes: 4 additions & 1 deletion mono/utils/mono-context.h
Expand Up @@ -31,6 +31,9 @@ typedef struct __darwin_xmm_reg MonoContextSimdReg;
typedef struct __darwin_xmm_reg MonoContextSimdReg;
#elif defined(__linux__) && defined(__GLIBC__)
typedef struct _libc_xmmreg MonoContextSimdReg;
#elif defined(HOST_WIN32)
#include <emmintrin.h>
typedef __m128d MonoContextSimdReg;
#endif
#elif defined(TARGET_ARM64)
typedef __uint128_t MonoContextSimdReg;
Expand Down Expand Up @@ -247,7 +250,7 @@ typedef struct {

typedef struct {
mgreg_t gregs [AMD64_NREG];
#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__))
#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__)) || defined(HOST_WIN32)
MonoContextSimdReg fregs [AMD64_XMM_NREG];
#else
double fregs [AMD64_XMM_NREG];
Expand Down

0 comments on commit ce596ec

Please sign in to comment.