Skip to content

Commit

Permalink
Merge branch 'master' into t_mod_New
Browse files Browse the repository at this point in the history
  • Loading branch information
jpsdr committed Mar 16, 2024
2 parents 79ab451 + 585e019 commit 5e8a29f
Showing 1 changed file with 90 additions and 44 deletions.
134 changes: 90 additions & 44 deletions common/x86/x86inc.asm
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
%endif

%define HAVE_PRIVATE_EXTERN 1
%ifdef __NASM_VER__
%ifdef __NASM_VERSION_ID__
%use smartalign
%if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
%define HAVE_PRIVATE_EXTERN 0
Expand Down Expand Up @@ -393,7 +393,24 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%endif
%endmacro

%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs (for win64 only)
; RESET_STACK_STATE
; Clears the assemble-time stack bookkeeping; expands to preprocessor
; directives only.
;   - If rstk is currently rsp, stack_size_padded is subtracted from
;     stack_offset; otherwise rstk is redefined as rsp.
;   - stack_size, stack_size_padded and xmm_regs_used are then set to 0.
%macro RESET_STACK_STATE 0
    %ifnidn rstk, rsp
        %xdefine rstk rsp
    %else
        ; Must run before stack_size_padded is zeroed below.
        %assign stack_offset stack_offset - stack_size_padded
    %endif
    %assign xmm_regs_used 0
    %assign stack_size_padded 0
    %assign stack_size 0
%endmacro

%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs
RESET_STACK_STATE
%ifnum %2
%if mmsize != 8
%assign xmm_regs_used %2
%endif
%endif
%ifnum %1
%if %1 != 0
%assign %%pad 0
Expand All @@ -403,11 +420,8 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%endif
%if WIN64
%assign %%pad %%pad + 32 ; shadow space
%if mmsize != 8
%assign xmm_regs_used %2
%if xmm_regs_used > 8
%assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
%endif
%if xmm_regs_used > 8
%assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
%endif
%endif
%if required_stack_alignment <= STACK_ALIGNMENT
Expand Down Expand Up @@ -503,35 +517,62 @@ DECLARE_REG 14, R13, 120
%endif
%endmacro

%macro WIN64_PUSH_XMM 0
; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
%if xmm_regs_used > 6 + high_mm_regs
movaps [rstk + stack_offset + 8], xmm6
%endif
%if xmm_regs_used > 7 + high_mm_regs
movaps [rstk + stack_offset + 24], xmm7
%endif
%assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
%if %%xmm_regs_on_stack > 0
%assign %%i 8
%rep %%xmm_regs_on_stack
movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
%assign %%i %%i+1
%endrep
; Push XMM registers to the stack. If no argument is specified all used registers
; will be pushed, otherwise only push previously unpushed registers.
;   %1 (optional): new value assigned to xmm_regs_used
;   %2 (optional): number of registers treated as already pushed; when only %1
;                  is given, the previous value of xmm_regs_used is used instead
; Expands to nothing when mmsize == 8.
%macro WIN64_PUSH_XMM 0-2 ; new_xmm_regs_used, xmm_regs_pushed
%if mmsize != 8
%if %0 == 2
%assign %%pushed %2
%assign xmm_regs_used %1
%elif %0 == 1
; Capture the old count before xmm_regs_used is overwritten.
%assign %%pushed xmm_regs_used
%assign xmm_regs_used %1
%else
; No arguments: treat nothing as pushed so every used register is stored.
%assign %%pushed 0
%endif
; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
%if %%pushed <= 6 + high_mm_regs && xmm_regs_used > 6 + high_mm_regs
movaps [rstk + stack_offset + 8], xmm6
%endif
%if %%pushed <= 7 + high_mm_regs && xmm_regs_used > 7 + high_mm_regs
movaps [rstk + stack_offset + 24], xmm7
%endif
; Rebase %%pushed so it counts only registers beyond the first 8 + high_mm_regs,
; clamped at 0.
%assign %%pushed %%pushed - high_mm_regs - 8
%if %%pushed < 0
%assign %%pushed 0
%endif
%assign %%regs_to_push xmm_regs_used - %%pushed - high_mm_regs - 8
%if %%regs_to_push > 0
; The 16-byte slots written below must fit between stack_size + 32 and
; stack_size_padded.
ASSERT (%%regs_to_push + %%pushed) * 16 <= stack_size_padded - stack_size - 32
; %%i is the number of the first register to store in this call.
%assign %%i %%pushed + 8
%rep %%regs_to_push
movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
%assign %%i %%i+1
%endrep
%endif
%endif
%endmacro

%macro WIN64_SPILL_XMM 1
%assign xmm_regs_used %1
ASSERT xmm_regs_used <= 16 + high_mm_regs
%assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
%if %%xmm_regs_on_stack > 0
; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
%assign %%pad %%xmm_regs_on_stack*16 + 32
%assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
SUB rsp, stack_size_padded
; Allocate stack space for XMM registers and push all, or a subset, of those.
;   %1: value assigned to xmm_regs_used
;   %2 (optional): number of XMM registers to reserve stack space for; asserted
;                  to be >= %1. When omitted, space is sized from %1.
; Starts by invoking RESET_STACK_STATE.
%macro WIN64_SPILL_XMM 1-2 ; xmm_regs_used, xmm_regs_reserved
RESET_STACK_STATE
%if mmsize != 8
%assign xmm_regs_used %1
ASSERT xmm_regs_used <= 16 + high_mm_regs
%if %0 == 2
ASSERT %2 >= %1
%assign %%xmm_regs_on_stack %2 - high_mm_regs - 8
%else
%assign %%xmm_regs_on_stack %1 - high_mm_regs - 8
%endif
%if %%xmm_regs_on_stack > 0
; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
%assign %%pad %%xmm_regs_on_stack*16 + 32
; Round the allocation up so that pad + stack_offset + gprsize is a multiple
; of STACK_ALIGNMENT.
%assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
SUB rsp, stack_size_padded
%endif
WIN64_PUSH_XMM
%endif
; NOTE(review): the WIN64_PUSH_XMM below sits outside the mmsize guard and
; repeats the call just above. It looks like the pre-change line kept by the
; diff rendering -- confirm against the actual file before relying on it.
WIN64_PUSH_XMM
%endmacro

%macro WIN64_RESTORE_XMM_INTERNAL 0
Expand Down Expand Up @@ -562,9 +603,7 @@ DECLARE_REG 14, R13, 120

; Invokes WIN64_RESTORE_XMM_INTERNAL (body not visible here; presumably reloads
; the saved XMM registers -- confirm), then clears the stack bookkeeping.
%macro WIN64_RESTORE_XMM 0
WIN64_RESTORE_XMM_INTERNAL
; NOTE(review): the three %assign lines below overlap with what
; RESET_STACK_STATE does. They look like pre-change lines kept by the diff
; rendering -- confirm against the actual file.
%assign stack_offset (stack_offset-stack_size_padded)
%assign stack_size_padded 0
%assign xmm_regs_used 0
RESET_STACK_STATE
%endmacro

%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
Expand Down Expand Up @@ -599,12 +638,11 @@ DECLARE_REG 14, R13, 72
%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
%assign num_args %1
%assign regs_used %2
%assign xmm_regs_used %3
ASSERT regs_used >= num_args
SETUP_STACK_POINTER %4
ASSERT regs_used <= 15
PUSH_IF_USED 9, 10, 11, 12, 13, 14
ALLOC_STACK %4
ALLOC_STACK %4, %3
LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
%if %0 > 4
%ifnum %4
Expand Down Expand Up @@ -668,7 +706,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
SETUP_STACK_POINTER %4
ASSERT regs_used <= 7
PUSH_IF_USED 3, 4, 5, 6
ALLOC_STACK %4
ALLOC_STACK %4, %3
LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
%if %0 > 4
%ifnum %4
Expand Down Expand Up @@ -701,13 +739,21 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endif ;======================================================================

%if WIN64 == 0
%macro WIN64_SPILL_XMM 1
%macro WIN64_SPILL_XMM 1-2
RESET_STACK_STATE
%if mmsize != 8
%assign xmm_regs_used %1
%endif
%endmacro
; Variant under the enclosing "%if WIN64 == 0": empty, expands to nothing.
%macro WIN64_RESTORE_XMM_INTERNAL 0
%endmacro
; Variant under the enclosing "%if WIN64 == 0": only resets the stack
; bookkeeping via RESET_STACK_STATE.
%macro WIN64_RESTORE_XMM 0
RESET_STACK_STATE
%endmacro
%macro WIN64_PUSH_XMM 0
%macro WIN64_PUSH_XMM 0-2
%if mmsize != 8 && %0 >= 1
%assign xmm_regs_used %1
%endif
%endmacro
%endif

Expand Down Expand Up @@ -856,8 +902,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
; The GNU linker assumes the stack is executable by default.
[SECTION .note.GNU-stack noalloc noexec nowrite progbits]

%ifdef __NASM_VER__
%if __NASM_VER__ >= 0x020e0300 ; 2.14.03
%ifdef __NASM_VERSION_ID__
%if __NASM_VERSION_ID__ >= 0x020e0300 ; 2.14.03
%if ARCH_X86_64
; Control-flow Enforcement Technology (CET) properties.
[SECTION .note.gnu.property alloc noexec nowrite note align=gprsize]
Expand Down Expand Up @@ -964,13 +1010,13 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%endif

%if ARCH_X86_64 || cpuflag(sse2)
%ifdef __NASM_VER__
%ifdef __NASM_VERSION_ID__
ALIGNMODE p6
%else
CPU amdnop
%endif
%else
%ifdef __NASM_VER__
%ifdef __NASM_VERSION_ID__
ALIGNMODE nop
%else
CPU basicnop
Expand Down

0 comments on commit 5e8a29f

Please sign in to comment.