Upstream: Unify assembly files with macOS
Signed-off-by: Jorgen Lundman <lundman@lundman.net>
lundman committed Feb 2, 2023
1 parent d696ca8 commit 3c59843
Showing 8 changed files with 45 additions and 42 deletions.
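
Note, not part of the commit itself: the bulk of the diff is mechanical. Every .align directive becomes .balign because the argument to .align is target-dependent in GNU-style assemblers -- on x86-64 ELF (Linux) it is a byte count, while the Mach-O (macOS) assembler reads it as a power of two -- whereas .balign always takes a byte count and so requests the same alignment on both platforms. The remaining hunks drop bare .section .rodata directives in favor of the SECTION_STATIC macro these files already use, since Mach-O has no section literally named .rodata. A minimal stand-alone illustration of the alignment difference:

/* Illustration only -- not taken from the patch. */
	.text
	.align	4	/* Linux/ELF: 4-byte boundary; macOS/Mach-O: 2^4 = 16 bytes */
	.balign	4	/* 4-byte boundary on every target */
	.balign	64	/* 64-byte boundary on every target */
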
16 changes: 8 additions & 8 deletions module/icp/asm-x86_64/aes/aes_aesni.S
@@ -378,7 +378,7 @@ rijndael_key_setup_enc_intel_local:
FRAME_END
RET

-.align 4
+.balign 4
.Lenc_key192:
cmp $192, %KEYSIZE32
jnz .Lenc_key128
@@ -415,7 +415,7 @@ rijndael_key_setup_enc_intel_local:
FRAME_END
RET

-.align 4
+.balign 4
.Lenc_key128:
cmp $128, %KEYSIZE32
jnz .Lenc_key_invalid_key_bits
@@ -522,7 +522,7 @@ FRAME_BEGIN
add %AESKEY, %ROUNDS64
mov %ROUNDS64, %ENDAESKEY

-.align 4
+.balign 4
.Ldec_key_reorder_loop:
movups (%AESKEY), %xmm0
movups (%ROUNDS64), %xmm1
@@ -533,7 +533,7 @@ FRAME_BEGIN
cmp %AESKEY, %ROUNDS64
ja .Ldec_key_reorder_loop

-.align 4
+.balign 4
.Ldec_key_inv_loop:
movups (%rcx), %xmm0
// Convert an encryption round key to a form usable for decryption
@@ -622,15 +622,15 @@ ENTRY_NP(aes_encrypt_intel)
movups -0x50(%KEYP), %KEY
aesenc %KEY, %STATE

-.align 4
+.balign 4
.Lenc192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
aesenc %KEY, %STATE
movups -0x30(%KEYP), %KEY
aesenc %KEY, %STATE

-.align 4
+.balign 4
.Lenc128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
@@ -705,15 +705,15 @@ ENTRY_NP(aes_decrypt_intel)
movups -0x50(%KEYP), %KEY
aesdec %KEY, %STATE

-.align 4
+.balign 4
.Ldec192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
aesdec %KEY, %STATE
movups -0x30(%KEYP), %KEY
aesdec %KEY, %STATE

-.align 4
+.balign 4
.Ldec128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
4 changes: 2 additions & 2 deletions module/icp/asm-x86_64/aes/aes_amd64.S
@@ -694,7 +694,7 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
SECTION_STATIC
-.align 64
+.balign 64
enc_tab:
enc_vals(u8)
#ifdef LAST_ROUND_TABLES
@@ -800,7 +800,7 @@ ENTRY_NP(aes_encrypt_amd64)
* unsigned char *out, const aes_encrypt_ctx cx[1])/
*/
SECTION_STATIC
-.align 64
+.balign 64
dec_tab:
dec_vals(v8)
#ifdef LAST_ROUND_TABLES
1 change: 0 additions & 1 deletion module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -1791,7 +1791,6 @@ ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
SET_SIZE(zfs_blake3_hash_many_avx2)

SECTION_STATIC
-.section .rodata

.p2align 6
ADD0:
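
The deletion above removes an explicit .section .rodata that immediately followed SECTION_STATIC; the pclmulqdq, SHA-256, and SHA-512 files below get the equivalent swap of .section .rodata for the macro. Mach-O names sections as segment,section pairs (for example __TEXT,__const) and has no .rodata, so the section has to be chosen per platform. A hypothetical sketch of such a macro -- the real definition lives in the ZFS assembler headers and may differ:

/* Hypothetical sketch only; not the actual OpenZFS definition. */
#if defined(__APPLE__)
#define	SECTION_STATIC	.section __TEXT,__const
#else
#define	SECTION_STATIC	.section .rodata
#endif

SECTION_STATIC			/* read-only data section for this platform */
	.balign	64
my_table:			/* hypothetical constant table */
	.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
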
31 changes: 18 additions & 13 deletions module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -53,12 +53,17 @@
/* Windows userland links with OpenSSL */
#if !defined (_WIN32) || defined (_KERNEL)

+/* Apple needs _ */
+#if defined (__APPLE__)
+#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe
+#endif
+
.extern gcm_avx_can_use_movbe

.text

#ifdef HAVE_MOVBE
-.align 32
+.balign 32
FUNCTION(_aesni_ctr32_ghash_6x)
.cfi_startproc
ENDBR
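
An aside on the __APPLE__ block added in the hunk above (the sketch below is illustrative and uses a made-up symbol, not code from the repository): Mach-O prepends an underscore to C-visible symbol names, so a flag defined in C as gcm_avx_can_use_movbe is seen by the assembler as _gcm_avx_can_use_movbe on macOS, while ELF targets use the unprefixed name. Redefining the name through the preprocessor lets the same .extern declaration and memory references build unchanged on both platforms.

/* Hypothetical stand-alone example of the same pattern. */
#if defined(__APPLE__)
#define	my_flag	_my_flag	/* Mach-O: C symbols carry a leading underscore */
#endif

	.extern	my_flag		/* declared in C as: extern uint32_t my_flag; */

	.text
	.balign	16
read_my_flag:
	movl	my_flag(%rip), %eax	/* RIP-relative load of the C-side flag */
	ret
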
@@ -75,7 +80,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x

-.align 32
+.balign 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
@@ -287,7 +292,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail

-.align 32
+.balign 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -309,7 +314,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32

-.align 32
+.balign 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -374,7 +379,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
SET_SIZE(_aesni_ctr32_ghash_6x)
#endif /* ifdef HAVE_MOVBE */

-.align 32
+.balign 32
FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
.cfi_startproc
ENDBR
@@ -391,7 +396,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x_nmb

-.align 32
+.balign 32
.Loop6x_nmb:
addl $100663296,%ebx
jc .Lhandle_ctr32_nmb
@@ -615,7 +620,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail_nmb

-.align 32
+.balign 32
.Lhandle_ctr32_nmb:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -637,7 +642,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32_nmb

-.align 32
+.balign 32
.Lenc_tail_nmb:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -818,7 +823,7 @@ ENTRY_ALIGN(aesni_gcm_decrypt, 32)
.cfi_endproc
SET_SIZE(aesni_gcm_decrypt)

-.align 32
+.balign 32
FUNCTION(_aesni_ctr32_6x)
.cfi_startproc
ENDBR
@@ -843,7 +848,7 @@ FUNCTION(_aesni_ctr32_6x)
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32

-.align 16
+.balign 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
@@ -886,7 +891,7 @@ FUNCTION(_aesni_ctr32_6x)
leaq 96(%rsi),%rsi

RET
-.align 32
+.balign 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
@@ -1237,7 +1242,7 @@ SET_SIZE(atomic_toggle_boolean_nv)

SECTION_STATIC

-.align 64
+.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
@@ -1249,7 +1254,7 @@ SECTION_STATIC
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64

/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
4 changes: 2 additions & 2 deletions module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
@@ -101,8 +101,8 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {

// static uint8_t byte_swap16_mask[] = {
// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
-.section .rodata
-.align XMM_ALIGN
+SECTION_STATIC
+.balign XMM_ALIGN
.Lbyte_swap16_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

14 changes: 7 additions & 7 deletions module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -188,7 +188,7 @@ ENTRY_ALIGN(gcm_init_htab_avx, 32)
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
-.align 32
+.balign 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
@@ -386,7 +386,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
subq $0x80,%rcx
jmp .Loop8x_avx

-.align 32
+.balign 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
@@ -506,7 +506,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
addq $0x80,%rcx
jmp .Ltail_no_xor_avx

-.align 32
+.balign 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
@@ -610,7 +610,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
subq $0x10,%rcx
jmp .Ltail_avx

-.align 32
+.balign 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
@@ -658,7 +658,7 @@ SET_SIZE(gcm_ghash_avx)
#endif /* !_WIN32 || _KERNEL */

SECTION_STATIC
-.align 64
+.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
@@ -667,7 +667,7 @@ SECTION_STATIC
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
-.align 64
+.balign 64
SET_OBJ(.Lrem_4bit)
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
@@ -710,7 +710,7 @@ SET_OBJ(.Lrem_8bit)
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64

/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
8 changes: 4 additions & 4 deletions module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -133,7 +133,7 @@ ENTRY_NP(SHA256TransformBlocks)
mov 4*7(%rdi),%r11d
jmp .Lloop

-.align 16
+.balign 16
.Lloop:
xor %rdi,%rdi
mov 4*0(%rsi),%r12d
@@ -873,7 +873,7 @@ ENTRY_NP(SHA256TransformBlocks)

add %r14d,%eax # h+=Maj(a,b,c)
jmp .Lrounds_16_xx
-.align 16
+.balign 16
.Lrounds_16_xx:
mov 4(%rsp),%r13d
mov 56(%rsp),%r12d
@@ -2063,8 +2063,8 @@ ENTRY_NP(SHA256TransformBlocks)
.cfi_endproc
SET_SIZE(SHA256TransformBlocks)

-.section .rodata
-.align 64
+SECTION_STATIC
+.balign 64
SET_OBJ(K256)
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
9 changes: 4 additions & 5 deletions module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -134,7 +134,7 @@ ENTRY_NP(SHA512TransformBlocks)
mov 8*7(%rdi),%r11
jmp .Lloop

-.align 16
+.balign 16
.Lloop:
xor %rdi,%rdi
mov 8*0(%rsi),%r12
@@ -874,7 +874,7 @@ ENTRY_NP(SHA512TransformBlocks)

add %r14,%rax # h+=Maj(a,b,c)
jmp .Lrounds_16_xx
-.align 16
+.balign 16
.Lrounds_16_xx:
mov 8(%rsp),%r13
mov 112(%rsp),%r12
@@ -2064,8 +2064,8 @@ ENTRY_NP(SHA512TransformBlocks)
.cfi_endproc
SET_SIZE(SHA512TransformBlocks)

-.section .rodata
-.align 64
+SECTION_STATIC
+.balign 64
SET_OBJ(K512)
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
@@ -2113,4 +2113,3 @@ K512:
#if defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
