diff --git a/module/icp/asm-x86_64/aes/aes_aesni.S b/module/icp/asm-x86_64/aes/aes_aesni.S
index f622235bd15b..4f3fe3ec65d6 100644
--- a/module/icp/asm-x86_64/aes/aes_aesni.S
+++ b/module/icp/asm-x86_64/aes/aes_aesni.S
@@ -378,7 +378,7 @@ rijndael_key_setup_enc_intel_local:
 	FRAME_END
 	RET
 
-.align 4
+.balign 4
 .Lenc_key192:
 	cmp	$192, %KEYSIZE32
 	jnz	.Lenc_key128
@@ -415,7 +415,7 @@ rijndael_key_setup_enc_intel_local:
 	FRAME_END
 	RET
 
-.align 4
+.balign 4
 .Lenc_key128:
 	cmp	$128, %KEYSIZE32
 	jnz	.Lenc_key_invalid_key_bits
@@ -522,7 +522,7 @@ FRAME_BEGIN
 	add	%AESKEY, %ROUNDS64
 	mov	%ROUNDS64, %ENDAESKEY
 
-.align 4
+.balign 4
 .Ldec_key_reorder_loop:
 	movups	(%AESKEY), %xmm0
 	movups	(%ROUNDS64), %xmm1
@@ -533,7 +533,7 @@ FRAME_BEGIN
 	cmp	%AESKEY, %ROUNDS64
 	ja	.Ldec_key_reorder_loop
 
-.align 4
+.balign 4
 .Ldec_key_inv_loop:
 	movups	(%rcx), %xmm0
 	// Convert an encryption round key to a form usable for decryption
@@ -622,7 +622,7 @@ ENTRY_NP(aes_encrypt_intel)
 	movups	-0x50(%KEYP), %KEY
 	aesenc	%KEY, %STATE
 
-.align 4
+.balign 4
 .Lenc192:
 	// AES 192 and 256
 	movups	-0x40(%KEYP), %KEY
@@ -630,7 +630,7 @@ ENTRY_NP(aes_encrypt_intel)
 	movups	-0x30(%KEYP), %KEY
 	aesenc	%KEY, %STATE
 
-.align 4
+.balign 4
 .Lenc128:
 	// AES 128, 192, and 256
 	movups	-0x20(%KEYP), %KEY
@@ -705,7 +705,7 @@ ENTRY_NP(aes_decrypt_intel)
 	movups	-0x50(%KEYP), %KEY
 	aesdec	%KEY, %STATE
 
-.align 4
+.balign 4
 .Ldec192:
 	// AES 192 and 256
 	movups	-0x40(%KEYP), %KEY
@@ -713,7 +713,7 @@ ENTRY_NP(aes_decrypt_intel)
 	movups	-0x30(%KEYP), %KEY
 	aesdec	%KEY, %STATE
 
-.align 4
+.balign 4
 .Ldec128:
 	// AES 128, 192, and 256
 	movups	-0x20(%KEYP), %KEY
diff --git a/module/icp/asm-x86_64/aes/aes_amd64.S b/module/icp/asm-x86_64/aes/aes_amd64.S
index d5cf4040fb93..c4870a28ead6 100644
--- a/module/icp/asm-x86_64/aes/aes_amd64.S
+++ b/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -694,7 +694,7 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
  * unsigned char *out, const aes_encrypt_ctx cx[1])/
  */
 SECTION_STATIC
-.align 64
+.balign 64
 enc_tab:
 	enc_vals(u8)
 #ifdef LAST_ROUND_TABLES
@@ -800,7 +800,7 @@ ENTRY_NP(aes_encrypt_amd64)
  * unsigned char *out, const aes_encrypt_ctx cx[1])/
  */
 SECTION_STATIC
-.align 64
+.balign 64
 dec_tab:
 	dec_vals(v8)
 #ifdef LAST_ROUND_TABLES
diff --git a/module/icp/asm-x86_64/blake3/blake3_avx2.S b/module/icp/asm-x86_64/blake3/blake3_avx2.S
index 8f9e766486f1..0ebec5c1095e 100644
--- a/module/icp/asm-x86_64/blake3/blake3_avx2.S
+++ b/module/icp/asm-x86_64/blake3/blake3_avx2.S
@@ -1791,7 +1791,6 @@ ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
 SET_SIZE(zfs_blake3_hash_many_avx2)
 
 SECTION_STATIC
-.section .rodata
 
 .p2align 6
 ADD0:
diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index 75dd2c721f56..909b2147dff9 100644
--- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -53,12 +53,17 @@
 /* Windows userland links with OpenSSL */
 #if !defined (_WIN32) || defined (_KERNEL)
 
+/* Apple needs _ */
+#if defined (__APPLE__)
+#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe
+#endif
+
 .extern gcm_avx_can_use_movbe
 
 .text
 
 #ifdef HAVE_MOVBE
-.align 32
+.balign 32
 FUNCTION(_aesni_ctr32_ghash_6x)
 .cfi_startproc
 	ENDBR
@@ -75,7 +80,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
 	vmovdqu	%xmm4,16+8(%rsp)
 	jmp	.Loop6x
 
-.align 32
+.balign 32
 .Loop6x:
 	addl	$100663296,%ebx
 	jc	.Lhandle_ctr32
@@ -287,7 +292,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
 	vmovups	224-128(%rcx),%xmm1
 	jmp	.Lenc_tail
 
-.align 32
+.balign 32
 .Lhandle_ctr32:
 	vmovdqu	(%r11),%xmm0
 	vpshufb	%xmm0,%xmm1,%xmm6
@@ -309,7 +314,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
 	vpshufb	%xmm0,%xmm1,%xmm1
 	jmp	.Lresume_ctr32
 
-.align 32
+.balign 32
 .Lenc_tail:
 	vaesenc	%xmm15,%xmm9,%xmm9
 	vmovdqu	%xmm7,16+8(%rsp)
@@ -374,7 +379,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
 SET_SIZE(_aesni_ctr32_ghash_6x)
 #endif /* ifdef HAVE_MOVBE */
 
-.align 32
+.balign 32
 FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
 .cfi_startproc
 	ENDBR
@@ -391,7 +396,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
 	vmovdqu	%xmm4,16+8(%rsp)
 	jmp	.Loop6x_nmb
 
-.align 32
+.balign 32
 .Loop6x_nmb:
 	addl	$100663296,%ebx
 	jc	.Lhandle_ctr32_nmb
@@ -615,7 +620,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
 	vmovups	224-128(%rcx),%xmm1
 	jmp	.Lenc_tail_nmb
 
-.align 32
+.balign 32
 .Lhandle_ctr32_nmb:
 	vmovdqu	(%r11),%xmm0
 	vpshufb	%xmm0,%xmm1,%xmm6
@@ -637,7 +642,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
 	vpshufb	%xmm0,%xmm1,%xmm1
 	jmp	.Lresume_ctr32_nmb
 
-.align 32
+.balign 32
 .Lenc_tail_nmb:
 	vaesenc	%xmm15,%xmm9,%xmm9
 	vmovdqu	%xmm7,16+8(%rsp)
@@ -818,7 +823,7 @@ ENTRY_ALIGN(aesni_gcm_decrypt, 32)
 .cfi_endproc
 SET_SIZE(aesni_gcm_decrypt)
 
-.align 32
+.balign 32
 FUNCTION(_aesni_ctr32_6x)
 .cfi_startproc
 	ENDBR
@@ -843,7 +848,7 @@ FUNCTION(_aesni_ctr32_6x)
 	vpxor	%xmm4,%xmm14,%xmm14
 	jmp	.Loop_ctr32
 
-.align 16
+.balign 16
 .Loop_ctr32:
 	vaesenc	%xmm15,%xmm9,%xmm9
 	vaesenc	%xmm15,%xmm10,%xmm10
@@ -886,7 +891,7 @@ FUNCTION(_aesni_ctr32_6x)
 	leaq	96(%rsi),%rsi
 	RET
 
-.align 32
+.balign 32
 .Lhandle_ctr32_2:
 	vpshufb	%xmm0,%xmm1,%xmm6
 	vmovdqu	48(%r11),%xmm5
@@ -1237,7 +1242,7 @@ SET_SIZE(atomic_toggle_boolean_nv)
 
 SECTION_STATIC
 
-.align 64
+.balign 64
 .Lbswap_mask:
 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
 .Lpoly:
@@ -1249,7 +1254,7 @@ SECTION_STATIC
 .Lone_lsb:
 .byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 .byte	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64
 /* Mark the stack non-executable.
  */
 #if defined(__linux__) && defined(__ELF__)
diff --git a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
index eb9514e10cda..dec782fda33e 100644
--- a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
+++ b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S
@@ -101,8 +101,8 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
 // static uint8_t byte_swap16_mask[] = {
 //	15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
 
-.section .rodata
-.align XMM_ALIGN
+SECTION_STATIC
+.balign XMM_ALIGN
 .Lbyte_swap16_mask:
 	.byte	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
 
diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S
index d48b4f2155cc..f62e056d4b64 100644
--- a/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -188,7 +188,7 @@ ENTRY_ALIGN(gcm_init_htab_avx, 32)
 	vpxor	%xmm2,%xmm6,%xmm6
 	movq	$4,%r10
 	jmp	.Linit_start_avx
-.align 32
+.balign 32
 .Linit_loop_avx:
 	vpalignr	$8,%xmm3,%xmm4,%xmm5
 	vmovdqu	%xmm5,-16(%rdi)
@@ -386,7 +386,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
 	subq	$0x80,%rcx
 	jmp	.Loop8x_avx
 
-.align 32
+.balign 32
 .Loop8x_avx:
 	vpunpckhqdq	%xmm15,%xmm15,%xmm8
 	vmovdqu	112(%rdx),%xmm14
@@ -506,7 +506,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
 	addq	$0x80,%rcx
 	jmp	.Ltail_no_xor_avx
 
-.align 32
+.balign 32
 .Lshort_avx:
 	vmovdqu	-16(%rdx,%rcx,1),%xmm14
 	leaq	(%rdx,%rcx,1),%rdx
@@ -610,7 +610,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
 	subq	$0x10,%rcx
 	jmp	.Ltail_avx
 
-.align 32
+.balign 32
 .Ltail_avx:
 	vpxor	%xmm10,%xmm15,%xmm15
 .Ltail_no_xor_avx:
@@ -658,7 +658,7 @@ SET_SIZE(gcm_ghash_avx)
 #endif /* !_WIN32 || _KERNEL */
 
 SECTION_STATIC
-.align 64
+.balign 64
 .Lbswap_mask:
 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
 .L0x1c2_polynomial:
@@ -667,7 +667,7 @@ SECTION_STATIC
 .long	7,0,7,0
 .L7_mask_poly:
 .long	7,0,450,0
-.align 64
+.balign 64
 SET_OBJ(.Lrem_4bit)
 .Lrem_4bit:
 .long	0,0,0,471859200,0,943718400,0,610271232
@@ -710,7 +710,7 @@ SET_OBJ(.Lrem_8bit)
 .value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
 .byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 
-.align 64
+.balign 64
 /* Mark the stack non-executable.
  */
 #if defined(__linux__) && defined(__ELF__)
diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S
index 321d5da461db..f1fde51c1d69 100644
--- a/module/icp/asm-x86_64/sha2/sha256_impl.S
+++ b/module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -133,7 +133,7 @@ ENTRY_NP(SHA256TransformBlocks)
 	mov	4*7(%rdi),%r11d
 	jmp	.Lloop
 
-.align 16
+.balign 16
 .Lloop:
 	xor	%rdi,%rdi
 	mov	4*0(%rsi),%r12d
@@ -873,7 +873,7 @@ ENTRY_NP(SHA256TransformBlocks)
 	add	%r14d,%eax	# h+=Maj(a,b,c)
 	jmp	.Lrounds_16_xx
 
-.align 16
+.balign 16
 .Lrounds_16_xx:
 	mov	4(%rsp),%r13d
 	mov	56(%rsp),%r12d
@@ -2063,8 +2063,8 @@ ENTRY_NP(SHA256TransformBlocks)
 .cfi_endproc
 SET_SIZE(SHA256TransformBlocks)
 
-.section .rodata
-.align 64
+SECTION_STATIC
+.balign 64
 SET_OBJ(K256)
 K256:
 	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S
index 180f8e366060..b2f7d4863d8a 100644
--- a/module/icp/asm-x86_64/sha2/sha512_impl.S
+++ b/module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -134,7 +134,7 @@ ENTRY_NP(SHA512TransformBlocks)
 	mov	8*7(%rdi),%r11
 	jmp	.Lloop
 
-.align 16
+.balign 16
 .Lloop:
 	xor	%rdi,%rdi
 	mov	8*0(%rsi),%r12
@@ -874,7 +874,7 @@ ENTRY_NP(SHA512TransformBlocks)
 	add	%r14,%rax	# h+=Maj(a,b,c)
 	jmp	.Lrounds_16_xx
 
-.align 16
+.balign 16
 .Lrounds_16_xx:
 	mov	8(%rsp),%r13
 	mov	112(%rsp),%r12
@@ -2064,8 +2064,8 @@ ENTRY_NP(SHA512TransformBlocks)
 .cfi_endproc
 SET_SIZE(SHA512TransformBlocks)
 
-.section .rodata
-.align 64
+SECTION_STATIC
+.balign 64
 SET_OBJ(K512)
 K512:
 	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
@@ -2113,4 +2113,3 @@ K512:
 #if defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
 #endif
-