diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 436b21fd13463..bec13484450d7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1308,6 +1308,8 @@ class AArch64TargetLowering : public TargetLowering { bool preferScalarizeSplat(SDNode *N) const override; unsigned getMinimumJumpTableEntries() const override; + + bool softPromoteHalfType() const override { return true; } }; namespace AArch64 { diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll index 37186cf22ccc7..a34f7abcc22a3 100644 --- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll +++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll @@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 { ; NOFP16-NEXT: .cfi_offset w22, -32 ; NOFP16-NEXT: .cfi_offset w30, -48 ; NOFP16-NEXT: mov w21, w0 -; NOFP16-NEXT: and w0, w2, #0xffff +; NOFP16-NEXT: and w0, w1, #0xffff ; NOFP16-NEXT: mov x19, x3 -; NOFP16-NEXT: mov w20, w1 +; NOFP16-NEXT: mov w20, w2 ; NOFP16-NEXT: bl __gnu_h2f_ieee ; NOFP16-NEXT: mov w22, w0 ; NOFP16-NEXT: and w0, w21, #0xffff ; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w21, w0 +; NOFP16-NEXT: mov w8, w0 ; NOFP16-NEXT: and w0, w20, #0xffff +; NOFP16-NEXT: orr x21, x8, x22, lsl #32 ; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w8, w21 -; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0 -; NOFP16-NEXT: str w22, [x19, #8] -; NOFP16-NEXT: orr x8, x8, x0, lsl #32 +; NOFP16-NEXT: str x21, [x19] ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; NOFP16-NEXT: str x8, [x19] +; NOFP16-NEXT: str w0, [x19, #8] ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload ; NOFP16-NEXT: ret @@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 { define void @outgoing_v4f16_return(ptr %ptr) #0 { ; NOFP16-LABEL: outgoing_v4f16_return: ; NOFP16: // %bb.0: -; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill -; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; NOFP16-NEXT: .cfi_def_cfa_offset 48 +; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; NOFP16-NEXT: .cfi_def_cfa_offset 16 ; NOFP16-NEXT: .cfi_offset w19, -8 -; NOFP16-NEXT: .cfi_offset w20, -16 -; NOFP16-NEXT: .cfi_offset w21, -24 -; NOFP16-NEXT: .cfi_offset w22, -32 -; NOFP16-NEXT: .cfi_offset w23, -40 -; NOFP16-NEXT: .cfi_offset w30, -48 +; NOFP16-NEXT: .cfi_offset w30, -16 ; NOFP16-NEXT: mov x19, x0 ; NOFP16-NEXT: bl v4f16_result -; NOFP16-NEXT: and w0, w0, #0xffff -; NOFP16-NEXT: mov w20, w1 -; NOFP16-NEXT: mov w21, w2 -; NOFP16-NEXT: mov w22, w3 -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w23, w0 -; NOFP16-NEXT: and w0, w20, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w20, w0 -; NOFP16-NEXT: and w0, w21, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w21, w0 -; NOFP16-NEXT: and w0, w22, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #6] -; NOFP16-NEXT: mov w0, w21 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #4] -; NOFP16-NEXT: mov w0, w20 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #2] -; NOFP16-NEXT: mov w0, w23 -; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: strh w2, [x19, #4] +; NOFP16-NEXT: strh w3, [x19, #6] +; NOFP16-NEXT: strh w1, [x19, #2] ; NOFP16-NEXT: strh w0, [x19] -; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload +; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; NOFP16-NEXT: ret %val = call <4 x half> @v4f16_result() store <4 x half> %val, ptr %ptr @@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 { define void @outgoing_v8f16_return(ptr %ptr) #0 { ; NOFP16-LABEL: outgoing_v8f16_return: ; NOFP16: // %bb.0: -; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill -; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill -; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill -; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; NOFP16-NEXT: .cfi_def_cfa_offset 80 +; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; NOFP16-NEXT: .cfi_def_cfa_offset 16 ; NOFP16-NEXT: .cfi_offset w19, -8 -; NOFP16-NEXT: .cfi_offset w20, -16 -; NOFP16-NEXT: .cfi_offset w21, -24 -; NOFP16-NEXT: .cfi_offset w22, -32 -; NOFP16-NEXT: .cfi_offset w23, -40 -; NOFP16-NEXT: .cfi_offset w24, -48 -; NOFP16-NEXT: .cfi_offset w25, -56 -; NOFP16-NEXT: .cfi_offset w26, -64 -; NOFP16-NEXT: .cfi_offset w27, -72 -; NOFP16-NEXT: .cfi_offset w30, -80 +; NOFP16-NEXT: .cfi_offset w30, -16 ; NOFP16-NEXT: mov x19, x0 ; NOFP16-NEXT: bl v8f16_result -; NOFP16-NEXT: and w0, w0, #0xffff -; NOFP16-NEXT: mov w21, w1 -; NOFP16-NEXT: mov w22, w2 -; NOFP16-NEXT: mov w23, w3 -; NOFP16-NEXT: mov w24, w4 -; NOFP16-NEXT: mov w25, w5 -; NOFP16-NEXT: mov w26, w6 -; NOFP16-NEXT: mov w27, w7 -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w20, w0 -; NOFP16-NEXT: and w0, w21, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w21, w0 -; NOFP16-NEXT: and w0, w22, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w22, w0 -; NOFP16-NEXT: and w0, w23, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w23, w0 -; NOFP16-NEXT: and w0, w24, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w24, w0 -; NOFP16-NEXT: and w0, w25, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w25, w0 -; NOFP16-NEXT: and w0, w26, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: mov w26, w0 -; NOFP16-NEXT: and w0, w27, #0xffff -; NOFP16-NEXT: bl __gnu_h2f_ieee -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #14] -; NOFP16-NEXT: mov w0, w26 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #12] -; NOFP16-NEXT: mov w0, w25 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #10] -; NOFP16-NEXT: mov w0, w24 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #8] -; NOFP16-NEXT: mov w0, w23 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #6] -; NOFP16-NEXT: mov w0, w22 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #4] -; NOFP16-NEXT: mov w0, w21 -; NOFP16-NEXT: bl __gnu_f2h_ieee -; NOFP16-NEXT: strh w0, [x19, #2] -; NOFP16-NEXT: mov w0, w20 -; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: strh w5, [x19, #10] +; NOFP16-NEXT: strh w7, [x19, #14] +; NOFP16-NEXT: strh w6, [x19, #12] +; NOFP16-NEXT: strh w4, [x19, #8] +; NOFP16-NEXT: strh w3, [x19, #6] +; NOFP16-NEXT: strh w2, [x19, #4] +; NOFP16-NEXT: strh w1, [x19, #2] ; NOFP16-NEXT: strh w0, [x19] -; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload -; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload +; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; NOFP16-NEXT: ret %val = call <8 x half> @v8f16_result() store <8 x half> %val, ptr %ptr