From 5e9cf7c6c1e115380204536ac37614d0ed196e25 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 17 Nov 2025 12:09:41 +0000 Subject: [PATCH 1/2] Add tests for splatting nzero, pinf, ninf and nan. --- llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 340 +++++++++++++++++- 1 file changed, 338 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 5cca5539048b5..19e13ec484380 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -509,6 +509,342 @@ define @splat_nxv2bf16_imm() { ret splat(bfloat 1.0) } +define @splat_nzero_nxv2f16() { +; CHECK-LABEL: splat_nzero_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv4f16() { +; CHECK-LABEL: splat_nzero_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv8f16() { +; CHECK-LABEL: splat_nzero_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv2f32() { +; CHECK-LABEL: splat_nzero_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv4f32() { +; CHECK-LABEL: splat_nzero_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv2f64() { +; CHECK-LABEL: splat_nzero_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ret + ret splat (double -0.0) +} + +define @splat_nzero_nxv2bf16() { +; CHECK-LABEL: splat_nzero_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv4bf16() { +; CHECK-LABEL: splat_nzero_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv8bf16() { +; CHECK-LABEL: splat_nzero_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_pinf_nxv2f16() { +; CHECK-LABEL: splat_pinf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #31744 // =0x7c00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f16() { +; CHECK-LABEL: splat_pinf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #31744 // =0x7c00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv8f16() { +; CHECK-LABEL: splat_pinf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #31744 // =0x7c00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f32() { +; CHECK-LABEL: splat_pinf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f32() { +; CHECK-LABEL: splat_pinf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f64() { +; CHECK-LABEL: splat_pinf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ret + ret splat (double 0x7FF0000000000000) +} + +define @splat_pinf_nxv2bf16() { +; CHECK-LABEL: splat_pinf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32640 // =0x7f80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv4bf16() { +; CHECK-LABEL: splat_pinf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32640 // =0x7f80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv8bf16() { +; CHECK-LABEL: splat_pinf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32640 // =0x7f80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_ninf_nxv2f16() { +; CHECK-LABEL: splat_ninf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #64512 // =0xfc00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f16() { +; CHECK-LABEL: splat_ninf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #64512 // =0xfc00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv8f16() { +; CHECK-LABEL: splat_ninf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #64512 // =0xfc00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f32() { +; CHECK-LABEL: splat_ninf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f32() { +; CHECK-LABEL: splat_ninf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f64() { +; CHECK-LABEL: splat_ninf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-4503599627370496 // =0xfff0000000000000 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ret + ret splat (double 0xFFF0000000000000) +} + +define @splat_ninf_nxv2bf16() { +; CHECK-LABEL: splat_ninf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65408 // =0xff80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv4bf16() { +; CHECK-LABEL: splat_ninf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65408 // =0xff80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv8bf16() { +; CHECK-LABEL: splat_ninf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65408 // =0xff80 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_nan_nxv2f16() { +; CHECK-LABEL: splat_nan_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32256 // =0x7e00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv4f16() { +; CHECK-LABEL: splat_nan_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32256 // =0x7e00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv8f16() { +; CHECK-LABEL: splat_nan_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32256 // =0x7e00 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv2f32() { +; CHECK-LABEL: splat_nan_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv4f32() { +; CHECK-LABEL: splat_nan_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv2f64() { +; CHECK-LABEL: splat_nan_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #9221120237041090560 // =0x7ff8000000000000 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ret + ret splat (double 0x7FF8000000000000) +} + +define @splat_nan_nxv2bf16() { +; CHECK-LABEL: splat_nan_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32704 // =0x7fc0 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv4bf16() { +; CHECK-LABEL: splat_nan_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32704 // =0x7fc0 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv8bf16() { +; CHECK-LABEL: splat_nan_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32704 // =0x7fc0 +; CHECK-NEXT: fmov h0, w8 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + define @splat_nxv4i32_fold( %x) { ; CHECK-LABEL: splat_nxv4i32_fold: ; CHECK: // %bb.0: @@ -581,8 +917,8 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: adrp x8, .LCPI60_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI60_0 +; CHECK-NEXT: adrp x8, .LCPI96_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI96_0 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret ret splat(double 3.33) From 22bfdf1eba89605a26be3413cd64d82e838d57bf Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 17 Nov 2025 12:57:54 +0000 Subject: [PATCH 2/2] [LLVM][CodeGen][SVE] Use DUPM for constantfp splats. This helps cases where the immediate range of FDUP is not sufficient. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 71 +++--- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 25 +++ .../test/CodeGen/AArch64/sve-bf16-combines.ll | 8 +- llvm/test/CodeGen/AArch64/sve-fp-combine.ll | 15 +- .../CodeGen/AArch64/sve-fp-reduce-fadda.ll | 15 +- llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll | 121 +++++------ llvm/test/CodeGen/AArch64/sve-llrint.ll | 202 +++++++++--------- llvm/test/CodeGen/AArch64/sve-lrint.ll | 202 +++++++++--------- llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 120 ++++------- llvm/test/CodeGen/AArch64/sve-vselect-imm.ll | 18 +- 11 files changed, 372 insertions(+), 427 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index f1db05dda4e40..08466667c0fa5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert) { - if (auto CNode = dyn_cast(N)) { - uint64_t ImmVal = CNode->getZExtValue(); - SDLoc DL(N); - - if (Invert) - ImmVal = ~ImmVal; + uint64_t ImmVal; + if (auto CI = dyn_cast(N)) + ImmVal = CI->getZExtValue(); + else if (auto CFP = dyn_cast(N)) + ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + else + return false; - // Shift mask depending on type size. - switch (VT.SimpleTy) { - case MVT::i8: - ImmVal &= 0xFF; - ImmVal |= ImmVal << 8; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i16: - ImmVal &= 0xFFFF; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i32: - ImmVal &= 0xFFFFFFFF; - ImmVal |= ImmVal << 32; - break; - case MVT::i64: - break; - default: - llvm_unreachable("Unexpected type"); - } + if (Invert) + ImmVal = ~ImmVal; - uint64_t encoding; - if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { - Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); - return true; - } + // Shift mask depending on type size. + switch (VT.SimpleTy) { + case MVT::i8: + ImmVal &= 0xFF; + ImmVal |= ImmVal << 8; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i16: + ImmVal &= 0xFFFF; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i32: + ImmVal &= 0xFFFFFFFF; + ImmVal |= ImmVal << 32; + break; + case MVT::i64: + break; + default: + llvm_unreachable("Unexpected type"); } - return false; + + uint64_t encoding; + if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) + return false; + + Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64); + return true; } // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c8c21c4822ffe..e99b3f8ff07e0 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in { (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements - let AddedComplexity = 2 in { + let AddedComplexity = 3 in { def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1664f4ad0c8fa..1e771e1fb9403 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern", def SVELogicalImm32Pat : ComplexPattern", []>; def SVELogicalImm64Pat : ComplexPattern", []>; +def SVELogicalFPImm16Pat : ComplexPattern", []>; +def SVELogicalFPImm32Pat : ComplexPattern", []>; +def SVELogicalFPImm64Pat : ComplexPattern", []>; +def SVELogicalBFPImmPat : ComplexPattern", []>; + def SVELogicalImm8NotPat : ComplexPattern", []>; def SVELogicalImm16NotPat : ComplexPattern", []>; def SVELogicalImm32NotPat : ComplexPattern", []>; @@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm { (!cast(NAME) i64:$imm)>; def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))), (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll index 16e8feb0dc5bb..fc3e018f2ec7a 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll @@ -632,7 +632,6 @@ define @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fadd_sel_fmul_d_negzero( %a, define @fsub_sel_fmul_h_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_h_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z3.h, #0x8000 ; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h +; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: fsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1150,10 +1149,9 @@ define @fsub_sel_fmul_h_negzero( %a, @fsub_sel_fmul_s_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_s_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s +; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: fsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1166,10 +1164,9 @@ define @fsub_sel_fmul_s_negzero( %a, @fsub_sel_fmul_d_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_d_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: fsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %fmul = fmul %b, %c diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll index 8750867c56731..1223ae1c0cbdd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll @@ -51,10 +51,9 @@ define half @fadda_nxv6f16( %v, half %s) { ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z2.h, #0x8000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: str z0, [sp] -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: ptrue p0.h @@ -77,12 +76,11 @@ define half @fadda_nxv10f16( %v, half %s) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: str z1, [sp] +; CHECK-NEXT: addvl x8, sp, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: ldr z1, [sp] ; CHECK-NEXT: str z1, [sp, #1, mul vl] @@ -105,11 +103,10 @@ define half @fadda_nxv12f16( %v, half %s) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h ; CHECK-NEXT: fmov s0, s2 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 4ae7ac7b292e9..897ade00320db 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -454,18 +454,17 @@ declare @llvm.fptosi.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0xffffffff80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) @@ -475,18 +474,17 @@ define @test_signed_v2f16_v2i32( %f) { define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #0x80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h -; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) @@ -496,26 +494,25 @@ define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: mov z4.s, #0x80000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.s, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.s, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s -; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z1.s, z4.s ; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret @@ -526,18 +523,17 @@ define @test_signed_v8f16_v8i32( %f) { define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: mov z1.s, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s +; CHECK-NEXT: sel z0.s, p1, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) @@ -547,18 +543,17 @@ define @test_signed_v4f16_v4i16( %f) { define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h +; CHECK-NEXT: mov z1.h, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.h, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.h, p2, z1.h, z2.h +; CHECK-NEXT: sel z0.h, p1, z2.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) @@ -568,18 +563,17 @@ define @test_signed_v8f16_v8i16( %f) { define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0x8000000000000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) @@ -589,26 +583,25 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d -; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z1.d, z4.d ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index f964d70e0a05c..c2bb0c81ab405 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -5,9 +5,8 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -28,9 +27,8 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -52,10 +50,9 @@ define @llrint_v4i64_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -92,10 +89,9 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -162,12 +158,13 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -175,10 +172,8 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -191,17 +186,17 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h -; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -212,7 +207,7 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -221,31 +216,31 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -302,48 +297,47 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -354,124 +348,124 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index f517e7fe8dc16..f1224d30d53cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -6,9 +6,8 @@ define @lrint_v1f16( %x) { ; CHECK-LABEL: lrint_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -29,9 +28,8 @@ define @lrint_v2f16( %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -53,10 +51,9 @@ define @lrint_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -93,10 +90,9 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -163,12 +159,13 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -176,10 +173,8 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -192,17 +187,17 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h -; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -213,7 +208,7 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -222,31 +217,31 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -303,48 +298,47 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -355,124 +349,124 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 19e13ec484380..1ceaa5ad27734 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -512,8 +512,7 @@ define @splat_nxv2bf16_imm() { define @splat_nzero_nxv2f16() { ; CHECK-LABEL: splat_nzero_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (half -0.0) } @@ -521,8 +520,7 @@ define @splat_nzero_nxv2f16() { define @splat_nzero_nxv4f16() { ; CHECK-LABEL: splat_nzero_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (half -0.0) } @@ -530,8 +528,7 @@ define @splat_nzero_nxv4f16() { define @splat_nzero_nxv8f16() { ; CHECK-LABEL: splat_nzero_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (half -0.0) } @@ -539,8 +536,7 @@ define @splat_nzero_nxv8f16() { define @splat_nzero_nxv2f32() { ; CHECK-LABEL: splat_nzero_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x80000000 ; CHECK-NEXT: ret ret splat (float -0.0) } @@ -548,8 +544,7 @@ define @splat_nzero_nxv2f32() { define @splat_nzero_nxv4f32() { ; CHECK-LABEL: splat_nzero_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x80000000 ; CHECK-NEXT: ret ret splat (float -0.0) } @@ -557,8 +552,7 @@ define @splat_nzero_nxv4f32() { define @splat_nzero_nxv2f64() { ; CHECK-LABEL: splat_nzero_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: ret ret splat (double -0.0) } @@ -566,9 +560,7 @@ define @splat_nzero_nxv2f64() { define @splat_nzero_nxv2bf16() { ; CHECK-LABEL: splat_nzero_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (bfloat -0.0) } @@ -576,9 +568,7 @@ define @splat_nzero_nxv2bf16() { define @splat_nzero_nxv4bf16() { ; CHECK-LABEL: splat_nzero_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (bfloat -0.0) } @@ -586,9 +576,7 @@ define @splat_nzero_nxv4bf16() { define @splat_nzero_nxv8bf16() { ; CHECK-LABEL: splat_nzero_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: ret ret splat (bfloat -0.0) } @@ -596,8 +584,7 @@ define @splat_nzero_nxv8bf16() { define @splat_pinf_nxv2f16() { ; CHECK-LABEL: splat_pinf_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31744 // =0x7c00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7c00 ; CHECK-NEXT: ret ret splat (half 0x7FF0000000000000) } @@ -605,8 +592,7 @@ define @splat_pinf_nxv2f16() { define @splat_pinf_nxv4f16() { ; CHECK-LABEL: splat_pinf_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31744 // =0x7c00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7c00 ; CHECK-NEXT: ret ret splat (half 0x7FF0000000000000) } @@ -614,8 +600,7 @@ define @splat_pinf_nxv4f16() { define @splat_pinf_nxv8f16() { ; CHECK-LABEL: splat_pinf_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31744 // =0x7c00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7c00 ; CHECK-NEXT: ret ret splat (half 0x7FF0000000000000) } @@ -623,8 +608,7 @@ define @splat_pinf_nxv8f16() { define @splat_pinf_nxv2f32() { ; CHECK-LABEL: splat_pinf_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x7f800000 ; CHECK-NEXT: ret ret splat (float 0x7FF0000000000000) } @@ -632,8 +616,7 @@ define @splat_pinf_nxv2f32() { define @splat_pinf_nxv4f32() { ; CHECK-LABEL: splat_pinf_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x7f800000 ; CHECK-NEXT: ret ret splat (float 0x7FF0000000000000) } @@ -641,8 +624,7 @@ define @splat_pinf_nxv4f32() { define @splat_pinf_nxv2f64() { ; CHECK-LABEL: splat_pinf_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: mov z0.d, #0x7ff0000000000000 ; CHECK-NEXT: ret ret splat (double 0x7FF0000000000000) } @@ -650,9 +632,7 @@ define @splat_pinf_nxv2f64() { define @splat_pinf_nxv2bf16() { ; CHECK-LABEL: splat_pinf_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32640 // =0x7f80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF0000000000000) } @@ -660,9 +640,7 @@ define @splat_pinf_nxv2bf16() { define @splat_pinf_nxv4bf16() { ; CHECK-LABEL: splat_pinf_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32640 // =0x7f80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF0000000000000) } @@ -670,9 +648,7 @@ define @splat_pinf_nxv4bf16() { define @splat_pinf_nxv8bf16() { ; CHECK-LABEL: splat_pinf_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32640 // =0x7f80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF0000000000000) } @@ -680,8 +656,7 @@ define @splat_pinf_nxv8bf16() { define @splat_ninf_nxv2f16() { ; CHECK-LABEL: splat_ninf_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64512 // =0xfc00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0xfc00 ; CHECK-NEXT: ret ret splat (half 0xFFF0000000000000) } @@ -689,8 +664,7 @@ define @splat_ninf_nxv2f16() { define @splat_ninf_nxv4f16() { ; CHECK-LABEL: splat_ninf_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64512 // =0xfc00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0xfc00 ; CHECK-NEXT: ret ret splat (half 0xFFF0000000000000) } @@ -698,8 +672,7 @@ define @splat_ninf_nxv4f16() { define @splat_ninf_nxv8f16() { ; CHECK-LABEL: splat_ninf_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64512 // =0xfc00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0xfc00 ; CHECK-NEXT: ret ret splat (half 0xFFF0000000000000) } @@ -707,8 +680,7 @@ define @splat_ninf_nxv8f16() { define @splat_ninf_nxv2f32() { ; CHECK-LABEL: splat_ninf_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0xff800000 ; CHECK-NEXT: ret ret splat (float 0xFFF0000000000000) } @@ -716,8 +688,7 @@ define @splat_ninf_nxv2f32() { define @splat_ninf_nxv4f32() { ; CHECK-LABEL: splat_ninf_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0xff800000 ; CHECK-NEXT: ret ret splat (float 0xFFF0000000000000) } @@ -725,8 +696,7 @@ define @splat_ninf_nxv4f32() { define @splat_ninf_nxv2f64() { ; CHECK-LABEL: splat_ninf_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4503599627370496 // =0xfff0000000000000 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: mov z0.d, #0xfff0000000000000 ; CHECK-NEXT: ret ret splat (double 0xFFF0000000000000) } @@ -734,9 +704,7 @@ define @splat_ninf_nxv2f64() { define @splat_ninf_nxv2bf16() { ; CHECK-LABEL: splat_ninf_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65408 // =0xff80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0xff80 ; CHECK-NEXT: ret ret splat (bfloat 0xFFF0000000000000) } @@ -744,9 +712,7 @@ define @splat_ninf_nxv2bf16() { define @splat_ninf_nxv4bf16() { ; CHECK-LABEL: splat_ninf_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65408 // =0xff80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0xff80 ; CHECK-NEXT: ret ret splat (bfloat 0xFFF0000000000000) } @@ -754,9 +720,7 @@ define @splat_ninf_nxv4bf16() { define @splat_ninf_nxv8bf16() { ; CHECK-LABEL: splat_ninf_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65408 // =0xff80 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: dupm z0.h, #0xff80 ; CHECK-NEXT: ret ret splat (bfloat 0xFFF0000000000000) } @@ -764,8 +728,7 @@ define @splat_ninf_nxv8bf16() { define @splat_nan_nxv2f16() { ; CHECK-LABEL: splat_nan_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32256 // =0x7e00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7e00 ; CHECK-NEXT: ret ret splat (half 0x7FF8000000000000) } @@ -773,8 +736,7 @@ define @splat_nan_nxv2f16() { define @splat_nan_nxv4f16() { ; CHECK-LABEL: splat_nan_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32256 // =0x7e00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7e00 ; CHECK-NEXT: ret ret splat (half 0x7FF8000000000000) } @@ -782,8 +744,7 @@ define @splat_nan_nxv4f16() { define @splat_nan_nxv8f16() { ; CHECK-LABEL: splat_nan_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32256 // =0x7e00 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: dupm z0.h, #0x7e00 ; CHECK-NEXT: ret ret splat (half 0x7FF8000000000000) } @@ -791,8 +752,7 @@ define @splat_nan_nxv8f16() { define @splat_nan_nxv2f32() { ; CHECK-LABEL: splat_nan_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x7fc00000 ; CHECK-NEXT: ret ret splat (float 0x7FF8000000000000) } @@ -800,8 +760,7 @@ define @splat_nan_nxv2f32() { define @splat_nan_nxv4f32() { ; CHECK-LABEL: splat_nan_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #0x7fc00000 ; CHECK-NEXT: ret ret splat (float 0x7FF8000000000000) } @@ -809,8 +768,7 @@ define @splat_nan_nxv4f32() { define @splat_nan_nxv2f64() { ; CHECK-LABEL: splat_nan_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 // =0x7ff8000000000000 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: mov z0.d, #0x7ff8000000000000 ; CHECK-NEXT: ret ret splat (double 0x7FF8000000000000) } @@ -818,9 +776,7 @@ define @splat_nan_nxv2f64() { define @splat_nan_nxv2bf16() { ; CHECK-LABEL: splat_nan_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32704 // =0x7fc0 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF8000000000000) } @@ -828,9 +784,7 @@ define @splat_nan_nxv2bf16() { define @splat_nan_nxv4bf16() { ; CHECK-LABEL: splat_nan_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32704 // =0x7fc0 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF8000000000000) } @@ -838,9 +792,7 @@ define @splat_nan_nxv4bf16() { define @splat_nan_nxv8bf16() { ; CHECK-LABEL: splat_nan_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32704 // =0x7fc0 -; CHECK-NEXT: fmov h0, w8 -; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 ; CHECK-NEXT: ret ret splat (bfloat 0x7FF8000000000000) } diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll index 6b5b3d6d436cb..b04029c273ae2 100644 --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -338,8 +338,7 @@ ret %sel define @sel_merge_nxv8f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -349,8 +348,7 @@ ret %sel define @sel_merge_nx4f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -360,8 +358,7 @@ ret %sel define @sel_merge_nx2f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -371,8 +368,7 @@ ret %sel define @sel_merge_nx4f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -382,8 +378,7 @@ ret %sel define @sel_merge_nx2f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -393,8 +388,7 @@ ret %sel define @sel_merge_nx2f64_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f64_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #0x8000000000000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (double -0.0), %in