diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fa55a58d05e76..29d65d5d1db64 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26104,6 +26104,17 @@ static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool isSignExtInReg(const SDValue &V) { + if (V.getOpcode() != AArch64ISD::VASHR || + V.getOperand(0).getOpcode() != AArch64ISD::VSHL) + return false; + + unsigned BitWidth = V->getValueType(0).getScalarSizeInBits(); + unsigned ShiftAmtR = V.getConstantOperandVal(1); + unsigned ShiftAmtL = V.getOperand(0).getConstantOperandVal(1); + return (ShiftAmtR == ShiftAmtL && ShiftAmtR == (BitWidth - 1)); +} + static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && @@ -26144,6 +26155,27 @@ performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { LHS->getOperand(0), Pred); } + // setcc_merge_zero( + // pred, insert_subvector(undef, signext_inreg(vNi1), 0), != splat(0)) + // => setcc_merge_zero( + // pred, insert_subvector(undef, shl(vNi1), 0), != splat(0)) + if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) && + LHS->getOpcode() == ISD::INSERT_SUBVECTOR && LHS.hasOneUse()) { + SDValue L0 = LHS->getOperand(0); + SDValue L1 = LHS->getOperand(1); + SDValue L2 = LHS->getOperand(2); + + if (L0.getOpcode() == ISD::UNDEF && isNullConstant(L2) && + isSignExtInReg(L1)) { + SDLoc DL(N); + SDValue Shl = L1.getOperand(0); + SDValue NewLHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, + LHS.getValueType(), L0, Shl, L2); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, N->getValueType(0), + Pred, NewLHS, RHS, N->getOperand(3)); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll index c2e54d3d39394..6ab2d7c2d7857 100644 --- a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll +++ b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll @@ -24,7 +24,6 @@ define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %m ; SVE-NEXT: shl v1.4h, v1.4h, #15 ; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: cmlt v1.4h, v1.4h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; SVE-NEXT: st1h { z0.h }, p0, [x0] ; SVE-NEXT: ret @@ -41,7 +40,6 @@ define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %m ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v1.4s, v1.4s, #31 -; SVE-NEXT: cmlt v1.4s, v1.4s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; SVE-NEXT: st1w { z0.s }, p0, [x0] ; SVE-NEXT: ret @@ -63,8 +61,6 @@ define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %m ; SVE-NEXT: ushll v2.2d, v2.2s, #0 ; SVE-NEXT: shl v3.2d, v3.2d, #63 ; SVE-NEXT: shl v2.2d, v2.2d, #63 -; SVE-NEXT: cmlt v3.2d, v3.2d, #0 -; SVE-NEXT: cmlt v2.2d, v2.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0 ; SVE-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3] @@ -82,7 +78,6 @@ define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> % ; SVE-NEXT: shl v1.4h, v1.4h, #15 ; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: cmlt v1.4h, v1.4h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; SVE-NEXT: st1h { z0.h }, p0, [x0] ; SVE-NEXT: ret @@ -99,7 +94,6 @@ define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1> ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v1.4s, v1.4s, #31 -; SVE-NEXT: cmlt v1.4s, v1.4s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; SVE-NEXT: st1w { z0.s }, p0, [x0] ; SVE-NEXT: ret @@ -121,8 +115,6 @@ define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1> ; SVE-NEXT: ushll v2.2d, v2.2s, #0 ; SVE-NEXT: shl v3.2d, v3.2d, #63 ; SVE-NEXT: shl v2.2d, v2.2d, #63 -; SVE-NEXT: cmlt v3.2d, v3.2d, #0 -; SVE-NEXT: cmlt v2.2d, v2.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0 ; SVE-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3] @@ -140,7 +132,6 @@ define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mas ; SVE-NEXT: shl v1.8b, v1.8b, #7 ; SVE-NEXT: ptrue p0.b, vl8 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: cmlt v1.8b, v1.8b, #0 ; SVE-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; SVE-NEXT: st1b { z0.b }, p0, [x0] ; SVE-NEXT: ret @@ -157,7 +148,6 @@ define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %m ; SVE-NEXT: ptrue p0.h, vl8 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v1.8h, v1.8h, #15 -; SVE-NEXT: cmlt v1.8h, v1.8h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; SVE-NEXT: st1h { z0.h }, p0, [x0] ; SVE-NEXT: ret @@ -180,8 +170,6 @@ define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %m ; SVE-NEXT: ushll v2.4s, v2.4h, #0 ; SVE-NEXT: shl v3.4s, v3.4s, #31 ; SVE-NEXT: shl v2.4s, v2.4s, #31 -; SVE-NEXT: cmlt v3.4s, v3.4s, #0 -; SVE-NEXT: cmlt v2.4s, v2.4s, #0 ; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0 ; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] @@ -219,12 +207,8 @@ define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %m ; SVE-NEXT: shl v4.2d, v4.2d, #63 ; SVE-NEXT: shl v5.2d, v5.2d, #63 ; SVE-NEXT: shl v6.2d, v6.2d, #63 -; SVE-NEXT: shl v7.2d, v7.2d, #63 -; SVE-NEXT: cmlt v4.2d, v4.2d, #0 -; SVE-NEXT: cmlt v5.2d, v5.2d, #0 -; SVE-NEXT: cmlt v6.2d, v6.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z4.d, #0 -; SVE-NEXT: cmlt v4.2d, v7.2d, #0 +; SVE-NEXT: shl v4.2d, v7.2d, #63 ; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0 ; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0 @@ -247,7 +231,6 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> % ; SVE-NEXT: ptrue p0.h, vl8 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v1.8h, v1.8h, #15 -; SVE-NEXT: cmlt v1.8h, v1.8h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; SVE-NEXT: st1h { z0.h }, p0, [x0] ; SVE-NEXT: ret @@ -270,8 +253,6 @@ define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1> ; SVE-NEXT: ushll v2.4s, v2.4h, #0 ; SVE-NEXT: shl v3.4s, v3.4s, #31 ; SVE-NEXT: shl v2.4s, v2.4s, #31 -; SVE-NEXT: cmlt v3.4s, v3.4s, #0 -; SVE-NEXT: cmlt v2.4s, v2.4s, #0 ; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0 ; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] @@ -309,12 +290,8 @@ define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1> ; SVE-NEXT: shl v4.2d, v4.2d, #63 ; SVE-NEXT: shl v5.2d, v5.2d, #63 ; SVE-NEXT: shl v6.2d, v6.2d, #63 -; SVE-NEXT: shl v7.2d, v7.2d, #63 -; SVE-NEXT: cmlt v4.2d, v4.2d, #0 -; SVE-NEXT: cmlt v5.2d, v5.2d, #0 -; SVE-NEXT: cmlt v6.2d, v6.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z4.d, #0 -; SVE-NEXT: cmlt v4.2d, v7.2d, #0 +; SVE-NEXT: shl v4.2d, v7.2d, #63 ; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0 ; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0 @@ -336,7 +313,6 @@ define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> % ; SVE-NEXT: shl v1.16b, v1.16b, #7 ; SVE-NEXT: ptrue p0.b, vl16 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE-NEXT: cmlt v1.16b, v1.16b, #0 ; SVE-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; SVE-NEXT: st1b { z0.b }, p0, [x0] ; SVE-NEXT: ret @@ -357,8 +333,6 @@ define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1> ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v3.8h, v3.8h, #15 ; SVE-NEXT: shl v2.8h, v2.8h, #15 -; SVE-NEXT: cmlt v3.8h, v3.8h, #0 -; SVE-NEXT: cmlt v2.8h, v2.8h, #0 ; SVE-NEXT: cmpne p1.h, p0/z, z3.h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z2.h, #0 ; SVE-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1] @@ -391,13 +365,9 @@ define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> ; SVE-NEXT: ushll v7.4s, v7.4h, #0 ; SVE-NEXT: ushll v5.4s, v5.4h, #0 ; SVE-NEXT: shl v4.4s, v4.4s, #31 -; SVE-NEXT: cmlt v6.4s, v6.4s, #0 +; SVE-NEXT: cmpne p1.s, p0/z, z6.s, #0 ; SVE-NEXT: shl v7.4s, v7.4s, #31 ; SVE-NEXT: shl v5.4s, v5.4s, #31 -; SVE-NEXT: cmlt v4.4s, v4.4s, #0 -; SVE-NEXT: cmpne p1.s, p0/z, z6.s, #0 -; SVE-NEXT: cmlt v7.4s, v7.4s, #0 -; SVE-NEXT: cmlt v5.4s, v5.4s, #0 ; SVE-NEXT: cmpne p2.s, p0/z, z7.s, #0 ; SVE-NEXT: cmpne p3.s, p0/z, z5.s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z4.s, #0 @@ -479,8 +449,6 @@ define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> % ; SVE-NEXT: mov w8, #16 // =0x10 ; SVE-NEXT: shl v2.16b, v2.16b, #7 ; SVE-NEXT: shl v3.16b, v3.16b, #7 -; SVE-NEXT: cmlt v2.16b, v2.16b, #0 -; SVE-NEXT: cmlt v3.16b, v3.16b, #0 ; SVE-NEXT: cmpne p1.b, p0/z, z3.b, #0 ; SVE-NEXT: cmpne p0.b, p0/z, z2.b, #0 ; SVE-NEXT: st1b { z1.b }, p1, [x0, x8] @@ -565,12 +533,8 @@ define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1> ; SVE-NEXT: shl v4.8h, v4.8h, #15 ; SVE-NEXT: shl v5.8h, v5.8h, #15 ; SVE-NEXT: shl v6.8h, v6.8h, #15 -; SVE-NEXT: shl v7.8h, v7.8h, #15 -; SVE-NEXT: cmlt v4.8h, v4.8h, #0 -; SVE-NEXT: cmlt v5.8h, v5.8h, #0 -; SVE-NEXT: cmlt v6.8h, v6.8h, #0 ; SVE-NEXT: cmpne p1.h, p0/z, z4.h, #0 -; SVE-NEXT: cmlt v4.8h, v7.8h, #0 +; SVE-NEXT: shl v4.8h, v7.8h, #15 ; SVE-NEXT: cmpne p2.h, p0/z, z5.h, #0 ; SVE-NEXT: cmpne p3.h, p0/z, z6.h, #0 ; SVE-NEXT: cmpne p0.h, p0/z, z4.h, #0 @@ -595,144 +559,140 @@ define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> % ; SVE-NEXT: .cfi_offset w29, -16 ; SVE-NEXT: ldr w8, [sp, #216] ; SVE-NEXT: ldr w9, [sp, #344] -; SVE-NEXT: fmov s7, w1 +; SVE-NEXT: fmov s6, w1 ; SVE-NEXT: ldr w11, [sp, #88] ; SVE-NEXT: ldr w10, [sp, #224] ; SVE-NEXT: ptrue p0.b, vl16 -; SVE-NEXT: fmov s4, w8 -; SVE-NEXT: fmov s5, w9 +; SVE-NEXT: fmov s5, w8 +; SVE-NEXT: fmov s4, w9 ; SVE-NEXT: ldr w8, [sp, #352] -; SVE-NEXT: fmov s6, w11 +; SVE-NEXT: fmov s7, w11 ; SVE-NEXT: ldr w9, [sp, #96] -; SVE-NEXT: mov v7.b[1], w2 +; SVE-NEXT: mov v6.b[1], w2 ; SVE-NEXT: // kill: def $q2 killed $q2 def $z2 ; SVE-NEXT: // kill: def $q3 killed $q3 def $z3 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE-NEXT: mov v4.b[1], w10 -; SVE-NEXT: mov v5.b[1], w8 +; SVE-NEXT: mov v5.b[1], w10 +; SVE-NEXT: mov v4.b[1], w8 ; SVE-NEXT: ldr w8, [sp, #232] -; SVE-NEXT: mov v6.b[1], w9 +; SVE-NEXT: mov v7.b[1], w9 ; SVE-NEXT: ldr w9, [sp, #360] ; SVE-NEXT: ldr w10, [sp, #112] -; SVE-NEXT: mov v7.b[2], w3 -; SVE-NEXT: mov v4.b[2], w8 +; SVE-NEXT: mov v6.b[2], w3 +; SVE-NEXT: mov v5.b[2], w8 ; SVE-NEXT: ldr w8, [sp, #104] -; SVE-NEXT: mov v5.b[2], w9 +; SVE-NEXT: mov v4.b[2], w9 ; SVE-NEXT: ldr w9, [sp, #368] -; SVE-NEXT: mov v6.b[2], w8 +; SVE-NEXT: mov v7.b[2], w8 ; SVE-NEXT: ldr w8, [sp, #240] -; SVE-NEXT: mov v7.b[3], w4 -; SVE-NEXT: mov v4.b[3], w8 -; SVE-NEXT: mov v5.b[3], w9 +; SVE-NEXT: mov v6.b[3], w4 +; SVE-NEXT: mov v5.b[3], w8 +; SVE-NEXT: mov v4.b[3], w9 ; SVE-NEXT: ldr w8, [sp, #248] ; SVE-NEXT: ldr w9, [sp, #376] -; SVE-NEXT: mov v6.b[3], w10 +; SVE-NEXT: mov v7.b[3], w10 ; SVE-NEXT: ldr w10, [sp, #120] -; SVE-NEXT: mov v7.b[4], w5 -; SVE-NEXT: mov v4.b[4], w8 -; SVE-NEXT: mov v5.b[4], w9 +; SVE-NEXT: mov v6.b[4], w5 +; SVE-NEXT: mov v5.b[4], w8 +; SVE-NEXT: mov v4.b[4], w9 ; SVE-NEXT: ldr w8, [sp, #256] ; SVE-NEXT: ldr w9, [sp, #384] -; SVE-NEXT: mov v6.b[4], w10 +; SVE-NEXT: mov v7.b[4], w10 ; SVE-NEXT: ldr w10, [sp, #128] -; SVE-NEXT: mov v7.b[5], w6 -; SVE-NEXT: mov v4.b[5], w8 -; SVE-NEXT: mov v5.b[5], w9 +; SVE-NEXT: mov v6.b[5], w6 +; SVE-NEXT: mov v5.b[5], w8 +; SVE-NEXT: mov v4.b[5], w9 ; SVE-NEXT: ldr w8, [sp, #264] ; SVE-NEXT: ldr w9, [sp, #392] -; SVE-NEXT: mov v6.b[5], w10 +; SVE-NEXT: mov v7.b[5], w10 ; SVE-NEXT: ldr w10, [sp, #136] -; SVE-NEXT: mov v7.b[6], w7 -; SVE-NEXT: mov v4.b[6], w8 -; SVE-NEXT: mov v5.b[6], w9 +; SVE-NEXT: mov v6.b[6], w7 +; SVE-NEXT: mov v5.b[6], w8 +; SVE-NEXT: mov v4.b[6], w9 ; SVE-NEXT: ldr w8, [sp, #272] ; SVE-NEXT: ldr w9, [sp, #400] -; SVE-NEXT: mov v6.b[6], w10 +; SVE-NEXT: mov v7.b[6], w10 ; SVE-NEXT: ldr w10, [sp, #144] -; SVE-NEXT: mov v4.b[7], w8 +; SVE-NEXT: mov v5.b[7], w8 ; SVE-NEXT: ldr w8, [sp, #16] -; SVE-NEXT: mov v5.b[7], w9 +; SVE-NEXT: mov v4.b[7], w9 ; SVE-NEXT: ldr w9, [sp, #280] -; SVE-NEXT: mov v6.b[7], w10 -; SVE-NEXT: mov v7.b[7], w8 +; SVE-NEXT: mov v7.b[7], w10 +; SVE-NEXT: mov v6.b[7], w8 ; SVE-NEXT: ldr w10, [sp, #408] ; SVE-NEXT: ldr w8, [sp, #152] -; SVE-NEXT: mov v4.b[8], w9 +; SVE-NEXT: mov v5.b[8], w9 ; SVE-NEXT: ldr w9, [sp, #24] -; SVE-NEXT: mov v5.b[8], w10 +; SVE-NEXT: mov v4.b[8], w10 ; SVE-NEXT: ldr w10, [sp, #288] -; SVE-NEXT: mov v6.b[8], w8 -; SVE-NEXT: mov v7.b[8], w9 +; SVE-NEXT: mov v7.b[8], w8 +; SVE-NEXT: mov v6.b[8], w9 ; SVE-NEXT: ldr w8, [sp, #416] ; SVE-NEXT: ldr w9, [sp, #160] -; SVE-NEXT: mov v4.b[9], w10 +; SVE-NEXT: mov v5.b[9], w10 ; SVE-NEXT: ldr w10, [sp, #32] -; SVE-NEXT: mov v5.b[9], w8 +; SVE-NEXT: mov v4.b[9], w8 ; SVE-NEXT: ldr w8, [sp, #296] -; SVE-NEXT: mov v6.b[9], w9 -; SVE-NEXT: mov v7.b[9], w10 +; SVE-NEXT: mov v7.b[9], w9 +; SVE-NEXT: mov v6.b[9], w10 ; SVE-NEXT: ldr w9, [sp, #424] ; SVE-NEXT: ldr w10, [sp, #168] -; SVE-NEXT: mov v4.b[10], w8 +; SVE-NEXT: mov v5.b[10], w8 ; SVE-NEXT: ldr w8, [sp, #40] -; SVE-NEXT: mov v5.b[10], w9 +; SVE-NEXT: mov v4.b[10], w9 ; SVE-NEXT: ldr w9, [sp, #304] -; SVE-NEXT: mov v6.b[10], w10 -; SVE-NEXT: mov v7.b[10], w8 +; SVE-NEXT: mov v7.b[10], w10 +; SVE-NEXT: mov v6.b[10], w8 ; SVE-NEXT: ldr w10, [sp, #432] ; SVE-NEXT: ldr w8, [sp, #176] -; SVE-NEXT: mov v4.b[11], w9 +; SVE-NEXT: mov v5.b[11], w9 ; SVE-NEXT: ldr w9, [sp, #48] -; SVE-NEXT: mov v5.b[11], w10 +; SVE-NEXT: mov v4.b[11], w10 ; SVE-NEXT: ldr w10, [sp, #312] -; SVE-NEXT: mov v6.b[11], w8 -; SVE-NEXT: mov v7.b[11], w9 +; SVE-NEXT: mov v7.b[11], w8 +; SVE-NEXT: mov v6.b[11], w9 ; SVE-NEXT: ldr w8, [sp, #440] ; SVE-NEXT: ldr w9, [sp, #184] -; SVE-NEXT: mov v4.b[12], w10 +; SVE-NEXT: mov v5.b[12], w10 ; SVE-NEXT: ldr w10, [sp, #56] -; SVE-NEXT: mov v5.b[12], w8 +; SVE-NEXT: mov v4.b[12], w8 ; SVE-NEXT: ldr w8, [sp, #320] -; SVE-NEXT: mov v6.b[12], w9 -; SVE-NEXT: mov v7.b[12], w10 +; SVE-NEXT: mov v7.b[12], w9 +; SVE-NEXT: mov v6.b[12], w10 ; SVE-NEXT: ldr w9, [sp, #448] ; SVE-NEXT: ldr w10, [sp, #192] -; SVE-NEXT: mov v4.b[13], w8 +; SVE-NEXT: mov v5.b[13], w8 ; SVE-NEXT: ldr w8, [sp, #64] -; SVE-NEXT: mov v5.b[13], w9 +; SVE-NEXT: mov v4.b[13], w9 ; SVE-NEXT: ldr w9, [sp, #328] -; SVE-NEXT: mov v6.b[13], w10 -; SVE-NEXT: mov v7.b[13], w8 +; SVE-NEXT: mov v7.b[13], w10 +; SVE-NEXT: mov v6.b[13], w8 ; SVE-NEXT: ldr w10, [sp, #456] ; SVE-NEXT: ldr w8, [sp, #200] -; SVE-NEXT: mov v4.b[14], w9 +; SVE-NEXT: mov v5.b[14], w9 ; SVE-NEXT: ldr w9, [sp, #72] -; SVE-NEXT: mov v5.b[14], w10 +; SVE-NEXT: mov v4.b[14], w10 ; SVE-NEXT: ldr w10, [sp, #336] -; SVE-NEXT: mov v6.b[14], w8 -; SVE-NEXT: mov v7.b[14], w9 +; SVE-NEXT: mov v7.b[14], w8 +; SVE-NEXT: mov v6.b[14], w9 ; SVE-NEXT: ldr w8, [sp, #464] ; SVE-NEXT: ldr w9, [sp, #208] -; SVE-NEXT: mov v4.b[15], w10 +; SVE-NEXT: mov v5.b[15], w10 ; SVE-NEXT: ldr w10, [sp, #80] -; SVE-NEXT: mov v5.b[15], w8 +; SVE-NEXT: mov v4.b[15], w8 ; SVE-NEXT: mov w8, #32 // =0x20 -; SVE-NEXT: mov v6.b[15], w9 -; SVE-NEXT: mov v7.b[15], w10 +; SVE-NEXT: mov v7.b[15], w9 +; SVE-NEXT: mov v6.b[15], w10 ; SVE-NEXT: mov w9, #48 // =0x30 -; SVE-NEXT: shl v4.16b, v4.16b, #7 ; SVE-NEXT: shl v5.16b, v5.16b, #7 -; SVE-NEXT: shl v6.16b, v6.16b, #7 +; SVE-NEXT: shl v4.16b, v4.16b, #7 ; SVE-NEXT: shl v7.16b, v7.16b, #7 -; SVE-NEXT: cmlt v4.16b, v4.16b, #0 -; SVE-NEXT: cmlt v5.16b, v5.16b, #0 -; SVE-NEXT: cmlt v6.16b, v6.16b, #0 -; SVE-NEXT: cmpne p1.b, p0/z, z4.b, #0 -; SVE-NEXT: cmlt v4.16b, v7.16b, #0 -; SVE-NEXT: cmpne p2.b, p0/z, z5.b, #0 -; SVE-NEXT: cmpne p3.b, p0/z, z6.b, #0 -; SVE-NEXT: cmpne p0.b, p0/z, z4.b, #0 +; SVE-NEXT: cmpne p1.b, p0/z, z5.b, #0 +; SVE-NEXT: shl v5.16b, v6.16b, #7 +; SVE-NEXT: cmpne p2.b, p0/z, z4.b, #0 +; SVE-NEXT: cmpne p3.b, p0/z, z7.b, #0 +; SVE-NEXT: cmpne p0.b, p0/z, z5.b, #0 ; SVE-NEXT: st1b { z2.b }, p1, [x0, x8] ; SVE-NEXT: mov w8, #16 // =0x10 ; SVE-NEXT: st1b { z3.b }, p2, [x0, x9] @@ -755,7 +715,6 @@ define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr, ; SVE-NEXT: eor v1.8b, v1.8b, v2.8b ; SVE-NEXT: ushll v1.4s, v1.4h, #0 ; SVE-NEXT: shl v1.4s, v1.4s, #31 -; SVE-NEXT: cmlt v1.4s, v1.4s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; SVE-NEXT: st1w { z0.s }, p0, [x0] ; SVE-NEXT: ret @@ -947,29 +906,27 @@ define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %p ; SVE-LABEL: test_masked_store_multiple_v8i32: ; SVE: // %bb.0: ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE-NEXT: zip2 v6.8b, v4.8b, v0.8b -; SVE-NEXT: zip1 v4.8b, v4.8b, v0.8b +; SVE-NEXT: zip1 v6.8b, v5.8b, v0.8b +; SVE-NEXT: zip2 v7.8b, v4.8b, v0.8b ; SVE-NEXT: mov x8, #4 // =0x4 -; SVE-NEXT: zip1 v7.8b, v5.8b, v0.8b ; SVE-NEXT: zip2 v5.8b, v5.8b, v0.8b +; SVE-NEXT: zip1 v4.8b, v4.8b, v0.8b ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: ushll v6.4s, v6.4h, #0 -; SVE-NEXT: ushll v4.4s, v4.4h, #0 ; SVE-NEXT: ushll v7.4s, v7.4h, #0 ; SVE-NEXT: ushll v5.4s, v5.4h, #0 +; SVE-NEXT: ushll v4.4s, v4.4h, #0 ; SVE-NEXT: shl v6.4s, v6.4s, #31 -; SVE-NEXT: shl v4.4s, v4.4s, #31 ; SVE-NEXT: shl v7.4s, v7.4s, #31 ; SVE-NEXT: shl v5.4s, v5.4s, #31 +; SVE-NEXT: shl v4.4s, v4.4s, #31 ; SVE-NEXT: cmlt v6.4s, v6.4s, #0 -; SVE-NEXT: cmlt v4.4s, v4.4s, #0 -; SVE-NEXT: cmlt v7.4s, v7.4s, #0 +; SVE-NEXT: cmpne p1.s, p0/z, z7.s, #0 ; SVE-NEXT: cmlt v5.4s, v5.4s, #0 -; SVE-NEXT: cmpne p1.s, p0/z, z6.s, #0 -; SVE-NEXT: ldp q6, q16, [x1] +; SVE-NEXT: ldp q7, q16, [x1] ; SVE-NEXT: cmpne p0.s, p0/z, z4.s, #0 -; SVE-NEXT: bif v2.16b, v6.16b, v7.16b +; SVE-NEXT: bif v2.16b, v7.16b, v6.16b ; SVE-NEXT: bif v3.16b, v16.16b, v5.16b ; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] ; SVE-NEXT: st1w { z0.s }, p0, [x0] @@ -987,74 +944,70 @@ define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %p define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) { ; SVE-LABEL: test_masked_store_multiple_v8i64: ; SVE: // %bb.0: -; SVE-NEXT: ldp d16, d18, [sp] -; SVE-NEXT: ptrue p0.d, vl2 +; SVE-NEXT: ldp d16, d17, [sp] +; SVE-NEXT: ptrue p1.d, vl2 +; SVE-NEXT: mov x9, #4 // =0x4 ; SVE-NEXT: // kill: def $q3 killed $q3 def $z3 ; SVE-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE-NEXT: mov x8, #6 // =0x6 -; SVE-NEXT: mov x9, #4 // =0x4 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE-NEXT: mov b17, v16.b[4] -; SVE-NEXT: mov b19, v16.b[2] -; SVE-NEXT: mov b20, v16.b[6] -; SVE-NEXT: mov b21, v16.b[0] -; SVE-NEXT: mov b22, v18.b[4] -; SVE-NEXT: mov b23, v18.b[6] -; SVE-NEXT: mov b24, v18.b[0] -; SVE-NEXT: mov b25, v18.b[2] -; SVE-NEXT: mov v17.b[4], v16.b[5] -; SVE-NEXT: mov v19.b[4], v16.b[3] -; SVE-NEXT: mov v20.b[4], v16.b[7] -; SVE-NEXT: mov v21.b[4], v16.b[1] -; SVE-NEXT: mov v22.b[4], v18.b[5] -; SVE-NEXT: mov v23.b[4], v18.b[7] -; SVE-NEXT: mov v24.b[4], v18.b[1] -; SVE-NEXT: mov v25.b[4], v18.b[3] -; SVE-NEXT: ushll v17.2d, v17.2s, #0 -; SVE-NEXT: ushll v18.2d, v21.2s, #0 -; SVE-NEXT: ushll v21.2d, v24.2s, #0 -; SVE-NEXT: shl v16.2d, v17.2d, #63 +; SVE-NEXT: mov x8, #6 // =0x6 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE-NEXT: mov b18, v16.b[0] +; SVE-NEXT: mov b19, v16.b[4] +; SVE-NEXT: mov b20, v17.b[4] +; SVE-NEXT: mov b21, v16.b[6] +; SVE-NEXT: mov b22, v16.b[2] +; SVE-NEXT: mov b23, v17.b[6] +; SVE-NEXT: mov b24, v17.b[2] +; SVE-NEXT: mov b25, v17.b[0] +; SVE-NEXT: mov v18.b[4], v16.b[1] +; SVE-NEXT: mov v19.b[4], v16.b[5] +; SVE-NEXT: mov v20.b[4], v17.b[5] +; SVE-NEXT: mov v21.b[4], v16.b[7] +; SVE-NEXT: mov v22.b[4], v16.b[3] +; SVE-NEXT: mov v23.b[4], v17.b[7] +; SVE-NEXT: mov v24.b[4], v17.b[3] +; SVE-NEXT: mov v25.b[4], v17.b[1] +; SVE-NEXT: ushll v18.2d, v18.2s, #0 ; SVE-NEXT: ushll v17.2d, v19.2s, #0 -; SVE-NEXT: ushll v19.2d, v20.2s, #0 -; SVE-NEXT: ushll v20.2d, v22.2s, #0 -; SVE-NEXT: shl v18.2d, v18.2d, #63 -; SVE-NEXT: ushll v22.2d, v25.2s, #0 -; SVE-NEXT: shl v21.2d, v21.2d, #63 -; SVE-NEXT: cmlt v16.2d, v16.2d, #0 +; SVE-NEXT: ushll v19.2d, v21.2s, #0 +; SVE-NEXT: ushll v21.2d, v25.2s, #0 +; SVE-NEXT: shl v16.2d, v18.2d, #63 +; SVE-NEXT: ushll v18.2d, v20.2s, #0 +; SVE-NEXT: ushll v20.2d, v23.2s, #0 ; SVE-NEXT: shl v17.2d, v17.2d, #63 ; SVE-NEXT: shl v19.2d, v19.2d, #63 +; SVE-NEXT: shl v21.2d, v21.2d, #63 +; SVE-NEXT: cmpne p0.d, p1/z, z16.d, #0 +; SVE-NEXT: ushll v16.2d, v22.2s, #0 +; SVE-NEXT: shl v18.2d, v18.2d, #63 +; SVE-NEXT: ushll v22.2d, v24.2s, #0 ; SVE-NEXT: shl v20.2d, v20.2d, #63 -; SVE-NEXT: cmlt v18.2d, v18.2d, #0 -; SVE-NEXT: shl v22.2d, v22.2d, #63 -; SVE-NEXT: cmlt v21.2d, v21.2d, #0 -; SVE-NEXT: cmpne p1.d, p0/z, z16.d, #0 -; SVE-NEXT: ushll v16.2d, v23.2s, #0 -; SVE-NEXT: cmlt v17.2d, v17.2d, #0 -; SVE-NEXT: cmlt v19.2d, v19.2d, #0 -; SVE-NEXT: cmlt v20.2d, v20.2d, #0 +; SVE-NEXT: cmpne p2.d, p1/z, z17.d, #0 +; SVE-NEXT: cmpne p3.d, p1/z, z19.d, #0 ; SVE-NEXT: shl v16.2d, v16.2d, #63 -; SVE-NEXT: cmpne p2.d, p0/z, z17.d, #0 -; SVE-NEXT: cmpne p3.d, p0/z, z19.d, #0 -; SVE-NEXT: ldp q17, q19, [x1, #32] -; SVE-NEXT: cmpne p0.d, p0/z, z18.d, #0 -; SVE-NEXT: cmlt v16.2d, v16.2d, #0 -; SVE-NEXT: bif v6.16b, v17.16b, v20.16b -; SVE-NEXT: cmlt v20.2d, v22.2d, #0 -; SVE-NEXT: ldp q17, q18, [x1] -; SVE-NEXT: st1d { z2.d }, p1, [x0, x9, lsl #3] -; SVE-NEXT: mov v2.16b, v16.16b +; SVE-NEXT: cmlt v17.2d, v18.2d, #0 +; SVE-NEXT: shl v18.2d, v22.2d, #63 +; SVE-NEXT: ldp q19, q22, [x1, #32] +; SVE-NEXT: cmpne p1.d, p1/z, z16.d, #0 +; SVE-NEXT: cmlt v16.2d, v20.2d, #0 +; SVE-NEXT: cmlt v20.2d, v21.2d, #0 +; SVE-NEXT: cmlt v18.2d, v18.2d, #0 +; SVE-NEXT: bif v6.16b, v19.16b, v17.16b +; SVE-NEXT: ldp q17, q19, [x1] +; SVE-NEXT: st1d { z2.d }, p2, [x0, x9, lsl #3] +; SVE-NEXT: mov x9, #2 // =0x2 ; SVE-NEXT: st1d { z3.d }, p3, [x0, x8, lsl #3] -; SVE-NEXT: mov v3.16b, v21.16b +; SVE-NEXT: mov v2.16b, v16.16b +; SVE-NEXT: mov v3.16b, v20.16b +; SVE-NEXT: st1d { z1.d }, p1, [x0, x9, lsl #3] +; SVE-NEXT: mov v1.16b, v18.16b ; SVE-NEXT: st1d { z0.d }, p0, [x0] -; SVE-NEXT: mov v0.16b, v20.16b -; SVE-NEXT: mov x9, #2 // =0x2 -; SVE-NEXT: st1d { z1.d }, p2, [x0, x9, lsl #3] -; SVE-NEXT: bsl v2.16b, v7.16b, v19.16b +; SVE-NEXT: bsl v2.16b, v7.16b, v22.16b ; SVE-NEXT: bsl v3.16b, v4.16b, v17.16b -; SVE-NEXT: bsl v0.16b, v5.16b, v18.16b +; SVE-NEXT: bsl v1.16b, v5.16b, v19.16b ; SVE-NEXT: stp q6, q2, [x1, #32] -; SVE-NEXT: stp q3, q0, [x1] +; SVE-NEXT: stp q3, q1, [x1] ; SVE-NEXT: ret %load = load <8 x i64>, ptr %ptr1, align 32 %load2 = load <8 x i64>, ptr %ptr2, align 32 @@ -1073,7 +1026,6 @@ define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i ; SVE-NEXT: add x8, x0, #1 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: shl v1.4s, v1.4s, #31 -; SVE-NEXT: cmlt v1.4s, v1.4s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; SVE-NEXT: st1w { z0.s }, p0, [x8] ; SVE-NEXT: ret @@ -1098,8 +1050,6 @@ define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i ; SVE-NEXT: ushll v2.2d, v2.2s, #0 ; SVE-NEXT: shl v3.2d, v3.2d, #63 ; SVE-NEXT: shl v2.2d, v2.2d, #63 -; SVE-NEXT: cmlt v3.2d, v3.2d, #0 -; SVE-NEXT: cmlt v2.2d, v2.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0 ; SVE-NEXT: st1d { z1.d }, p1, [x8] @@ -1127,8 +1077,6 @@ define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i ; SVE-NEXT: ushll v2.4s, v2.4h, #0 ; SVE-NEXT: shl v3.4s, v3.4s, #31 ; SVE-NEXT: shl v2.4s, v2.4s, #31 -; SVE-NEXT: cmlt v3.4s, v3.4s, #0 -; SVE-NEXT: cmlt v2.4s, v2.4s, #0 ; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0 ; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0 ; SVE-NEXT: st1w { z0.s }, p1, [x8] @@ -1168,12 +1116,8 @@ define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i ; SVE-NEXT: shl v4.2d, v4.2d, #63 ; SVE-NEXT: shl v5.2d, v5.2d, #63 ; SVE-NEXT: shl v6.2d, v6.2d, #63 -; SVE-NEXT: shl v7.2d, v7.2d, #63 -; SVE-NEXT: cmlt v4.2d, v4.2d, #0 -; SVE-NEXT: cmlt v5.2d, v5.2d, #0 -; SVE-NEXT: cmlt v6.2d, v6.2d, #0 ; SVE-NEXT: cmpne p1.d, p0/z, z4.d, #0 -; SVE-NEXT: cmlt v4.2d, v7.2d, #0 +; SVE-NEXT: shl v4.2d, v7.2d, #63 ; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0 ; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0 ; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0 diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll index cdf2a962f9322..ca16df3c09ade 100644 --- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll +++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll @@ -367,7 +367,6 @@ define i32 @ctz_v16i1(<16 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7 ; NONSTREAMING-NEXT: ptrue p0.b, vl16 ; NONSTREAMING-NEXT: ptrue p1.b -; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0 ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b @@ -396,7 +395,6 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7 ; NONSTREAMING-NEXT: ptrue p0.b, vl16 ; NONSTREAMING-NEXT: ptrue p1.b -; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0 ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b @@ -425,7 +423,6 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) { ; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7 ; NONSTREAMING-NEXT: ptrue p0.b, vl16 ; NONSTREAMING-NEXT: ptrue p1.b -; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0 ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: incp x0, p0.b @@ -453,7 +450,6 @@ define i32 @ctz_v8i1(<8 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7 ; NONSTREAMING-NEXT: ptrue p0.b, vl8 ; NONSTREAMING-NEXT: ptrue p1.b -; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0 ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b @@ -482,7 +478,6 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7 ; NONSTREAMING-NEXT: ptrue p0.b, vl8 ; NONSTREAMING-NEXT: ptrue p1.b -; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0 ; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.b @@ -511,7 +506,6 @@ define i32 @ctz_v4i1(<4 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15 ; NONSTREAMING-NEXT: ptrue p0.h, vl4 ; NONSTREAMING-NEXT: ptrue p1.h -; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0 ; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.h @@ -540,7 +534,6 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15 ; NONSTREAMING-NEXT: ptrue p0.h, vl4 ; NONSTREAMING-NEXT: ptrue p1.h -; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0 ; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.h @@ -569,7 +562,6 @@ define i32 @ctz_v2i1(<2 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31 ; NONSTREAMING-NEXT: ptrue p0.s, vl2 ; NONSTREAMING-NEXT: ptrue p1.s -; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0 ; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.s @@ -598,7 +590,6 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) { ; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31 ; NONSTREAMING-NEXT: ptrue p0.s, vl2 ; NONSTREAMING-NEXT: ptrue p1.s -; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0 ; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b ; NONSTREAMING-NEXT: cntp x0, p0, p0.s diff --git a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll index 474a9d1003e8c..46bd414568e05 100644 --- a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll +++ b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll @@ -142,7 +142,6 @@ define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mas ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, d1 -; CHECK-NEXT: cmlt v2.16b, v2.16b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff @@ -159,7 +158,6 @@ define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %m ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: cmlt v2.16b, v2.16b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff @@ -177,7 +175,6 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) # ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.d, d1 -; CHECK-NEXT: cmlt v2.8b, v2.8b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff @@ -206,7 +203,6 @@ define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: shl v2.8h, v2.8h, #15 -; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0 ; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff @@ -225,7 +221,6 @@ define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: cmlt v2.8b, v2.8b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll index 55d37d1bda5e4..69fb3d88af02a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll @@ -13,7 +13,6 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -28,7 +27,6 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -43,7 +41,6 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -58,7 +55,6 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -74,7 +70,6 @@ define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x d ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll index 1a19b77f53c67..4570b50c96ad3 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll @@ -13,7 +13,6 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -28,7 +27,6 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -43,7 +41,6 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] @@ -58,7 +55,6 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) { ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll index 390f5c21f7b44..36df5e5deadfc 100644 --- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll @@ -7,7 +7,6 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -23,7 +22,6 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: stnt1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret