diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c84fe4d6619740..b822f15ed193b3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -6855,25 +6855,25 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. - SDValue SingleOp; - if (ISD::isBuildVectorAllZeros(Op1.getNode())) - SingleOp = Op0; - else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (ISD::isBuildVectorAllZeros(Op0.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ || + Opc == ARMCC::NE)) { if (Opc == ARMCC::GE) Opc = ARMCC::LE; else if (Opc == ARMCC::GT) Opc = ARMCC::LT; - SingleOp = Op1; + std::swap(Op0, Op1); } SDValue Result; - if (SingleOp.getNode()) { - Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp, + if (ISD::isBuildVectorAllZeros(Op1.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE || + Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ)) + Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0, DAG.getConstant(Opc, dl, MVT::i32)); - } else { + else Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, DAG.getConstant(Opc, dl, MVT::i32)); - } Result = DAG.getSExtOrTrunc(Result, dl, VT); diff --git a/llvm/test/CodeGen/ARM/vcmpz.ll b/llvm/test/CodeGen/ARM/vcmpz.ll index f800346a6b5643..51b5d28d8192e7 100644 --- a/llvm/test/CodeGen/ARM/vcmpz.ll +++ b/llvm/test/CodeGen/ARM/vcmpz.ll @@ -174,11 +174,16 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ult(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { -; %2 = icmp ule <4 x i32> %0, zeroinitializer -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zr_ule: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp ule <4 x i32> %0, zeroinitializer + %3 = sext <4 x i1> %2 to <4 x i32> + ret <4 x i32> %3 +} define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ugt(<4 x i32> %0) { ; CHECK-LABEL: vcmpz_zr_ugt: @@ -294,8 +299,13 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_ugt(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { -; %2 = icmp uge <4 x i32> zeroinitializer, %0 -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zl_uge: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp uge <4 x i32> zeroinitializer, %0 + %3 = sext <4 x i1> %2 to <4 x i32> + ret <4 x i32> %3 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll index e745fafdbea728..e8d5eadabf7f95 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll @@ -122,8 +122,9 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vpt.i32 eq, q0, zr -; CHECK-NEXT: vcmpt.u32 cs, q1, zr +; CHECK-NEXT: vcmpt.u32 cs, q2, q1 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll index cb3f554e21b0a0..435ddf0a6e57b6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -123,7 +123,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vpnot ; CHECK-NEXT: vpst ; CHECK-NEXT: vcmpt.i32 ne, q0, zr diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index e5fef332034fe1..0ff262e6b53ab5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -151,7 +151,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmrs r1, p0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll index fcb9d136307fe5..aaf49c76a07a0f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll @@ -110,7 +110,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_ulez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -229,7 +230,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_ulez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -348,7 +350,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_ulez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -489,7 +492,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_r_ugez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -608,7 +612,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_r_ugez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -727,7 +732,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_r_ugez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: