diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index d5d15b4dea966..35fde8733f375 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -3584,13 +3584,13 @@ void test_integer(void) { // CHECK-ASM: vsrlb vsc = vec_abs(vsc); - // CHECK-ASM: vlcb + // CHECK-ASM: vlpb vss = vec_abs(vss); - // CHECK-ASM: vlch + // CHECK-ASM: vlph vsi = vec_abs(vsi); - // CHECK-ASM: vlcf + // CHECK-ASM: vlpf vsl = vec_abs(vsl); - // CHECK-ASM: vlcg + // CHECK-ASM: vlpg vsc = vec_max(vsc, vsc); // CHECK-ASM: vmxb diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c index 6ee9e1ee3a117..cd0fafdb7435f 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c @@ -246,7 +246,7 @@ void test_integer(void) { // CHECK-ASM: vctzq vslll = vec_abs(vslll); - // CHECK-ASM: vlcq + // CHECK-ASM: vlpq vslll = vec_avg(vslll, vslll); // CHECK: call i128 @llvm.s390.vavgq(i128 %{{.*}}, i128 %{{.*}}) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3ddf182149e57..cbaff294819a2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I, return nullptr; } +/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X) +/// select(X abs(X) +static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *X; + Value *Sel; + if (match(&I, + m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) { + auto NegX = m_Neg(m_Specific(X)); + if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X), + m_ZeroInt()), + m_ZeroInt(), NegX)) || + match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X), + m_ZeroInt()), + NegX, m_ZeroInt()))) + return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, + Builder.getFalse()); + } + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V); + if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder)) + return replaceInstUsesWith(I, Res); + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 6b090e982af0a..f61a1970d3aa4 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -2113,3 +2113,98 @@ define <4 x i32> @or_zext_nneg_minus_constant_splat(<4 x i8> %a) { %or = or <4 x i32> %zext, splat (i32 -9) ret <4 x i32> %or } + +define i8 @or_positive_minus_non_positive_to_abs(i8 %a){ +; CHECK-LABEL: @or_positive_minus_non_positive_to_abs( +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false) +; CHECK-NEXT: ret i8 [[TMP2]] +; + %b = icmp sgt i8 %a, 0 + %mask = sext i1 %b to i8 + %neg = sub i8 0, %a + %mask_inv = xor i8 %mask, -1 + %c = and i8 %neg, %mask_inv + %d = and i8 %a, %mask + %or = or i8 %c, %d + ret i8 %or +} + +; TODO: Fold to smax https://alive2.llvm.org/ce/z/wDiDh2 +define i8 @or_select_smax_neg_to_abs(i8 %a){ +; CHECK-LABEL: @or_select_smax_neg_to_abs( +; CHECK-NEXT: [[SGT0:%.*]] = icmp sgt i8 [[A:%.*]], 0 +; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[SGT0]], i8 0, i8 [[NEG]] +; CHECK-NEXT: ret i8 [[OR]] +; + %sgt0 = icmp sgt i8 %a, 0 + %neg = sub nsw i8 0, %a + %sel = select i1 %sgt0, i8 0, i8 %neg + ret i8 %sel +} + +; TODO: Fold to abs https://alive2.llvm.org/ce/z/DybfHG +define i8 @or_select_smax_smax_to_abs(i8 %a){ +; CHECK-LABEL: @or_select_smax_smax_to_abs( +; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = call i8 @llvm.smax.i8(i8 [[NEG]], i8 0) +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 0) +; CHECK-NEXT: [[OR:%.*]] = or i8 [[SEL]], [[MAX]] +; CHECK-NEXT: ret i8 [[OR]] +; + %neg = sub nsw i8 0, %a + %sel = call i8 @llvm.smax.i8(i8 %neg, i8 0) + %max = call i8 @llvm.smax.i8(i8 %a, i8 0) + %or = or i8 %sel, %max + ret i8 %or +} + +declare i8 @llvm.abs.i8(i8, i1) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1) + +define <2 x i8> @or_sgt_select_smax_to_abs(<2 x i8> %a){ +; CHECK-LABEL: @or_sgt_select_smax_to_abs( +; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i8> [[OR]] +; + %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer + %neg = sub <2 x i8> zeroinitializer, %a + %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg + %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) + %or = or <2 x i8> %sel, %max + ret <2 x i8> %or +} + +define <2 x i8> @or_slt_select_smax_to_abs(<2 x i8> %a){ +; CHECK-LABEL: @or_slt_select_smax_to_abs( +; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i8> [[OR]] +; + %slt0 = icmp slt <2 x i8> %a, zeroinitializer + %neg = sub <2 x i8> zeroinitializer, %a + %sel = select <2 x i1> %slt0, <2 x i8> %neg, <2 x i8> zeroinitializer + %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) + %or = or <2 x i8> %sel, %max + ret <2 x i8> %or +} + +; negative test - %d has multiple uses. %or is not folded to abs. + +define <2 x i8> @or_select_smax_multi_uses(<2 x i8> %a){ +; CHECK-LABEL: @or_select_smax_multi_uses( +; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[A]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[B]], <2 x i8> zeroinitializer, <2 x i8> [[NEG]] +; CHECK-NEXT: [[D:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[A]], <2 x i8> zeroinitializer) +; CHECK-NEXT: [[OR1:%.*]] = or <2 x i8> [[C]], [[D]] +; CHECK-NEXT: [[OR:%.*]] = add <2 x i8> [[OR1]], [[D]] +; CHECK-NEXT: ret <2 x i8> [[OR]] +; + %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer + %neg = sub <2 x i8> zeroinitializer, %a + %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg + %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer) + %or = or <2 x i8> %sel, %max + %add = add <2 x i8> %or, %max + ret <2 x i8> %add +}