diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 899baa9c998ec..9078675da0e95 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25512,6 +25512,32 @@ SDValue performCONDCombine(SDNode *N, CmpIndex, CC)) return Val; + // X & M ?= C --> (C << clz(M)) ?= (X << clz(M)) where M is a non-empty + // sequence of ones starting at the least significant bit with the remainder + // zero and C is a constant s.t. (C & ~M) == 0 that cannot be materialised + // into a SUBS (immediate). The transformed form can be matched into a SUBS + // (shifted register). + if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && AndNode->hasOneUse() && + isa<ConstantSDNode>(AndNode->getOperand(1)) && + isa<ConstantSDNode>(SubsNode->getOperand(1))) { + SDValue X = AndNode->getOperand(0); + APInt M = AndNode->getConstantOperandAPInt(1); + APInt C = SubsNode->getConstantOperandAPInt(1); + + if (M.isMask() && C.isSubsetOf(M) && !isLegalArithImmed(C.getZExtValue())) { + SDLoc DL(SubsNode); + EVT VT = SubsNode->getValueType(0); + unsigned ShiftAmt = M.countl_zero(); + SDValue ShiftedX = DAG.getNode( + ISD::SHL, DL, VT, X, DAG.getShiftAmountConstant(ShiftAmt, VT, DL)); + SDValue ShiftedC = DAG.getConstant(C << ShiftAmt, DL, VT); + SDValue NewSubs = DAG.getNode(AArch64ISD::SUBS, DL, SubsNode->getVTList(), + ShiftedC, ShiftedX); + DCI.CombineTo(SubsNode, NewSubs, NewSubs.getValue(1)); + return SDValue(N, 0); + } + } + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) { uint32_t CNV = CN->getZExtValue(); if (CNV == 255) diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index e68539bcf07d9..e8bbaf96395f0 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -27,9 +27,8 @@ define i32 @replace_isinf_call_f32(float %x) { ; CHECK-LABEL: replace_isinf_call_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov w8, 
#2139095040 // =0x7f800000 -; CHECK-NEXT: and w9, w9, #0x7fffffff -; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: mov w8, #-16777216 // =0xff000000 +; CHECK-NEXT: cmp w8, w9, lsl #1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %abs = tail call float @llvm.fabs.f32(float %x) @@ -43,9 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) { ; CHECK-LABEL: replace_isinf_call_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 -; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff -; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000 +; CHECK-NEXT: cmp x8, x9, lsl #1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %abs = tail call double @llvm.fabs.f64(double %x) diff --git a/llvm/test/CodeGen/AArch64/masked-integer-compare.ll b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll new file mode 100644 index 0000000000000..363cd10c78a94 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s + +; Test code generation support for SUBS (shifted register) from masked integer +; compare sequences. These sequences appear in isinf tests, for example. 
+ +define i1 @combine_masked_i32(i32 %x) { +; CHECK-LABEL: combine_masked_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-16777216 // =0xff000000 +; CHECK-NEXT: cmp w8, w0, lsl #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %sub = sub i32 %and, u0x7f800000 + %cmp = icmp eq i32 %sub, 0 + ret i1 %cmp +} + +define i1 @combine_masked_i64(i64 %x) { +; CHECK-LABEL: combine_masked_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000 +; CHECK-NEXT: cmp x8, x0, lsl #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i64 %x, u0x7fffffffffffffff + %sub = sub i64 %and, u0x7ff0000000000000 + %cmp = icmp eq i64 %sub, 0 + ret i1 %cmp +} + +define i1 @combine_masked_ne(i32 %x) { +; CHECK-LABEL: combine_masked_ne: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-16777216 // =0xff000000 +; CHECK-NEXT: cmp w8, w0, lsl #1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %cmp = icmp ne i32 %and, u0x7f800000 + ret i1 %cmp +} + +define i1 @combine_masked_lsl4(i32 %x) { +; CHECK-LABEL: combine_masked_lsl4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-134217728 // =0xf8000000 +; CHECK-NEXT: cmp w8, w0, lsl #4 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x0fffffff + %cmp = icmp eq i32 %and, u0x0f800000 + ret i1 %cmp +} + +define i1 @dont_combine_not_mask(i32 %x) { +; CHECK-LABEL: dont_combine_not_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: and w9, w0, #0x7ffffffe +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7ffffffe + %cmp = icmp eq i32 %and, u0x7f800000 + ret i1 %cmp +} + +define i1 @dont_combine_cmp_not_masked(i32 %x) { +; CHECK-LABEL: dont_combine_cmp_not_masked: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: and w9, w0, #0x3fffffff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and 
i32 %x, u0x3fffffff + %cmp = icmp eq i32 %and, u0x7f800000 + ret i1 %cmp +} + +define i1 @dont_combine_not_constant_mask(i32 %x, i32 %m) { +; CHECK-LABEL: dont_combine_not_constant_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: and w9, w0, w1 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, %m + %cmp = icmp eq i32 %and, u0x7f800000 + ret i1 %cmp +} + +define i1 @dont_combine_not_constant_cmp(i32 %x, i32 %c) { +; CHECK-LABEL: dont_combine_not_constant_cmp: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xfffffff +; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x0fffffff + %cmp = icmp eq i32 %and, %c + ret i1 %cmp +} + +define i1 @dont_combine_subs_imm(i32 %x) { +; CHECK-LABEL: dont_combine_subs_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x7fffffff +; CHECK-NEXT: cmp w8, #291 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %cmp = icmp eq i32 %and, u0x123 + ret i1 %cmp +} + +define i1 @dont_combine_subs_imm_lsl12(i32 %x) { +; CHECK-LABEL: dont_combine_subs_imm_lsl12: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x7fffffff +; CHECK-NEXT: cmp w8, #291, lsl #12 // =1191936 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %cmp = icmp eq i32 %and, u0x123000 + ret i1 %cmp +} + +define { i1, i1 } @dont_combine_multi_use_cmp(i32 %x) { +; CHECK-LABEL: dont_combine_multi_use_cmp: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: and w9, w0, #0x7fffffff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: cset w1, lt +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %eq = icmp eq i32 %and, u0x7f800000 + %lt = icmp slt i32 %and, u0x7f800000 + %r1 = insertvalue { i1, i1 } poison, i1 %eq, 0 + %r2 = insertvalue { i1, i1 } %r1, i1 %lt, 1 + ret { i1, i1 } %r2 +} + +define { i32, i1 } @dont_combine_multi_use_sub(i32 %x) { 
+; CHECK-LABEL: dont_combine_multi_use_sub: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2139095040 // =0x80800000 +; CHECK-NEXT: and w9, w0, #0x7fffffff +; CHECK-NEXT: adds w0, w9, w8 +; CHECK-NEXT: cset w1, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %sub = sub i32 %and, u0x7f800000 + %cmp = icmp eq i32 %sub, 0 + %r1 = insertvalue { i32, i1 } poison, i32 %sub, 0 + %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1 + ret { i32, i1 } %r2 +} + +define { i32, i1 } @dont_combine_multi_use_and(i32 %x) { +; CHECK-LABEL: dont_combine_multi_use_and: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 +; CHECK-NEXT: and w0, w0, #0x7fffffff +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: cset w1, eq +; CHECK-NEXT: ret + %and = and i32 %x, u0x7fffffff + %cmp = icmp eq i32 %and, u0x7f800000 + %r1 = insertvalue { i32, i1 } poison, i32 %and, 0 + %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1 + ret { i32, i1 } %r2 +}