-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[ExpandMemCmp] Improve memcmp optimisation for boolean results #71221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ExpandMemCmp] Improve memcmp optimisation for boolean results #71221
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: Igor Kirillov (igogo-x86) ChangesThis patch enhances the optimization of memcmp calls when only two outcomes
Previously, LLVM would generate unnecessary operations even when the user of Patch is 52.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71221.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 28e258be226a695..a3dd0feea2ff969 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -31,6 +32,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace llvm {
class TargetLowering;
@@ -656,6 +658,37 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
/*Offset*/ 0);
+
+ // If a user of memcmp cares only about two outcomes, for example:
+ // bool result = memcmp(a, b, NBYTES) > 0;
+ // We can generate more optimal code with a smaller number of operations
+ if (auto *U = CI->getUniqueUndroppableUser()) {
+ auto *UI = cast<Instruction>(U);
+ ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
+ uint64_t Shift;
+ bool NeedsZExt = false;
+ // This is a special case because instead of checking if the result is less than zero:
+ // bool result = memcmp(a, b, NBYTES) < 0;
+ // Compiler is clever enough to generate the following code:
+ // bool result = memcmp(a, b, NBYTES) >> 31;
+ if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+ Pred = ICmpInst::ICMP_SLT;
+ NeedsZExt = true;
+ } else {
+ // In case of a successful match this call will set `Pred` variable
+ match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
+ }
+ // Generate new code and remove the original memcmp call and the user
+ if (ICmpInst::isSigned(Pred)) {
+ Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+ auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
+ UI->replaceAllUsesWith(Result);
+ UI->eraseFromParent();
+ CI->eraseFromParent();
+ return nullptr;
+ }
+ }
+
// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able
@@ -670,7 +703,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
}
// This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
+// the memcmp result. Returns nullptr if the memcmp is already replaced.
Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
@@ -838,11 +871,11 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
NumMemCmpInlined++;
- Value *Res = Expansion.getMemCmpExpansion();
-
- // Replace call with result of expansion and erase call.
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
+ if (Value *Res = Expansion.getMemCmpExpansion()) {
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ }
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index d13a416a28761ca..4da7c8c95a4e4f0 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -222,16 +222,28 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0
ret i1 %c
}
+define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length4_lt_32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+ %c = lshr i32 %m, 31
+ ret i32 %c
+}
+
define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; CHECK-LABEL: length4_gt:
; CHECK: // %bb.0:
@@ -240,11 +252,7 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: cset w0, hi
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
%c = icmp sgt i32 %m, 0
@@ -313,10 +321,7 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: sub w8, w8, w9
-; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
%c = icmp slt i32 %m, 0
@@ -343,6 +348,25 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
ret i32 %m
}
+define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length6_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0, #4]
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: ldrh w10, [x1, #4]
+; CHECK-NEXT: ldr w11, [x1]
+; CHECK-NEXT: orr x8, x9, x8, lsl #32
+; CHECK-NEXT: orr x9, x11, x10, lsl #32
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+ %r = lshr i32 %m, 31
+ ret i32 %r
+}
+
define i32 @length7(ptr %X, ptr %Y) nounwind {
; CHECK-LABEL: length7:
; CHECK: // %bb.0:
@@ -351,18 +375,18 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: b.ne .LBB24_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur w8, [x0, #3]
; CHECK-NEXT: ldur w9, [x1, #3]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: b.ne .LBB24_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB22_3: // %res_block
+; CHECK-NEXT: .LBB24_3: // %res_block
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -379,18 +403,18 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: b.ne .LBB25_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur w8, [x0, #3]
; CHECK-NEXT: ldur w9, [x1, #3]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: b.ne .LBB25_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB23_3: // %res_block
+; CHECK-NEXT: .LBB25_3: // %res_block
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -470,13 +494,13 @@ define i32 @length9(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB28_2
+; CHECK-NEXT: b.ne .LBB30_2
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrb w8, [x0, #8]
; CHECK-NEXT: ldrb w9, [x1, #8]
; CHECK-NEXT: sub w0, w8, w9
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB28_2: // %res_block
+; CHECK-NEXT: .LBB30_2: // %res_block
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
@@ -508,7 +532,7 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldrh w8, [x0, #8]
; CHECK-NEXT: ldrh w9, [x1, #8]
@@ -517,11 +541,11 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: lsr w9, w9, #16
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: b.ne .LBB32_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB30_3: // %res_block
+; CHECK-NEXT: .LBB32_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -554,18 +578,18 @@ define i32 @length11(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: b.ne .LBB34_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #3]
; CHECK-NEXT: ldur x9, [x1, #3]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: b.ne .LBB34_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB32_3: // %res_block
+; CHECK-NEXT: .LBB34_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -614,18 +638,18 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: b.ne .LBB37_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr w8, [x0, #8]
; CHECK-NEXT: ldr w9, [x1, #8]
; CHECK-NEXT: rev w8, w8
; CHECK-NEXT: rev w9, w9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: b.ne .LBB37_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB35_3: // %res_block
+; CHECK-NEXT: .LBB37_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -674,18 +698,18 @@ define i32 @length15(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: b.ne .LBB40_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #7]
; CHECK-NEXT: ldur x9, [x1, #7]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: b.ne .LBB40_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB38_3: // %res_block
+; CHECK-NEXT: .LBB40_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -702,18 +726,18 @@ define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: b.ne .LBB41_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldur x8, [x0, #7]
; CHECK-NEXT: ldur x9, [x1, #7]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: b.ne .LBB41_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB39_3: // %res_block
+; CHECK-NEXT: .LBB41_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -734,7 +758,7 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #12594, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: b.ne .LBB42_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: mov x8, #13365 // =0x3435
; CHECK-NEXT: ldur x9, [x0, #7]
@@ -743,11 +767,11 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #14393, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: b.ne .LBB42_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB40_3: // %res_block
+; CHECK-NEXT: .LBB42_3: // %res_block
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -782,7 +806,7 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #12594, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: b.ne .LBB44_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: mov x8, #13365 // =0x3435
; CHECK-NEXT: ldur x9, [x0, #7]
@@ -791,15 +815,15 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: movk x8, #14393, lsl #48
; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: b.ne .LBB44_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB42_4
-; CHECK-NEXT: .LBB42_3: // %res_block
+; CHECK-NEXT: b .LBB44_4
+; CHECK-NEXT: .LBB44_3: // %res_block
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB42_4: // %endblock
+; CHECK-NEXT: .LBB44_4: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -817,18 +841,18 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: b.ne .LBB45_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: b.ne .LBB45_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB43_3: // %res_block
+; CHECK-NEXT: .LBB45_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -859,18 +883,18 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: b.ne .LBB47_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: b.ne .LBB47_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB45_3: // %res_block
+; CHECK-NEXT: .LBB47_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -889,22 +913,22 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: b.ne .LBB48_3
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: b.ne .LBB48_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB46_4
-; CHECK-NEXT: .LBB46_3: // %res_block
+; CHECK-NEXT: b .LBB48_4
+; CHECK-NEXT: .LBB48_3: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB46_4: // %endblock
+; CHECK-NEXT: .LBB48_4: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -943,25 +967,25 @@ define i32 @length24(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: b.ne .LBB50_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB48_4: // %res_block
+; CHECK-NEXT: .LBB50_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w0, w8, hs
@@ -995,25 +1019,25 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: b.ne .LBB52_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: lsr w0, wzr, #31
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB50_4: // %res_block
+; CHECK-NEXT: .LBB52_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
@@ -1032,29 +1056,29 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: b.ne .LBB53_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b .LBB51_5
-; CHECK-NEXT: .LBB51_4: // %res_block
+; CHECK-NEXT: b .LBB53_5
+; CHECK-NEXT: .LBB53_4: // %res_block
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cneg w8, w8, hs
-; CHECK-NEXT: .LBB51_5: // %endblock
+; CHECK-NEXT: .LBB53_5: // %endblock
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -1098,32 +1122,32 @@ define i32 @length31(ptr %X, ptr %Y) nounwind {
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.1: // %loadbb1
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: ldr x9, [x1, #8]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.2: // %loadbb2
; CHECK-NEXT: ldr x8, [x0, #16]
; CHECK-NEXT: ldr x9, [x1, #16]
; CHECK-NEXT: rev x8, x8
; CHECK-NEXT: rev x9, x9
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: b.ne .LBB55_5
; CHECK-NEXT: // %bb.3: // %loadbb3
; CHECK-NEXT: ldur x8, ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for this @igogo-x86! It looks good - I just have a couple of minor comments.
7ce3e79
to
6974602
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Thanks for making the changes @igogo-x86.
This patch enhances the optimization of memcmp calls when only two outcomes are needed and comparison fits into one block, for example: bool result = memcmp(a, b, 6) > 0; Previously, LLVM would generate unnecessary operations even when the user of memcmp was only interested in a binary outcome.
8160d60
to
9fb8ac9
Compare
This patch enhances the optimization of memcmp calls when only two outcomes
are needed and comparison fits into one block, for example:
Previously, LLVM would generate unnecessary operations even when the user of
memcmp was only interested in a binary outcome.