Skip to content

Conversation

igogo-x86
Copy link
Contributor

This patch enhances the optimization of memcmp calls when only two outcomes
are needed and comparison fits into one block, for example:

bool result = memcmp(a, b, 6) > 0;

Previously, LLVM would generate unnecessary operations even when the user of
memcmp was only interested in a binary outcome.

@llvmbot
Copy link
Member

llvmbot commented Nov 3, 2023

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-aarch64

Author: Igor Kirillov (igogo-x86)

Changes

This patch enhances the optimization of memcmp calls when only two outcomes
are needed and comparison fits into one block, for example:

bool result = memcmp(a, b, 6) > 0;

Previously, LLVM would generate unnecessary operations even when the user of
memcmp was only interested in a binary outcome.


Patch is 52.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71221.diff

6 Files Affected:

  • (modified) llvm/lib/CodeGen/ExpandMemCmp.cpp (+39-6)
  • (modified) llvm/test/CodeGen/AArch64/memcmp.ll (+208-184)
  • (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+5-13)
  • (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+6-14)
  • (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+5-13)
  • (modified) llvm/test/CodeGen/X86/memcmp.ll (+6-14)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 28e258be226a695..a3dd0feea2ff969 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -31,6 +32,7 @@
 #include <optional>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 namespace llvm {
 class TargetLowering;
@@ -656,6 +658,37 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
 
   const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
                                      /*Offset*/ 0);
+
+  // If a user of memcmp cares only about two outcomes, for example:
+  //    bool result = memcmp(a, b, NBYTES) > 0;
+  // We can generate more optimal code with a smaller number of operations
+  if (auto *U = CI->getUniqueUndroppableUser()) {
+    auto *UI = cast<Instruction>(U);
+    ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
+    uint64_t Shift;
+    bool NeedsZExt = false;
+    // This is a special case because instead of checking if the result is less than zero:
+    //    bool result = memcmp(a, b, NBYTES) < 0;
+    // Compiler is clever enough to generate the following code:
+    //    bool result = memcmp(a, b, NBYTES) >> 31;
+    if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && Shift == CI->getType()->getIntegerBitWidth() - 1) {
+      Pred = ICmpInst::ICMP_SLT;
+      NeedsZExt = true;
+    } else {
+      // In case of a successful match this call will set `Pred` variable
+      match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
+    }
+    // Generate new code and remove the original memcmp call and the user
+    if (ICmpInst::isSigned(Pred)) {
+      Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), Loads.Lhs, Loads.Rhs);
+      auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, U->getType()) : Cmp;
+      UI->replaceAllUsesWith(Result);
+      UI->eraseFromParent();
+      CI->eraseFromParent();
+      return nullptr;
+    }
+  }
+
   // The result of memcmp is negative, zero, or positive, so produce that by
   // subtracting 2 extended compare bits: sub (ugt, ult).
   // If a target prefers to use selects to get -1/0/1, they should be able
@@ -670,7 +703,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
 }
 
 // This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
+// the memcmp result. Returns nullptr if the memcmp is already replaced.
 Value *MemCmpExpansion::getMemCmpExpansion() {
   // Create the basic block framework for a multi-block expansion.
   if (getNumBlocks() != 1) {
@@ -838,11 +871,11 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
 
   NumMemCmpInlined++;
 
-  Value *Res = Expansion.getMemCmpExpansion();
-
-  // Replace call with result of expansion and erase call.
-  CI->replaceAllUsesWith(Res);
-  CI->eraseFromParent();
+  if (Value *Res = Expansion.getMemCmpExpansion()) {
+    // Replace call with result of expansion and erase call.
+    CI->replaceAllUsesWith(Res);
+    CI->eraseFromParent();
+  }
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll
index d13a416a28761ca..4da7c8c95a4e4f0 100644
--- a/llvm/test/CodeGen/AArch64/memcmp.ll
+++ b/llvm/test/CodeGen/AArch64/memcmp.ll
@@ -222,16 +222,28 @@ define i1 @length4_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    lsr w0, w8, #31
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
   ret i1 %c
 }
 
+define i32 @length4_lt_32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length4_lt_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    ldr w9, [x1]
+; CHECK-NEXT:    rev w8, w8
+; CHECK-NEXT:    rev w9, w9
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
+  %c = lshr i32 %m, 31
+  ret i32 %c
+}
+
 define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; CHECK-LABEL: length4_gt:
 ; CHECK:       // %bb.0:
@@ -240,11 +252,7 @@ define i1 @length4_gt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
   %c = icmp sgt i32 %m, 0
@@ -313,10 +321,7 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cset w9, lo
-; CHECK-NEXT:    sub w8, w8, w9
-; CHECK-NEXT:    lsr w0, w8, #31
+; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
   %c = icmp slt i32 %m, 0
@@ -343,6 +348,25 @@ define i32 @length6(ptr %X, ptr %Y) nounwind {
   ret i32 %m
 }
 
+define i32 @length6_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length6_lt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0, #4]
+; CHECK-NEXT:    ldr w9, [x0]
+; CHECK-NEXT:    ldrh w10, [x1, #4]
+; CHECK-NEXT:    ldr w11, [x1]
+; CHECK-NEXT:    orr x8, x9, x8, lsl #32
+; CHECK-NEXT:    orr x9, x11, x10, lsl #32
+; CHECK-NEXT:    rev x8, x8
+; CHECK-NEXT:    rev x9, x9
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind
+  %r = lshr i32 %m, 31
+  ret i32 %r
+}
+
 define i32 @length7(ptr %X, ptr %Y) nounwind {
 ; CHECK-LABEL: length7:
 ; CHECK:       // %bb.0:
@@ -351,18 +375,18 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB22_3
+; CHECK-NEXT:    b.ne .LBB24_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur w8, [x0, #3]
 ; CHECK-NEXT:    ldur w9, [x1, #3]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB22_3
+; CHECK-NEXT:    b.ne .LBB24_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB22_3: // %res_block
+; CHECK-NEXT:  .LBB24_3: // %res_block
 ; CHECK-NEXT:    cmp w8, w9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -379,18 +403,18 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB23_3
+; CHECK-NEXT:    b.ne .LBB25_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur w8, [x0, #3]
 ; CHECK-NEXT:    ldur w9, [x1, #3]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB23_3
+; CHECK-NEXT:    b.ne .LBB25_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB23_3: // %res_block
+; CHECK-NEXT:  .LBB25_3: // %res_block
 ; CHECK-NEXT:    cmp w8, w9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -470,13 +494,13 @@ define i32 @length9(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB28_2
+; CHECK-NEXT:    b.ne .LBB30_2
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldrb w8, [x0, #8]
 ; CHECK-NEXT:    ldrb w9, [x1, #8]
 ; CHECK-NEXT:    sub w0, w8, w9
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB28_2: // %res_block
+; CHECK-NEXT:  .LBB30_2: // %res_block
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
 ; CHECK-NEXT:    ret
@@ -508,7 +532,7 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB30_3
+; CHECK-NEXT:    b.ne .LBB32_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldrh w8, [x0, #8]
 ; CHECK-NEXT:    ldrh w9, [x1, #8]
@@ -517,11 +541,11 @@ define i32 @length10(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    lsr w9, w9, #16
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB30_3
+; CHECK-NEXT:    b.ne .LBB32_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB30_3: // %res_block
+; CHECK-NEXT:  .LBB32_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -554,18 +578,18 @@ define i32 @length11(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB32_3
+; CHECK-NEXT:    b.ne .LBB34_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #3]
 ; CHECK-NEXT:    ldur x9, [x1, #3]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB32_3
+; CHECK-NEXT:    b.ne .LBB34_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB32_3: // %res_block
+; CHECK-NEXT:  .LBB34_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -614,18 +638,18 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB35_3
+; CHECK-NEXT:    b.ne .LBB37_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr w8, [x0, #8]
 ; CHECK-NEXT:    ldr w9, [x1, #8]
 ; CHECK-NEXT:    rev w8, w8
 ; CHECK-NEXT:    rev w9, w9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB35_3
+; CHECK-NEXT:    b.ne .LBB37_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB35_3: // %res_block
+; CHECK-NEXT:  .LBB37_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -674,18 +698,18 @@ define i32 @length15(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB38_3
+; CHECK-NEXT:    b.ne .LBB40_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #7]
 ; CHECK-NEXT:    ldur x9, [x1, #7]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB38_3
+; CHECK-NEXT:    b.ne .LBB40_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB38_3: // %res_block
+; CHECK-NEXT:  .LBB40_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -702,18 +726,18 @@ define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB39_3
+; CHECK-NEXT:    b.ne .LBB41_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldur x8, [x0, #7]
 ; CHECK-NEXT:    ldur x9, [x1, #7]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB39_3
+; CHECK-NEXT:    b.ne .LBB41_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB39_3: // %res_block
+; CHECK-NEXT:  .LBB41_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -734,7 +758,7 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #12594, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB40_3
+; CHECK-NEXT:    b.ne .LBB42_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    mov x8, #13365 // =0x3435
 ; CHECK-NEXT:    ldur x9, [x0, #7]
@@ -743,11 +767,11 @@ define i32 @length15_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #14393, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB40_3
+; CHECK-NEXT:    b.ne .LBB42_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB40_3: // %res_block
+; CHECK-NEXT:  .LBB42_3: // %res_block
 ; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -782,7 +806,7 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #12594, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB42_3
+; CHECK-NEXT:    b.ne .LBB44_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    mov x8, #13365 // =0x3435
 ; CHECK-NEXT:    ldur x9, [x0, #7]
@@ -791,15 +815,15 @@ define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    movk x8, #14393, lsl #48
 ; CHECK-NEXT:    cmp x9, x8
-; CHECK-NEXT:    b.ne .LBB42_3
+; CHECK-NEXT:    b.ne .LBB44_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB42_4
-; CHECK-NEXT:  .LBB42_3: // %res_block
+; CHECK-NEXT:    b .LBB44_4
+; CHECK-NEXT:  .LBB44_3: // %res_block
 ; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB42_4: // %endblock
+; CHECK-NEXT:  .LBB44_4: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -817,18 +841,18 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB43_3
+; CHECK-NEXT:    b.ne .LBB45_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB43_3
+; CHECK-NEXT:    b.ne .LBB45_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB43_3: // %res_block
+; CHECK-NEXT:  .LBB45_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -859,18 +883,18 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB45_3
+; CHECK-NEXT:    b.ne .LBB47_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB45_3
+; CHECK-NEXT:    b.ne .LBB47_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB45_3: // %res_block
+; CHECK-NEXT:  .LBB47_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -889,22 +913,22 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB46_3
+; CHECK-NEXT:    b.ne .LBB48_3
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB46_3
+; CHECK-NEXT:    b.ne .LBB48_3
 ; CHECK-NEXT:  // %bb.2:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB46_4
-; CHECK-NEXT:  .LBB46_3: // %res_block
+; CHECK-NEXT:    b .LBB48_4
+; CHECK-NEXT:  .LBB48_3: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB46_4: // %endblock
+; CHECK-NEXT:  .LBB48_4: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -943,25 +967,25 @@ define i32 @length24(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB48_4
+; CHECK-NEXT:    b.ne .LBB50_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB48_4: // %res_block
+; CHECK-NEXT:  .LBB50_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w0, w8, hs
@@ -995,25 +1019,25 @@ define i1 @length24_lt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB50_4
+; CHECK-NEXT:    b.ne .LBB52_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    lsr w0, wzr, #31
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB50_4: // %res_block
+; CHECK-NEXT:  .LBB52_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
@@ -1032,29 +1056,29 @@ define i1 @length24_gt(ptr %x, ptr %y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB51_4
+; CHECK-NEXT:    b.ne .LBB53_4
 ; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB51_5
-; CHECK-NEXT:  .LBB51_4: // %res_block
+; CHECK-NEXT:    b .LBB53_5
+; CHECK-NEXT:  .LBB53_4: // %res_block
 ; CHECK-NEXT:    cmp x8, x9
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    cneg w8, w8, hs
-; CHECK-NEXT:  .LBB51_5: // %endblock
+; CHECK-NEXT:  .LBB53_5: // %endblock
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
@@ -1098,32 +1122,32 @@ define i32 @length31(ptr %X, ptr %Y) nounwind {
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.1: // %loadbb1
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x1, #8]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.2: // %loadbb2
 ; CHECK-NEXT:    ldr x8, [x0, #16]
 ; CHECK-NEXT:    ldr x9, [x1, #16]
 ; CHECK-NEXT:    rev x8, x8
 ; CHECK-NEXT:    rev x9, x9
 ; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    b.ne .LBB53_5
+; CHECK-NEXT:    b.ne .LBB55_5
 ; CHECK-NEXT:  // %bb.3: // %loadbb3
 ; CHECK-NEXT:    ldur x8, ...
[truncated]

Copy link

github-actions bot commented Nov 3, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@RKSimon RKSimon self-requested a review November 3, 2023 19:29
Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this @igogo-x86! It looks good - I just have a couple of minor comments.

@igogo-x86 igogo-x86 force-pushed the expand-memcmp-two-outcome-comparison branch from 7ce3e79 to 6974602 Compare November 6, 2023 12:48
Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! Thanks for making the changes @igogo-x86.

  This patch enhances the optimization of memcmp calls when only two outcomes
are needed and comparison fits into one block, for example:

	bool result = memcmp(a, b, 6) > 0;

  Previously, LLVM would generate unnecessary operations even when the user of
memcmp was only interested in a binary outcome.
@igogo-x86 igogo-x86 force-pushed the expand-memcmp-two-outcome-comparison branch from 8160d60 to 9fb8ac9 Compare November 9, 2023 10:27
@igogo-x86 igogo-x86 merged commit 59a063d into llvm:main Nov 9, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants