Skip to content

Conversation

diggerlin
Copy link
Contributor

@diggerlin diggerlin commented Sep 12, 2025

The newly added test case will be used to verify a more efficient memcmp in cases where the length is known.

@llvmbot
Copy link
Member

llvmbot commented Sep 12, 2025

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

Changes

The newly added test case will be used to test a more efficient memcmp in cases where the length is known.


Full diff: https://github.com/llvm/llvm-project/pull/158367.diff

1 Files Affected:

  • (added) llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll (+98)
diff --git a/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll
new file mode 100644
index 0000000000000..62048f8f8dfc6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX64-32-P8
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX64-32-P10
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-LINUX64-P8
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-LINUX64-P10
+
+define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
+; CHECK-AIX64-32-P8-LABEL: cmpeq16:
+; CHECK-AIX64-32-P8:       # %bb.0: # %entry
+; CHECK-AIX64-32-P8-NEXT:    ld r5, 0(r3)
+; CHECK-AIX64-32-P8-NEXT:    ld r6, 0(r4)
+; CHECK-AIX64-32-P8-NEXT:    cmpld r5, r6
+; CHECK-AIX64-32-P8-NEXT:    bne cr0, L..BB0_2
+; CHECK-AIX64-32-P8-NEXT:  # %bb.1: # %loadbb1
+; CHECK-AIX64-32-P8-NEXT:    ld r5, 8(r3)
+; CHECK-AIX64-32-P8-NEXT:    ld r4, 8(r4)
+; CHECK-AIX64-32-P8-NEXT:    li r3, 0
+; CHECK-AIX64-32-P8-NEXT:    cmpld r5, r4
+; CHECK-AIX64-32-P8-NEXT:    beq cr0, L..BB0_3
+; CHECK-AIX64-32-P8-NEXT:  L..BB0_2: # %res_block
+; CHECK-AIX64-32-P8-NEXT:    li r3, 1
+; CHECK-AIX64-32-P8-NEXT:  L..BB0_3: # %endblock
+; CHECK-AIX64-32-P8-NEXT:    cntlzw r3, r3
+; CHECK-AIX64-32-P8-NEXT:    srwi r3, r3, 5
+; CHECK-AIX64-32-P8-NEXT:    blr
+;
+; CHECK-AIX64-32-P10-LABEL: cmpeq16:
+; CHECK-AIX64-32-P10:       # %bb.0: # %entry
+; CHECK-AIX64-32-P10-NEXT:    ld r5, 0(r3)
+; CHECK-AIX64-32-P10-NEXT:    ld r6, 0(r4)
+; CHECK-AIX64-32-P10-NEXT:    cmpld r5, r6
+; CHECK-AIX64-32-P10-NEXT:    bne cr0, L..BB0_2
+; CHECK-AIX64-32-P10-NEXT:  # %bb.1: # %loadbb1
+; CHECK-AIX64-32-P10-NEXT:    ld r5, 8(r3)
+; CHECK-AIX64-32-P10-NEXT:    ld r4, 8(r4)
+; CHECK-AIX64-32-P10-NEXT:    li r3, 0
+; CHECK-AIX64-32-P10-NEXT:    cmpld r5, r4
+; CHECK-AIX64-32-P10-NEXT:    beq cr0, L..BB0_3
+; CHECK-AIX64-32-P10-NEXT:  L..BB0_2: # %res_block
+; CHECK-AIX64-32-P10-NEXT:    li r3, 1
+; CHECK-AIX64-32-P10-NEXT:  L..BB0_3: # %endblock
+; CHECK-AIX64-32-P10-NEXT:    cntlzw r3, r3
+; CHECK-AIX64-32-P10-NEXT:    rlwinm r3, r3, 27, 31, 31
+; CHECK-AIX64-32-P10-NEXT:    blr
+;
+; CHECK-LINUX64-P8-LABEL: cmpeq16:
+; CHECK-LINUX64-P8:       # %bb.0: # %entry
+; CHECK-LINUX64-P8-NEXT:    ld r5, 0(r3)
+; CHECK-LINUX64-P8-NEXT:    ld r6, 0(r4)
+; CHECK-LINUX64-P8-NEXT:    cmpld r5, r6
+; CHECK-LINUX64-P8-NEXT:    bne cr0, .LBB0_2
+; CHECK-LINUX64-P8-NEXT:  # %bb.1: # %loadbb1
+; CHECK-LINUX64-P8-NEXT:    ld r5, 8(r3)
+; CHECK-LINUX64-P8-NEXT:    ld r4, 8(r4)
+; CHECK-LINUX64-P8-NEXT:    li r3, 0
+; CHECK-LINUX64-P8-NEXT:    cmpld r5, r4
+; CHECK-LINUX64-P8-NEXT:    beq cr0, .LBB0_3
+; CHECK-LINUX64-P8-NEXT:  .LBB0_2: # %res_block
+; CHECK-LINUX64-P8-NEXT:    li r3, 1
+; CHECK-LINUX64-P8-NEXT:  .LBB0_3: # %endblock
+; CHECK-LINUX64-P8-NEXT:    cntlzw r3, r3
+; CHECK-LINUX64-P8-NEXT:    srwi r3, r3, 5
+; CHECK-LINUX64-P8-NEXT:    blr
+;
+; CHECK-LINUX64-P10-LABEL: cmpeq16:
+; CHECK-LINUX64-P10:       # %bb.0: # %entry
+; CHECK-LINUX64-P10-NEXT:    ld r5, 0(r3)
+; CHECK-LINUX64-P10-NEXT:    ld r6, 0(r4)
+; CHECK-LINUX64-P10-NEXT:    cmpld r5, r6
+; CHECK-LINUX64-P10-NEXT:    bne cr0, .LBB0_2
+; CHECK-LINUX64-P10-NEXT:  # %bb.1: # %loadbb1
+; CHECK-LINUX64-P10-NEXT:    ld r5, 8(r3)
+; CHECK-LINUX64-P10-NEXT:    ld r4, 8(r4)
+; CHECK-LINUX64-P10-NEXT:    li r3, 0
+; CHECK-LINUX64-P10-NEXT:    cmpld r5, r4
+; CHECK-LINUX64-P10-NEXT:    beq cr0, .LBB0_3
+; CHECK-LINUX64-P10-NEXT:  .LBB0_2: # %res_block
+; CHECK-LINUX64-P10-NEXT:    li r3, 1
+; CHECK-LINUX64-P10-NEXT:  .LBB0_3: # %endblock
+; CHECK-LINUX64-P10-NEXT:    cntlzw r3, r3
+; CHECK-LINUX64-P10-NEXT:    rlwinm r3, r3, 27, 31, 31
+; CHECK-LINUX64-P10-NEXT:    blr
+entry:
+  %bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i64 16) ; length is the compile-time constant 16, the fixed-size case this test pins
+  %cmp = icmp eq i32 %bcmp, 0 ; i1 true iff the bcmp call returned 0
+  %conv = zext i1 %cmp to i32 ; widen to i32, so the result is 0 or 1 (matches range(i32 0, 2) on the return)
+  ret i32 %conv
+}
+
+declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i64) ; external symbol: only declared here, called from @cmpeq16
+

@diggerlin diggerlin force-pushed the digger/NFC-memcmp_fixsize-testcase branch from 5319875 to 3a0aa01 Compare September 15, 2025 13:47
@diggerlin diggerlin merged commit 4bf0001 into llvm:main Sep 15, 2025
10 of 11 checks passed
itzexpoexpo pushed a commit to itzexpoexpo/llvm-project that referenced this pull request Sep 21, 2025
…p in cases where the length is known (llvm#158367)

The newly added test case will be used to verify a more efficient memcmp
in cases where the length is known.
SeongjaeP pushed a commit to SeongjaeP/llvm-project that referenced this pull request Sep 23, 2025
…p in cases where the length is known (llvm#158367)

The newly added test case will be used to verify a more efficient memcmp
in cases where the length is known.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants