Skip to content

Conversation

@zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Nov 5, 2025

After overriding TargetTransformInfo::enableMemCmpExpansion in this commit, MergeICmps and ExpandMemCmp passes will be enabled on LoongArch.

@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from 79e394b to 0c42622 Compare November 6, 2025 06:56
@zhaoqi5 zhaoqi5 changed the base branch from main to users/zhaoqi5/tests-memcmp-expansion November 6, 2025 06:56
@zhaoqi5 zhaoqi5 marked this pull request as ready for review November 6, 2025 07:59
@llvmbot
Copy link
Member

llvmbot commented Nov 6, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

After overriding TargetTransformInfo::enableMemCmpExpansion in this commit, MergeICmps and ExpandMemCmp passes will be enabled on LoongArch.


Patch is 220.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166526.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp (+24-1)
  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h (+2-1)
  • (modified) llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll (+1619-527)
  • (modified) llvm/test/CodeGen/LoongArch/expandmemcmp.ll (+2594-715)
  • (modified) llvm/test/CodeGen/LoongArch/memcmp.ll (+18-9)
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8dd0532b..f6637ef58cf9c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,27 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
   }
 }
 
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+
+  if (!ST->hasUAL())
+    return Options;
+
+  // TODO: Set same as the default value of MaxLoadsPerMemcmp or
+  // MaxLoadsPerMemcmpOptSize. May need more consideration?
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  Options.AllowOverlappingLoads = true;
+
+  // TODO: Support for vectors.
+  if (ST->is64Bit()) {
+    Options.LoadSizes = {8, 4, 2, 1};
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
+    Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
+
+  return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7804994..9b479f9dc0dc5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
 
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
 
-  // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+  TTI::MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
index 82fe899bb795b..a6ed1f1db1678 100644
--- a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
@@ -38,260 +38,488 @@ entry:
 }
 
 define i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_1:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 1
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_1:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 1
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 1
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 1
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_2:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 2
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_2:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 2
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 2
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 2
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_3:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 3
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_3:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 3
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 3
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 3
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_4:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 4
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_4:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 4
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 4
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 4
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_5:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 5
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_5:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 5
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 5
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 5
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_6:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 6
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_6:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 6
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT:    ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 6
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 6
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_7:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 7
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 3
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 3
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_7:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 7
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.w $a0, $a0, 3
+; LA64-UAL-NEXT:    ld.w $a1, $a1, 3
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 7
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 7
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_8:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 8
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_8:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 8
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.d $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.d $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero,...
[truncated]

@SixWeining
Copy link
Contributor

How does this optimization affect the benchmark? For example llvm-test-suite/MicroBenchmarks/MemFunctions. Add some test results to help with comparison and code review. It might be necessary to test the data under various conditions, including different vector widths and optimization levels (e.g., O2 or Os).

Like:

********** TEST 'test-suite :: MicroBenchmarks/MemFunctions/MemFunctions.test' RESULTS **********
MicroBenchmarks: 168 
compile_time: 12.4107 
hash: "296316bb3a67daddac2c52b4f8ea7f0a" 
link_time: 0.2373 
size: 610272 
size..bss: 2097  
size..comment: 327 
size..data: 1056  
size..data.rel.ro: 1264  
size..dynamic: 464 
size..dynstr: 5587  
size..dynsym: 4056  
size..eh_frame: 33380 
size..eh_frame_hdr: 5140  
size..fini_array: 8 
size..gcc_except_table: 20424 
size..gnu.build.attributes: 144 
size..gnu.hash: 32 
size..gnu.version: 338 
size..gnu.version_r: 336 
size..got: 368 
size..got.plt: 1104  
size..init_array: 168 
size..interp: 15 
size..note.ABI-tag: 32 
size..plt: 2208  
size..rela.dyn: 7992  
size..rela.plt: 3264  
size..relro_padding: 2720  
size..rodata: 15138
size..sdata: 8
size..text: 379696
size..tm_clone_table: 0
**********
*** MICRO-TEST: BM_MemCmp<1, EqZero, First>
    exec_time:  6204.1292
*** MICRO-TEST: BM_MemCmp<1, EqZero, Last>
    exec_time:  6255.3082
*** MICRO-TEST: BM_MemCmp<1, EqZero, Mid>
    exec_time:  6271.8094
*** MICRO-TEST: BM_MemCmp<1, EqZero, None>
    exec_time:  6183.1820
*** MICRO-TEST: BM_MemCmp<1, GreaterThanZero, First>
    exec_time:  6230.7436
*** MICRO-TEST: BM_MemCmp<1, GreaterThanZero, Last>
    exec_time:  6223.1609
...
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, Last>
    exec_time:  3883.0747
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, Mid>
    exec_time:  3629.4128
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, None>
    exec_time:  4144.3657
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, First>
    exec_time:  4162.2478
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, Last>
    exec_time:  3886.9091
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, Mid>
    exec_time:  3886.2970
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, None>
    exec_time:  3639.5443

@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Nov 6, 2025

How does this optimization affect the benchmark? For example llvm-test-suite/MicroBenchmarks/MemFunctions. Add some test results to help with comparison and code review. It might be necessary to test the data under various conditions, including different vector widths and optimization levels (e.g., O2 or Os).

Okay, I will try to test some benchmarks such as test-suite or spec cpu and add the results later. Thanks.

@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Nov 6, 2025

I have just tested MicroBenchmarks/MemFunctions in the test-suite using the O3+lsx, O2+nolsx, and Os+lsx options. All configurations give similar results:

  • About 208 memcmp in the benchmark are expanded after this commit.
  • Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detailed results of O3+lsx: before.txt, after.txt.

This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?


// TODO: Implement more hooks to provide TTI machinery for LoongArch.
LoongArchTTIImpl::TTI::MemCmpExpansionOptions
LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IsZeroCmp is not used currently?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is not used for now. Whether or not it is a ZeroCmp, enabling memcmp expansion is beneficial.

if (!ST->hasUAL())
return Options;

// TODO: Set same as the default value of MaxLoadsPerMemcmp or
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite understand what this TODO means...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MaxNumLoads sets the maximum number of loads allowed when expanding memcmp (refer to the max-loads-per-memcmp option). By default, TLI->getMaxExpandSizeMemcmp returns the default value of MaxLoadsPerMemcmpOptSize (which is 4) or MaxLoadsPerMemcmp (which is 8).

Targets can set these two values to allow expanding memcmp into larger or smaller sequences.

@SixWeining
Copy link
Contributor

I have just tested the MicroBenchmarks/MemFunctions in test-suite, using O3+lsx, O2+nolsx or Os+lsx options. All get the similar results:

  • About 208 memcmp in the benchmark are expanded after this commit.
  • Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detail results of O3+lsx: before.txt, after.txt.

This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?

I think that is enough.

@zhaoqi5
Copy link
Contributor Author

zhaoqi5 commented Nov 7, 2025

I have just tested the MicroBenchmarks/MemFunctions in test-suite, using O3+lsx, O2+nolsx or Os+lsx options. All get the similar results:

  • About 208 memcmp in the benchmark are expanded after this commit.
  • Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detail results of O3+lsx: before.txt, after.txt.
This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?

I think that is enough.

Great. Thanks a lot.

@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from b89ea92 to 25c01e2 Compare November 7, 2025 03:53
; LA64-NEXT: ret
; LA64-UAL-LABEL: bcmp_lt_zero:
; LA64-UAL: # %bb.0: # %entry
; LA64-UAL-NEXT: move $a0, $zero
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure whether it is correct.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After the expand-memcmp pass, the original IR is expanded and optimized to:

  %0 = load i32, ptr %s1, align 1
  %1 = load i32, ptr %s2, align 1
  %2 = icmp ne i32 %0, %1
  %3 = zext i1 %2 to i32
  ret i1 false

So the result is always false.

I also noticed that the test bcmp_ge_zero always returns true, so it seems the pass always assumes that bcmp never returns a negative result? I am not sure whether this is an assumption of LLVM or a mishandling in this pass. I tried the program below using -O0 (which actually calls bcmp):

#include <stdio.h>
#include <string.h>
int main () {
  char *s0 = "0000000";
  char *s1 = "1111111";
  printf("= : %d\n", bcmp(s1, s1, 7));
  printf("> : %d\n", bcmp(s1, s0, 7));
  printf("< : %d\n", bcmp(s0, s1, 7));
  return 0;
}

The result is:

= : 0
> : 1
< : -1

Copy link
Contributor Author

@zhaoqi5 zhaoqi5 Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the following program is compiled with clang, the function test gets optimized after this commit.

#include <stdio.h>
#include <string.h>
int test(char* s0, char* s1) {
  if (bcmp(s0, s1, 4) > -1) 
    return 1;
  return 0;
}
int main() {
  char s0[10], s1[10];
  printf("cin s0 (more than 4): ");
  scanf("%9s", s0);
  printf("cin s1 (more than 4): ");
  scanf("%9s", s1);
  printf("%d\n", test(s0, s1));
  return 0;
}

The results differ:

> clang t.c -O3
> ./a.out
cin s0 (more than 4): aaaaa
cin s1 (more than 4): bbbbb
1
> clang t.c -O0
> ./a.out
cin s0 (more than 4): aaaaa
cin s1 (more than 4): bbbbb
0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appears after 46a13a0.

Base automatically changed from users/zhaoqi5/tests-memcmp-expansion to main November 10, 2025 01:59
After overriding `TargetTransformInfo::enableMemCmpExpansion`
in this commit, `MergeICmps` and `ExpandMemCmp` passes will be
enabled on LoongArch.
@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from db651a0 to fd004e2 Compare November 10, 2025 02:05
@zhaoqi5 zhaoqi5 merged commit f734ceb into main Nov 10, 2025
10 checks passed
@zhaoqi5 zhaoqi5 deleted the users/zhaoqi5/opt-enable-memcmp-expansion branch November 10, 2025 03:43
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 10, 2025

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/27046

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: commands/memory/write/TestMemoryWrite.py (195 of 2370)
PASS: lldb-api :: commands/platform/file/close/TestPlatformFileClose.py (196 of 2370)
PASS: lldb-api :: commands/platform/file/read/TestPlatformFileRead.py (197 of 2370)
PASS: lldb-api :: commands/memory/read/TestMemoryRead.py (198 of 2370)
PASS: lldb-api :: commands/platform/connect/TestPlatformConnect.py (199 of 2370)
UNSUPPORTED: lldb-api :: commands/platform/sdk/TestPlatformSDK.py (200 of 2370)
PASS: lldb-api :: commands/plugin/TestPlugin.py (201 of 2370)
PASS: lldb-api :: commands/platform/process/launch/TestPlatformProcessLaunch.py (202 of 2370)
PASS: lldb-api :: commands/platform/process/list/TestProcessList.py (203 of 2370)
UNRESOLVED: lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py (204 of 2370)
******************** TEST 'lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads -p TestGuiSpawnThreads.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision f734cebc396bfb0a3523d205071764f689432ab4)
  clang revision f734cebc396bfb0a3523d205071764f689432ab4
  llvm revision f734cebc396bfb0a3523d205071764f689432ab4
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'pdb', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
FAIL: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
======================================================================
ERROR: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/decorators.py", line 156, in wrapper
    return func(*args, **kwargs)
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads/TestGuiSpawnThreads.py", line 44, in test_gui
    self.child.expect_exact(f"thread #{i + 2}: tid =")
  File "/usr/local/lib/python3.10/dist-packages/pexpect/spawnbase.py", line 432, in expect_exact
    return exp.expect_loop(timeout)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 179, in expect_loop
    return self.eof(e)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 122, in eof
    raise exc
pexpect.exceptions.EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0xfb0d167bd420>
command: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb
args: ['/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb', '--no-lldbinit', '--no-use-colors', '-O', 'settings clear --all', '-O', 'settings set symbols.enable-external-lookup false', '-O', 'settings set target.inherit-tcc true', '-O', 'settings set target.disable-aslr false', '-O', 'settings set target.detach-on-error false', '-O', 'settings set target.auto-apply-fixits false', '-O', 'settings set plugin.process.gdb-remote.packet-timeout 60', '-O', 'settings set symbols.clang-modules-cache-path "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api"', '-O', 'settings set use-color false', '-O', 'settings set show-statusline false', '--file', '/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/commands/gui/spawn-threads/TestGuiSpawnThreads.test_gui/a.out']
buffer (last 100 chars): b''
before (last 100 chars): b'8 0x0000bb9354225330 _start (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb+0x45330)\n'
after: <class 'pexpect.exceptions.EOF'>

@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 10, 2025

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/17987

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 10, 2025

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vla running on linaro-g3-04 while building llvm at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/17/builds/12521

Here is the relevant piece of the build log for the reference
Step 7 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'libFuzzer-aarch64-default-Linux :: merge-sigusr.test' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g

--
Command Output (stderr):
--
rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp # RUN: at line 6
+ rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp
mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp # RUN: at line 7
+ mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/lib/fuzzer  -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta  /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/test/fuzzer/SleepOneSecondTest.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR # RUN: at line 8
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/lib/fuzzer -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/test/fuzzer/SleepOneSecondTest.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR
mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2 # RUN: at line 10
+ mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2
echo a > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/a # RUN: at line 11
+ echo a
echo b > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/b # RUN: at line 12
+ echo b
echo c > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/c # RUN: at line 13
+ echo c
echo d > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/d # RUN: at line 14
+ echo d
echo e > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/e # RUN: at line 15
+ echo e
echo f > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/f # RUN: at line 16
+ echo f
echo g > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 17
+ echo g
echo h > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 18
+ echo h
echo i > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 19
+ echo i
echo j > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 20
+ echo j
echo k > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 21
+ echo k
echo l > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 22
+ echo l
echo m > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 23
+ echo m
echo n > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 24
+ echo n
echo o > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 25
+ echo o
setsid  /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR -merge=1 -merge_control_file=/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/MCF /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2 2>/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/log & export PID=$! # RUN: at line 28
+ export PID=1875274
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants