[LoongArch] Initial implementation for `enableMemCmpExpansion` hook #166526

zhaoqi5 · 2025-11-05T10:10:06Z

After overriding TargetTransformInfo::enableMemCmpExpansion in this commit, MergeICmps and ExpandMemCmp passes will be enabled on LoongArch.

llvmbot · 2025-11-06T08:00:04Z

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

After overriding TargetTransformInfo::enableMemCmpExpansion in this commit, MergeICmps and ExpandMemCmp passes will be enabled on LoongArch.

Patch is 220.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166526.diff

5 Files Affected:

(modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp (+24-1)
(modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h (+2-1)
(modified) llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll (+1619-527)
(modified) llvm/test/CodeGen/LoongArch/expandmemcmp.ll (+2594-715)
(modified) llvm/test/CodeGen/LoongArch/memcmp.ll (+18-9)

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8dd0532b..f6637ef58cf9c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,27 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
   }
 }
 
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+
+  if (!ST->hasUAL())
+    return Options;
+
+  // TODO: Set same as the default value of MaxLoadsPerMemcmp or
+  // MaxLoadsPerMemcmpOptSize. May need more consideration?
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  Options.AllowOverlappingLoads = true;
+
+  // TODO: Support for vectors.
+  if (ST->is64Bit()) {
+    Options.LoadSizes = {8, 4, 2, 1};
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
+    Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
+
+  return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7804994..9b479f9dc0dc5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
 
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
 
-  // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+  TTI::MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
index 82fe899bb795b..a6ed1f1db1678 100644
--- a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
@@ -38,260 +38,488 @@ entry:
 }
 
 define i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_1:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 1
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_1:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 1
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 1
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 1
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_2:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 2
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_2:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 2
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 2
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 2
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_3:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 3
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_3:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 3
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 3
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 3
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_4:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 4
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_4:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 4
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 4
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 4
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_5:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 5
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_5:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 5
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT:    ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 5
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 5
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_6:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 6
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_6:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 6
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT:    ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 6
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 6
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_7:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 7
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 3
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 3
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_7:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 7
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.w $a0, $a0, 3
+; LA64-UAL-NEXT:    ld.w $a1, $a1, 3
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 7
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 7
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
   ret i32 %bcmp
 }
 
 define i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
-; LA32-LABEL: bcmp_size_8:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 8
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 4
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 4
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a0, $a2, $a0
+; LA32-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_size_8:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 8
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.d $a0, $a0, 0
+; LA64-UAL-NEXT:    ld.d $a1, $a1, 0
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    sltu $a0, $zero, $a0
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero,...
[truncated]

SixWeining · 2025-11-06T08:43:18Z

How does this optimization affect the benchmark? For example llvm-test-suite/MicroBenchmarks/MemFunctions. Add some test results to help with comparison and code review. It might be necessary to test the data under various conditions, including different vector widths and optimization levels (e.g., O2 or Os).

Like:

********** TEST 'test-suite :: MicroBenchmarks/MemFunctions/MemFunctions.test' RESULTS **********
MicroBenchmarks: 168 
compile_time: 12.4107 
hash: "296316bb3a67daddac2c52b4f8ea7f0a" 
link_time: 0.2373 
size: 610272 
size..bss: 2097  
size..comment: 327 
size..data: 1056  
size..data.rel.ro: 1264  
size..dynamic: 464 
size..dynstr: 5587  
size..dynsym: 4056  
size..eh_frame: 33380 
size..eh_frame_hdr: 5140  
size..fini_array: 8 
size..gcc_except_table: 20424 
size..gnu.build.attributes: 144 
size..gnu.hash: 32 
size..gnu.version: 338 
size..gnu.version_r: 336 
size..got: 368 
size..got.plt: 1104  
size..init_array: 168 
size..interp: 15 
size..note.ABI-tag: 32 
size..plt: 2208  
size..rela.dyn: 7992  
size..rela.plt: 3264  
size..relro_padding: 2720  
size..rodata: 15138
size..sdata: 8
size..text: 379696
size..tm_clone_table: 0
**********
*** MICRO-TEST: BM_MemCmp<1, EqZero, First>
    exec_time:  6204.1292
*** MICRO-TEST: BM_MemCmp<1, EqZero, Last>
    exec_time:  6255.3082
*** MICRO-TEST: BM_MemCmp<1, EqZero, Mid>
    exec_time:  6271.8094
*** MICRO-TEST: BM_MemCmp<1, EqZero, None>
    exec_time:  6183.1820
*** MICRO-TEST: BM_MemCmp<1, GreaterThanZero, First>
    exec_time:  6230.7436
*** MICRO-TEST: BM_MemCmp<1, GreaterThanZero, Last>
    exec_time:  6223.1609
...
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, Last>
    exec_time:  3883.0747
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, Mid>
    exec_time:  3629.4128
*** MICRO-TEST: BM_MemCmp<8, GreaterThanZero, None>
    exec_time:  4144.3657
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, First>
    exec_time:  4162.2478
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, Last>
    exec_time:  3886.9091
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, Mid>
    exec_time:  3886.2970
*** MICRO-TEST: BM_MemCmp<8, LessThanZero, None>
    exec_time:  3639.5443

zhaoqi5 · 2025-11-06T08:52:11Z

How does this optimization affect the benchmark? For example llvm-test-suite/MicroBenchmarks/MemFunctions. Add some test results to help with comparison and code review. It might be necessary to test the data under various conditions, including different vector widths and optimization levels (e.g., O2 or Os).

Okay, I will try to test some benchmarks such as test-suite or spec cpu and add the results later. Thanks.

zhaoqi5 · 2025-11-06T12:34:25Z

I have just tested MicroBenchmarks/MemFunctions in the test-suite, using the O3+lsx, O2+nolsx and Os+lsx options. All of them give similar results:

About 208 memcmp in the benchmark are expanded after this commit.
Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detail results of O3+lsx: before.txt, after.txt.

This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?

SixWeining · 2025-11-07T02:29:18Z

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp


-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {


IsZeroCmp is not used currently?

Yes, it is not used for now. Whether or not it is a ZeroCmp, enabling memcmp expansion is beneficial.

SixWeining · 2025-11-07T02:30:29Z

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp

+  if (!ST->hasUAL())
+    return Options;
+
+  // TODO: Set same as the default value of MaxLoadsPerMemcmp or


I don't quite understand what this TODO means...

MaxNumLoads sets the maximum number of loads that may be used when expanding memcmp (refer to the max-loads-per-memcmp option). By default, TLI->getMaxExpandSizeMemcmp returns the default value of MaxLoadsPerMemcmpOptSize (which is 4) or MaxLoadsPerMemcmp (which is 8).

Targets can set these two values to allow expanding memcmp to larger or smaller sequences.

SixWeining · 2025-11-07T02:31:49Z

I have just tested the MicroBenchmarks/MemFunctions in test-suite, using O3+lsx, O2+nolsx or Os+lsx options. All get the similar results:

About 208 memcmp in the benchmark are expanded after this commit.

Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detail results of O3+lsx: before.txt, after.txt.

This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?

I think that is enough.

zhaoqi5 · 2025-11-07T03:26:27Z

I have just tested the MicroBenchmarks/MemFunctions in test-suite, using O3+lsx, O2+nolsx or Os+lsx options. All get the similar results:

About 208 memcmp in the benchmark are expanded after this commit.

Among its 168 tests, 144 have significant improvement (about 2 to 10 times).

Detail results of O3+lsx: before.txt, after.txt.
This benchmark is specifically designed to test memcmp, so it can directly show the benefits of this commit. Is it necessary to run spec cpu for more general results?

I think that is enough.

Great. Thanks a lot.

SixWeining · 2025-11-07T04:16:10Z

llvm/test/CodeGen/LoongArch/expandmemcmp.ll

-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    move $a0, $zero


I'm not sure whether it is correct.

After expand-memcmp pass, the original IR is expanded and optimized to:

%0 = load i32, ptr %s1, align 1
%1 = load i32, ptr %s2, align 1
%2 = icmp ne i32 %0, %1
%3 = zext i1 %2 to i32
ret i1 false

So the result is always false.

I also noticed that the test bcmp_ge_zero always returns true, so it seems to always be assumed that bcmp never returns a negative result. I am not sure whether this is an assumption made by LLVM or a mistake in this pass. I tried the following with -O0 (which actually calls bcmp):

#include <stdio.h>
#include <string.h>

int main () {
  char *s0 = "0000000";
  char *s1 = "1111111";
  printf("= : %d\n", bcmp(s1, s1, 7));
  printf("> : %d\n", bcmp(s1, s0, 7));
  printf("< : %d\n", bcmp(s0, s1, 7));
  return 0;
}

The result is:

= : 0
> : 1
< : -1

Compiling this using clang, the function test can be optimized after this commit.

#include <stdio.h> #include <string.h> int test(char* s0, char* s1) { if (bcmp(s0, s1, 4) > -1) return 1; return 0; } int main() { char s0[10], s1[10]; printf("cin s0 (more than 4): "); scanf("%9s", s0); printf("cin s1 (more than 4): "); scanf("%9s", s1); printf("%d\n", test(s0, s1)); return 0; }

The results differ:

> clang t.c -O3
> ./a.out
cin s0 (more than 4): aaaaa
cin s1 (more than 4): bbbbb
1
> clang t.c -O0
> ./a.out
cin s0 (more than 4): aaaaa
cin s1 (more than 4): bbbbb
0

Appears after 46a13a0.

After overriding `TargetTransformInfo::enableMemCmpExpansion` in this commit, `MergeICmps` and `ExpandMemCmp` passes will be enabled on LoongArch.

llvm-ci · 2025-11-10T03:55:15Z

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/27046

Here is the relevant piece of the build log for the reference

Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: commands/memory/write/TestMemoryWrite.py (195 of 2370)
PASS: lldb-api :: commands/platform/file/close/TestPlatformFileClose.py (196 of 2370)
PASS: lldb-api :: commands/platform/file/read/TestPlatformFileRead.py (197 of 2370)
PASS: lldb-api :: commands/memory/read/TestMemoryRead.py (198 of 2370)
PASS: lldb-api :: commands/platform/connect/TestPlatformConnect.py (199 of 2370)
UNSUPPORTED: lldb-api :: commands/platform/sdk/TestPlatformSDK.py (200 of 2370)
PASS: lldb-api :: commands/plugin/TestPlugin.py (201 of 2370)
PASS: lldb-api :: commands/platform/process/launch/TestPlatformProcessLaunch.py (202 of 2370)
PASS: lldb-api :: commands/platform/process/list/TestProcessList.py (203 of 2370)
UNRESOLVED: lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py (204 of 2370)
******************** TEST 'lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads -p TestGuiSpawnThreads.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision f734cebc396bfb0a3523d205071764f689432ab4)
  clang revision f734cebc396bfb0a3523d205071764f689432ab4
  llvm revision f734cebc396bfb0a3523d205071764f689432ab4
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'pdb', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
FAIL: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
======================================================================
ERROR: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/decorators.py", line 156, in wrapper
    return func(*args, **kwargs)
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads/TestGuiSpawnThreads.py", line 44, in test_gui
    self.child.expect_exact(f"thread #{i + 2}: tid =")
  File "/usr/local/lib/python3.10/dist-packages/pexpect/spawnbase.py", line 432, in expect_exact
    return exp.expect_loop(timeout)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 179, in expect_loop
    return self.eof(e)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 122, in eof
    raise exc
pexpect.exceptions.EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0xfb0d167bd420>
command: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb
args: ['/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb', '--no-lldbinit', '--no-use-colors', '-O', 'settings clear --all', '-O', 'settings set symbols.enable-external-lookup false', '-O', 'settings set target.inherit-tcc true', '-O', 'settings set target.disable-aslr false', '-O', 'settings set target.detach-on-error false', '-O', 'settings set target.auto-apply-fixits false', '-O', 'settings set plugin.process.gdb-remote.packet-timeout 60', '-O', 'settings set symbols.clang-modules-cache-path "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api"', '-O', 'settings set use-color false', '-O', 'settings set show-statusline false', '--file', '/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/commands/gui/spawn-threads/TestGuiSpawnThreads.test_gui/a.out']
buffer (last 100 chars): b''
before (last 100 chars): b'8 0x0000bb9354225330 _start (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb+0x45330)\n'
after: <class 'pexpect.exceptions.EOF'>

llvm-ci · 2025-11-10T03:56:05Z

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/17987

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

llvm-ci · 2025-11-10T04:12:51Z

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vla running on linaro-g3-04 while building llvm at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/17/builds/12521

Here is the relevant piece of the build log for the reference

Step 7 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'libFuzzer-aarch64-default-Linux :: merge-sigusr.test' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g

--
Command Output (stderr):
--
rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp # RUN: at line 6
+ rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp
mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp # RUN: at line 7
+ mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp
/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/lib/fuzzer  -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta  /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/test/fuzzer/SleepOneSecondTest.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR # RUN: at line 8
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/lib/fuzzer -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/compiler-rt/test/fuzzer/SleepOneSecondTest.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR
mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2 # RUN: at line 10
+ mkdir -p /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2
echo a > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/a # RUN: at line 11
+ echo a
echo b > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/b # RUN: at line 12
+ echo b
echo c > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/c # RUN: at line 13
+ echo c
echo d > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/d # RUN: at line 14
+ echo d
echo e > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/e # RUN: at line 15
+ echo e
echo f > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/f # RUN: at line 16
+ echo f
echo g > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 17
+ echo g
echo h > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 18
+ echo h
echo i > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 19
+ echo i
echo j > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 20
+ echo j
echo k > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 21
+ echo k
echo l > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 22
+ echo l
echo m > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 23
+ echo m
echo n > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 24
+ echo n
echo o > /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2/g # RUN: at line 25
+ echo o
setsid  /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/LFSIGUSR -merge=1 -merge_control_file=/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/MCF /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C1 /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/C2 2>/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/AARCH64DefaultLinuxConfig/Output/merge-sigusr.test.tmp/log & export PID=$! # RUN: at line 28
+ export PID=1875274
...

tangaac mentioned this pull request Nov 5, 2025

LLVM optimization on PR 166526 [bot] llvm-la/llvm-opt-ci#86

Open

zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from 79e394b to 0c42622 Compare November 6, 2025 06:56

zhaoqi5 changed the base branch from main to users/zhaoqi5/tests-memcmp-expansion November 6, 2025 06:56

zhaoqi5 marked this pull request as ready for review November 6, 2025 07:59

llvmbot added the backend:loongarch label Nov 6, 2025

SixWeining reviewed Nov 7, 2025

View reviewed changes

zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from b89ea92 to 25c01e2 Compare November 7, 2025 03:53

SixWeining reviewed Nov 7, 2025

View reviewed changes

SixWeining approved these changes Nov 7, 2025

View reviewed changes

Base automatically changed from users/zhaoqi5/tests-memcmp-expansion to main November 10, 2025 01:59

zhaoqi5 added 4 commits November 10, 2025 10:02

[LoongArch] Initial implementation for enableMemCmpExpansion hook

d62b0ad

After overriding `TargetTransformInfo::enableMemCmpExpansion` in this commit, `MergeICmps` and `ExpandMemCmp` passes will be enabled on LoongArch.

enable tail expansion which will reduce branches like aarch64/riscv

3cc959d

rebase && update tests

25bbbf0

remove TODO

fd004e2

zhaoqi5 force-pushed the users/zhaoqi5/opt-enable-memcmp-expansion branch from db651a0 to fd004e2 Compare November 10, 2025 02:05

zhaoqi5 merged commit f734ceb into main Nov 10, 2025
10 checks passed

zhaoqi5 deleted the users/zhaoqi5/opt-enable-memcmp-expansion branch November 10, 2025 03:43

nigham mentioned this pull request Nov 10, 2025

[libc] Implement fchown #167286

Merged

[LoongArch] Initial implementation for enableMemCmpExpansion hook #166526

[LoongArch] Initial implementation for enableMemCmpExpansion hook #166526

Uh oh!

Conversation

zhaoqi5 commented Nov 5, 2025

Uh oh!

llvmbot commented Nov 6, 2025

Uh oh!

SixWeining commented Nov 6, 2025

Uh oh!

zhaoqi5 commented Nov 6, 2025

Uh oh!

zhaoqi5 commented Nov 6, 2025

Uh oh!

SixWeining Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

zhaoqi5 Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

SixWeining Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

zhaoqi5 Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

SixWeining commented Nov 7, 2025

Uh oh!

zhaoqi5 commented Nov 7, 2025

Uh oh!

SixWeining Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

zhaoqi5 Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

zhaoqi5 Nov 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

zhaoqi5 Nov 7, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Nov 10, 2025

Uh oh!

llvm-ci commented Nov 10, 2025

Uh oh!

llvm-ci commented Nov 10, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

[LoongArch] Initial implementation for `enableMemCmpExpansion` hook #166526

[LoongArch] Initial implementation for `enableMemCmpExpansion` hook #166526

zhaoqi5 Nov 7, 2025 •

edited

Loading