[RA] Disable split around hint register if optimize for size #68619

weiguozhi · 2023-10-09T18:52:25Z

Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks and thus increase code size, so disable this split when the function is optimized for size.

llvmbot · 2023-10-09T18:53:36Z

@llvm/pr-subscribers-backend-arm
@llvm/pr-subscribers-debuginfo

@llvm/pr-subscribers-backend-x86

Changes

Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks and thus increase code size, so disable this split when the function is optimized for size.

Full diff: https://github.com/llvm/llvm-project/pull/68619.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/RegAllocGreedy.cpp (+3)
(modified) llvm/test/CodeGen/ARM/thumb2-size-opt.ll (+2-2)
(added) llvm/test/CodeGen/X86/no-split-size.ll (+92)
(modified) llvm/test/DebugInfo/ARM/sdag-split-arg.ll (+2-2)

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4d476924a7dbf7b..1bfaba00a267fb6 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,9 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                      const LiveInterval &VirtReg,
                                      SmallVectorImpl<Register> &NewVRegs,
                                      AllocationOrder &Order) {
+  if (MF->getFunction().hasOptSize())
+    return false;
+
   // Don't allow repeated splitting as a safe guard against looping.
   if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
     return false;
diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index 8cf7a702e8ed54d..f9f29fc064a20ce 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
 
 define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
 ; CHECK-LABEL: bundled_instruction:
-; CHECK: iteee ne
-; CHECK: ldmeq r2!, {{{r[0-9]+}}}
+; CHECK: itee ne
+; CHECK: ldmeq r3!, {{{r[0-9]+}}}
   br i1 %tst, label %true, label %false
 
 true:
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
new file mode 100644
index 000000000000000..305aeea34d8f213
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
+; in cold blocks.
+
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -48
+; CHECK-NEXT:    .cfi_offset %r12, -40
+; CHECK-NEXT:    .cfi_offset %r13, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %r15, -16
+; CHECK-NEXT:    movq %r9, %r14
+; CHECK-NEXT:    movq %r8, %rbx
+; CHECK-NEXT:    movq %rcx, %r12
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movq %rsi, %r13
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %if.else
+; CHECK-NEXT:    testq %r13, %r13
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    addq %r13, %rax
+; CHECK-NEXT:    addq %r12, %r15
+; CHECK-NEXT:    addq %rax, %r15
+; CHECK-NEXT:    addq %r14, %rbx
+; CHECK-NEXT:    addq %r15, %rbx
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1: # %if.then
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    callq bar1@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_3: # %if.then2
+; CHECK-NEXT:    callq bar2@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %tobool.not = icmp eq ptr %ptr, null
+  br i1 %tobool.not, label %if.then, label %if.else, !prof !5
+
+if.then:                                          ; preds = %entry
+  %call1 = call i64 @bar1()
+  br label %if.end
+
+if.else:
+  %cond = icmp eq i64 %p2, 0
+  br i1 %cond, label %if.then2, label %if.end, !prof !5
+
+if.then2:
+  %call2 = call i64 @bar2()
+  br label %if.end
+
+if.end:
+  %call = phi i64 [ %call1, %if.then ], [%call2, %if.then2], [ %p3, %if.else ]
+  %add1 = add i64 %call, %p2
+  %add2 = add i64 %add1, %p3
+  %add3 = add i64 %add2, %p4
+  %add4 = add i64 %add3, %p5
+  %res = add i64 %add4, %p6
+  ret i64 %res
+}
+
+attributes #0 = { optsize }
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare i64 @bar1()
+declare i64 @bar2()
diff --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index de1d822a8c8015f..9699c102c0b76b8 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
 ; Function Attrs: optsize ssp
 define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
   tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
 
   %2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
   %3 = icmp eq i64 %2, %0, !dbg !19

zmodem

Thanks! lgtm with nits.

llvm/lib/CodeGen/RegAllocGreedy.cpp

llvm/test/CodeGen/X86/no-split-size.ll

Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks and thus increase code size, so disable this split when the function is optimized for size.

…eferred physical register If a virtual register is not assigned its preferred physical register, some COPY instructions will be turned into real register move instructions. In this case we can try to split the virtual register in colder blocks; if that succeeds, the original COPY instructions can be deleted, and the new COPY instructions in the colder blocks will be generated as register move instructions. This results in fewer dynamic register move instructions being executed. The new test case split-reg-with-hint.ll gives an example: without this patch the hot path contains 24 instructions; with this patch it contains only 4. Differential Revision: https://reviews.llvm.org/D156491

[RA] Disable split around hint register if optimize for size

6fe5912

Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks and thus increase code size, so disable this split when the function is optimized for size.

weiguozhi requested a review from qcolombet October 9, 2023 18:52

llvmbot added backend:ARM backend:X86 debuginfo labels Oct 9, 2023

weiguozhi requested a review from zmodem October 9, 2023 18:55

zmodem approved these changes Oct 9, 2023

View reviewed changes

llvm/lib/CodeGen/RegAllocGreedy.cpp Show resolved Hide resolved

llvm/test/CodeGen/X86/no-split-size.ll Outdated Show resolved Hide resolved

[RA] Disable split around hint register if optimize for size

0ad54fa

Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks and thus increase code size, so disable this split when the function is optimized for size.

weiguozhi merged commit b6043f9 into llvm:main Oct 11, 2023
3 checks passed

weiguozhi deleted the size branch October 18, 2023 18:09

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RA] Disable split around hint register if optimize for size #68619

[RA] Disable split around hint register if optimize for size #68619

weiguozhi commented Oct 9, 2023

llvmbot commented Oct 9, 2023 •

edited

Loading

zmodem left a comment

[RA] Disable split around hint register if optimize for size #68619

[RA] Disable split around hint register if optimize for size #68619

Conversation

weiguozhi commented Oct 9, 2023

llvmbot commented Oct 9, 2023 • edited Loading

zmodem left a comment

Choose a reason for hiding this comment

llvmbot commented Oct 9, 2023 •

edited

Loading