Skip to content

Conversation

@hstk30-hw
Copy link
Contributor

Add a test case for the terminal rule (#169219)

@llvmbot
Copy link
Member

llvmbot commented Nov 30, 2025

@llvm/pr-subscribers-backend-aarch64

Author: None (hstk30-hw)

Changes

Add a test case for the terminal rule (#169219)


Full diff: https://github.com/llvm/llvm-project/pull/170035.diff

1 Files Affected:

  • (added) llvm/test/CodeGen/AArch64/apply-terminal-rule.mir (+98)
diff --git a/llvm/test/CodeGen/AArch64/apply-terminal-rule.mir b/llvm/test/CodeGen/AArch64/apply-terminal-rule.mir
new file mode 100644
index 0000000000000..f59452d8f253d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/apply-terminal-rule.mir
@@ -0,0 +1,98 @@
+# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -terminal-rule=1 -o - %s | FileCheck %s
+
+# Apply terminal rule for: %5
+--- |
+  @A = external dso_local global [100 x i32], align 4
+  
+  define i32 @test(i32 %n) {
+  entry:
+    %cmp63 = icmp sgt i32 %n, 0 ; run the loop only when %n > 0
+    br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
+  
+  for.body.preheader:                               ; preds = %entry
+    %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4 ; first element of @A, loaded once before the loop
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ] ; %n if the loop was skipped, otherwise the last quotient
+    ret i32 %sum.0.lcssa
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ] ; down-counter, starts at %n
+    %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ] ; running value, also starts at %n
+    %call = tail call i32 @_Z3usei(i32 %0) ; same argument on every iteration
+    %div = sdiv i32 %sum.065, %call ; running value divided by the call result
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0 ; leave the loop once the counter reaches zero
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+  }
+  
+  declare i32 @_Z3usei(i32)
+
+...
+---
+name:            test
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '%6' } # NOTE(review): the body copies $w0 into %0, not %6 - confirm this mapping is intended
+frameInfo:
+  maxAlignment:    1
+  adjustsStack:    true
+  hasCalls:        true
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+    liveins: $w0
+
+    %0:gpr32common = COPY $w0 ; %0 = incoming argument (%n in the IR)
+    dead $wzr = SUBSWri %0:gpr32common, 1, 0, implicit-def $nzcv ; flags for %0 - 1; the numeric result is discarded
+    Bcc 10, %bb.1, implicit killed $nzcv ; condition code 10 (GE): enter the loop when %0 >= 1
+    B %bb.4
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x80000000)
+
+    %1:gpr64common = ADRP target-flags(aarch64-page) @A
+    %2:gpr32 = LDRWui %1:gpr64common, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
+    %3:gpr32all = COPY %2:gpr32 ; %3 = value loaded from @A, passed to every call in bb.3
+    %4:gpr32sp = COPY %0:gpr32common ; %4 = initial loop counter
+    %5:gpr32 = COPY %0:gpr32common ; %5 = initial dividend; second copy of %0 in this block
+    B %bb.3
+
+  bb.2.for.cond.cleanup:
+    %6:gpr32all = COPY %7:gpr32all ; %7 is written in both bb.3 and bb.4
+    $w0 = COPY %6:gpr32all
+    RET_ReallyLR implicit $w0
+
+  bb.3.for.body:
+    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+    
+    %8:gpr32 = COPY %5:gpr32 ; current dividend
+    %9:gpr32sp = COPY %4:gpr32sp ; current loop counter
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $w0 = COPY %3:gpr32all
+    BL @_Z3usei, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp,implicit killed $w0, implicit-def $sp, implicit-def $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    %10:gpr32 = COPY $w0 ; call result
+    %11:gpr32 = SDIVWr %8:gpr32, %10:gpr32 ; dividend / call result
+    %12:gpr32all = COPY %11:gpr32
+    %13:gpr32 = SUBSWri %9:gpr32sp, 1, 0, implicit-def $nzcv ; counter - 1, also sets the flags tested below
+    %14:gpr32all = COPY %13:gpr32
+    %7:gpr32all = COPY %12:gpr32all ; quotient for the exit path (read in bb.2)
+    %4:gpr32sp = COPY %14:gpr32all ; decremented counter for the next iteration
+    %5:gpr32 = COPY %12:gpr32all ; quotient for the next iteration; same source as the %7 copy above
+    Bcc 0, %bb.2, implicit killed $nzcv ; condition code 0 (EQ): exit once the counter is zero
+    B %bb.3
+
+  bb.4:
+  ; CHECK-LABEL: bb.4:
+  ; CHECK-NOT: %6:gpr32 = COPY %5
+    successors: %bb.2(0x80000000) ; NOTE(review): the negative pattern above names %6:gpr32, but %6 is gpr32all here and the only COPY of %5 defines %8 - confirm the pattern can ever match
+
+    %7:gpr32all = COPY %0:gpr32common ; loop skipped: the result is the unmodified argument
+    B %bb.2
+
+...

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just update_mir_test_checks the test?

I believe we saw a number of perf regressions after #168661 from loops that contained extra copies that were not present before. IIUC, in some cases the performance went back down to where it was before the terminal rule was enabled.

@hstk30-hw hstk30-hw merged commit f1f3160 into llvm:main Dec 9, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants