Skip to content

[SLP][NFC]Add a test with inst count heuristic for AMDHSA, NFC#199693

Merged
alexey-bataev merged 1 commit into
main from
users/alexey-bataev/spr/slpnfcadd-a-test-with-inst-count-heuristic-for-amdhsa-nfc
May 26, 2026
Merged

[SLP][NFC]Add a test with inst count heuristic for AMDHSA, NFC#199693
alexey-bataev merged 1 commit into
main from
users/alexey-bataev/spr/slpnfcadd-a-test-with-inst-count-heuristic-for-amdhsa-nfc

Conversation

@alexey-bataev
Copy link
Copy Markdown
Member

No description provided.

Created using spr 1.3.7
@alexey-bataev alexey-bataev merged commit 721cddf into main May 26, 2026
8 of 11 checks passed
@alexey-bataev alexey-bataev deleted the users/alexey-bataev/spr/slpnfcadd-a-test-with-inst-count-heuristic-for-amdhsa-nfc branch May 26, 2026 14:25
@llvmorg-github-actions
Copy link
Copy Markdown

llvmorg-github-actions Bot commented May 26, 2026

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Alexey Bataev (alexey-bataev)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/199693.diff

1 Files Affected:

  • (added) llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll (+107)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll
new file mode 100644
index 0000000000000..067e2c77c0624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
+; RUN:     < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
+; RUN:     -slp-inst-count-check=true < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer \
+; RUN:     < %s | FileCheck -check-prefix=GFX942 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=slp-vectorizer \
+; RUN:     < %s | FileCheck -check-prefix=GFX906 %s
+
+; Runs the SLP vectorizer on a loop that rotates five i32 values through PHI
+; nodes (x0 <- x4, x1 <- x0, ..., x4 <- x3) and stores x0 and x1 to out[i] and
+; out[i | 1] on each iteration. Every RUN configuration (gfx950 with and
+; without -slp-inst-count-check=true, gfx942, gfx906) expects the loop to stay
+; scalar: the checks match the input IR, with `nocapture` printed as `captures(none)`.
+define amdgpu_kernel void @phi5_rotate(
+; GFX950-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX950-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX950-NEXT:  [[ENTRY:.*]]:
+; GFX950-NEXT:    br label %[[LOOP:.*]]
+; GFX950:       [[LOOP]]:
+; GFX950-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX950-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX950-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX950-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX950-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX950-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX950-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX950-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX950-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX950:       [[EXIT]]:
+; GFX950-NEXT:    ret void
+;
+; GFX942-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX942-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX942-NEXT:  [[ENTRY:.*]]:
+; GFX942-NEXT:    br label %[[LOOP:.*]]
+; GFX942:       [[LOOP]]:
+; GFX942-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX942-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX942-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX942-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX942-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX942-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX942-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX942-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX942-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX942:       [[EXIT]]:
+; GFX942-NEXT:    ret void
+;
+; GFX906-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX906-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX906-NEXT:  [[ENTRY:.*]]:
+; GFX906-NEXT:    br label %[[LOOP:.*]]
+; GFX906:       [[LOOP]]:
+; GFX906-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX906-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX906-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX906-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX906-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX906-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX906-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX906-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX906-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX906:       [[EXIT]]:
+; GFX906-NEXT:    ret void
+;
+  ptr addrspace(1) nocapture %out,
+  i32 %n,
+  i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4) {
+entry:
+  br label %loop
+
+loop:
+  %i   = phi i32 [ 0,   %entry ], [ %i.next, %loop ]
+  %x0  = phi i32 [ %s0, %entry ], [ %x4,     %loop ]
+  %x1  = phi i32 [ %s1, %entry ], [ %x0,     %loop ]
+  %x2  = phi i32 [ %s2, %entry ], [ %x1,     %loop ]
+  %x3  = phi i32 [ %s3, %entry ], [ %x2,     %loop ]
+  %x4  = phi i32 [ %s4, %entry ], [ %x3,     %loop ]
+  %gep0 = getelementptr i32, ptr addrspace(1) %out, i32 %i
+  store i32 %x0, ptr addrspace(1) %gep0, align 4
+  %i1   = or disjoint i32 %i, 1
+  %gep1 = getelementptr i32, ptr addrspace(1) %out, i32 %i1
+  store i32 %x1, ptr addrspace(1) %gep1, align 4
+  %i.next = add nuw i32 %i, 2
+  %cmp  = icmp ult i32 %i.next, %n
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}

llvm-sync Bot pushed a commit to arm/arm-toolchain that referenced this pull request May 26, 2026
llvm-upstreamsync Bot pushed a commit to qualcomm/cpullvm-toolchain that referenced this pull request May 26, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant