diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll
new file mode 100644
index 0000000000000..067e2c77c0624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/inst-count-heuristic.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
+; RUN:   < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -passes=slp-vectorizer \
+; RUN:   -slp-inst-count-check=true < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer \
+; RUN:   < %s | FileCheck -check-prefix=GFX942 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=slp-vectorizer \
+; RUN:   < %s | FileCheck -check-prefix=GFX906 %s
+
+; Checks the SLP vectorizer output for a loop that rotates five i32 phis
+; (%x0..%x4) and stores %x0 and %x1 to adjacent i32 slots each iteration.
+; Every prefix expects the two scalar stores to be left unvectorized; the
+; second gfx950 RUN line passes -slp-inst-count-check=true and shares the
+; GFX950 checks with the default invocation.
+define amdgpu_kernel void @phi5_rotate(
+; GFX950-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX950-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX950-NEXT:  [[ENTRY:.*]]:
+; GFX950-NEXT:    br label %[[LOOP:.*]]
+; GFX950:       [[LOOP]]:
+; GFX950-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX950-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX950-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX950-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX950-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX950-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX950-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX950-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX950-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX950-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX950:       [[EXIT]]:
+; GFX950-NEXT:    ret void
+;
+; GFX942-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX942-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX942-NEXT:  [[ENTRY:.*]]:
+; GFX942-NEXT:    br label %[[LOOP:.*]]
+; GFX942:       [[LOOP]]:
+; GFX942-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX942-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX942-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX942-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX942-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX942-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX942-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX942-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX942-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX942-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX942:       [[EXIT]]:
+; GFX942-NEXT:    ret void
+;
+; GFX906-LABEL: define amdgpu_kernel void @phi5_rotate(
+; GFX906-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[S0:%.*]], i32 [[S1:%.*]], i32 [[S2:%.*]], i32 [[S3:%.*]], i32 [[S4:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX906-NEXT:  [[ENTRY:.*]]:
+; GFX906-NEXT:    br label %[[LOOP:.*]]
+; GFX906:       [[LOOP]]:
+; GFX906-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X0:%.*]] = phi i32 [ [[S0]], %[[ENTRY]] ], [ [[X4:%.*]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X1:%.*]] = phi i32 [ [[S1]], %[[ENTRY]] ], [ [[X0]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X2:%.*]] = phi i32 [ [[S2]], %[[ENTRY]] ], [ [[X1]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X3:%.*]] = phi i32 [ [[S3]], %[[ENTRY]] ], [ [[X2]], %[[LOOP]] ]
+; GFX906-NEXT:    [[X4]] = phi i32 [ [[S4]], %[[ENTRY]] ], [ [[X3]], %[[LOOP]] ]
+; GFX906-NEXT:    [[GEP0:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I]]
+; GFX906-NEXT:    store i32 [[X0]], ptr addrspace(1) [[GEP0]], align 4
+; GFX906-NEXT:    [[I1:%.*]] = or disjoint i32 [[I]], 1
+; GFX906-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[I1]]
+; GFX906-NEXT:    store i32 [[X1]], ptr addrspace(1) [[GEP1]], align 4
+; GFX906-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; GFX906-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; GFX906-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; GFX906:       [[EXIT]]:
+; GFX906-NEXT:    ret void
+;
+    ptr addrspace(1) nocapture %out,
+    i32 %n,
+    i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %x0 = phi i32 [ %s0, %entry ], [ %x4, %loop ]
+  %x1 = phi i32 [ %s1, %entry ], [ %x0, %loop ]
+  %x2 = phi i32 [ %s2, %entry ], [ %x1, %loop ]
+  %x3 = phi i32 [ %s3, %entry ], [ %x2, %loop ]
+  %x4 = phi i32 [ %s4, %entry ], [ %x3, %loop ]
+  %gep0 = getelementptr i32, ptr addrspace(1) %out, i32 %i
+  store i32 %x0, ptr addrspace(1) %gep0, align 4
+  %i1 = or disjoint i32 %i, 1
+  %gep1 = getelementptr i32, ptr addrspace(1) %out, i32 %i1
+  store i32 %x1, ptr addrspace(1) %gep1, align 4
+  %i.next = add nuw i32 %i, 2
+  %cmp = icmp ult i32 %i.next, %n
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}