[SPIRV] Add support for the SPV_KHR_subgroup_rotate extension #82374

Merged

Conversation

VyacheslavLevytskyy
Contributor

This PR adds support for the SPV_KHR_subgroup_rotate extension, which enables rotating values across invocations within a subgroup.
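For context, here is a minimal OpenCL C sketch (not part of this patch) of the two builtins that lower to the new OpGroupNonUniformRotateKHR instruction. It assumes a device supporting the cl_khr_subgroup_rotate extension; the kernel name is hypothetical.

// Hedged usage sketch: requires the cl_khr_subgroup_rotate OpenCL extension.
kernel void rotate_example(global int *dst) {
  uint lid = get_sub_group_local_id();
  int v = (int)lid;
  // Each invocation receives v from the invocation whose sub-group local id
  // equals (lid + delta) mod sub-group size; delta is 2 here.
  dst[lid] = sub_group_rotate(v, 2);
  // Clustered form: the rotation wraps within clusters of 4 invocations;
  // the cluster size must be a compile-time power-of-two constant.
  dst[lid + get_max_sub_group_size()] = sub_group_clustered_rotate(v, 2, 4);
}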

@llvmbot
Collaborator

llvmbot commented Feb 20, 2024

@llvm/pr-subscribers-backend-spir-v

Author: Vyacheslav Levytskyy (VyacheslavLevytskyy)

Changes

This PR adds support for the SPV_KHR_subgroup_rotate extension, which enables rotating values across invocations within a subgroup.


Patch is 24.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82374.diff

6 Files Affected:

  • (modified) llvm/lib/Target/SPIRV/SPIRVBuiltins.td (+6-1)
  • (modified) llvm/lib/Target/SPIRV/SPIRVInstrInfo.td (+5)
  • (modified) llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp (+9)
  • (modified) llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp (+4)
  • (modified) llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td (+1)
  • (added) llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll (+357)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 571cfcfd6e7e5c..bcd27c82439f96 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -619,7 +619,8 @@ class GroupBuiltin<string name, Op operation> {
                              !eq(operation, OpGroupNonUniformShuffleDown),
                              !eq(operation, OpGroupBroadcast),
                              !eq(operation, OpGroupNonUniformBroadcast),
-                             !eq(operation, OpGroupNonUniformBroadcastFirst));
+                             !eq(operation, OpGroupNonUniformBroadcastFirst),
+                             !eq(operation, OpGroupNonUniformRotateKHR));
   bit HasBoolArg = !or(!and(IsAllOrAny, !eq(IsAllEqual, false)), IsBallot, IsLogical);
 }
 
@@ -877,6 +878,10 @@ defm : DemangledGroupBuiltin<"group_non_uniform_scan_inclusive_logical_xors", Wo
 defm : DemangledGroupBuiltin<"group_non_uniform_scan_exclusive_logical_xors", WorkOrSub, OpGroupNonUniformLogicalXor>;
 defm : DemangledGroupBuiltin<"group_clustered_reduce_logical_xor", WorkOrSub, OpGroupNonUniformLogicalXor>;
 
+// cl_khr_subgroup_rotate / SPV_KHR_subgroup_rotate
+defm : DemangledGroupBuiltin<"group_rotate", OnlySub, OpGroupNonUniformRotateKHR>;
+defm : DemangledGroupBuiltin<"group_clustered_rotate", OnlySub, OpGroupNonUniformRotateKHR>;
+
 // cl_khr_work_group_uniform_arithmetic / SPV_KHR_uniform_group_instructions
 defm : DemangledGroupBuiltin<"group_reduce_imul", OnlyWork, OpGroupIMulKHR>;
 defm : DemangledGroupBuiltin<"group_reduce_mulu", OnlyWork, OpGroupIMulKHR>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 0f11bc34d176f7..86f65b6320d530 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -765,6 +765,11 @@ def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>;
 def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>;
 def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>;
 
+// SPV_KHR_subgroup_rotate
+def OpGroupNonUniformRotateKHR: Op<4431, (outs ID:$res),
+                  (ins TYPE:$type, ID:$scope, ID:$value, ID:$delta, variable_ops),
+                  "$res = OpGroupNonUniformRotateKHR $type $scope $value $delta">;
+
 // 3.49.7, Constant-Creation Instructions
 
 //  - SPV_INTEL_function_pointers
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index dbda2871e153de..9b9575b9879948 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1069,6 +1069,15 @@ void addInstrRequirements(const MachineInstr &MI,
       Reqs.addCapability(SPIRV::Capability::FunctionPointersINTEL);
     }
     break;
+  case SPIRV::OpGroupNonUniformRotateKHR:
+    if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_subgroup_rotate))
+      report_fatal_error("OpGroupNonUniformRotateKHR instruction requires the "
+                         "following SPIR-V extension: SPV_KHR_subgroup_rotate",
+                         false);
+    Reqs.addExtension(SPIRV::Extension::SPV_KHR_subgroup_rotate);
+    Reqs.addCapability(SPIRV::Capability::GroupNonUniformRotateKHR);
+    Reqs.addCapability(SPIRV::Capability::GroupNonUniform);
+    break;
   case SPIRV::OpGroupIMulKHR:
   case SPIRV::OpGroupFMulKHR:
   case SPIRV::OpGroupBitwiseAndKHR:
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index e186154aa408bd..4694363614ef60 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -75,6 +75,10 @@ cl::list<SPIRV::Extension::Extension> Extensions(
             "Allows to use the LinkOnceODR linkage type that is to let "
             "a function or global variable to be merged with other functions "
             "or global variables of the same name when linkage occurs."),
+        clEnumValN(SPIRV::Extension::SPV_KHR_subgroup_rotate,
+                   "SPV_KHR_subgroup_rotate",
+                   "Adds a new instruction that enables rotating values across "
+                   "invocations within a subgroup."),
         clEnumValN(SPIRV::Extension::SPV_INTEL_function_pointers,
                    "SPV_INTEL_function_pointers",
                    "Allows translation of function pointers.")));
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 4e5ac0d531b2d5..6c36087baa85ed 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -455,6 +455,7 @@ defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions],
 defm ExpectAssumeKHR : CapabilityOperand<5629, 0, 0, [SPV_KHR_expect_assume], []>;
 defm FunctionPointersINTEL : CapabilityOperand<5603, 0, 0, [SPV_INTEL_function_pointers], []>;
 defm IndirectReferencesINTEL : CapabilityOperand<5604, 0, 0, [SPV_INTEL_function_pointers], []>;
+defm GroupNonUniformRotateKHR : CapabilityOperand<6026, 0, 0, [SPV_KHR_subgroup_rotate], [GroupNonUniform]>;
 defm AtomicFloat32AddEXT : CapabilityOperand<6033, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
 defm AtomicFloat64AddEXT : CapabilityOperand<6034, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
 defm AtomicFloat16AddEXT : CapabilityOperand<6095, 0, 0, [SPV_EXT_shader_atomic_float16_add], []>;
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll
new file mode 100644
index 00000000000000..f4af0773084d3e
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll
@@ -0,0 +1,357 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_subgroup_rotate %s -o - | FileCheck %s
+; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-extensions=SPV_KHR_subgroup_rotate %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-ERROR: LLVM ERROR: OpGroupNonUniformRotateKHR instruction requires the following SPIR-V extension: SPV_KHR_subgroup_rotate
+
+; CHECK: OpCapability GroupNonUniformRotateKHR
+; CHECK: OpExtension "SPV_KHR_subgroup_rotate"
+
+; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0
+; CHECK-DAG: %[[TyInt16:.*]] = OpTypeInt 16 0
+; CHECK-DAG: %[[TyInt32:.*]] = OpTypeInt 32 0
+; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0
+; CHECK-DAG: %[[TyFloat:.*]] = OpTypeFloat 32
+; CHECK-DAG: %[[TyHalf:.*]] = OpTypeFloat 16
+; CHECK-DAG: %[[TyDouble:.*]] = OpTypeFloat 64
+; CHECK-DAG: %[[ScopeSubgroup:.*]] = OpConstant %[[TyInt32]] 3
+; CHECK-DAG: %[[ConstInt2:.*]] = OpConstant %[[TyInt32]] 2
+; CHECK-DAG: %[[ConstInt4:.*]] = OpConstant %[[TyInt32]] 4
+		
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir"
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateChar(ptr addrspace(1) noundef align 1 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i8, align 1
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i8 0, ptr %v, align 1
+  %0 = load i8, ptr %v, align 1
+; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func signext i8 @_Z16sub_group_rotateci(i8 noundef signext %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %1, i32 0
+  store i8 %call, ptr addrspace(1) %arrayidx, align 1
+  %2 = load i8, ptr %v, align 1
+; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func signext i8 @_Z26sub_group_clustered_rotatecij(i8 noundef signext %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %3, i32 1
+  store i8 %call1, ptr addrspace(1) %arrayidx2, align 1
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func signext i8 @_Z16sub_group_rotateci(i8 noundef signext, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func signext i8 @_Z26sub_group_clustered_rotatecij(i8 noundef signext, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateUChar(ptr addrspace(1) noundef align 1 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i8, align 1
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i8 0, ptr %v, align 1
+  %0 = load i8, ptr %v, align 1
+; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func zeroext i8 @_Z16sub_group_rotatehi(i8 noundef zeroext %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %1, i32 0
+  store i8 %call, ptr addrspace(1) %arrayidx, align 1
+  %2 = load i8, ptr %v, align 1
+; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func zeroext i8 @_Z26sub_group_clustered_rotatehij(i8 noundef zeroext %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %3, i32 1
+  store i8 %call1, ptr addrspace(1) %arrayidx2, align 1
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func zeroext i8 @_Z16sub_group_rotatehi(i8 noundef zeroext, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func zeroext i8 @_Z26sub_group_clustered_rotatehij(i8 noundef zeroext, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateShort(ptr addrspace(1) noundef align 2 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !8 !kernel_arg_base_type !8 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i16, align 2
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i16 0, ptr %v, align 2
+  %0 = load i16, ptr %v, align 2
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func signext i16 @_Z16sub_group_rotatesi(i16 noundef signext %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %1, i32 0
+  store i16 %call, ptr addrspace(1) %arrayidx, align 2
+  %2 = load i16, ptr %v, align 2
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func signext i16 @_Z26sub_group_clustered_rotatesij(i16 noundef signext %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i16, ptr addrspace(1) %3, i32 1
+  store i16 %call1, ptr addrspace(1) %arrayidx2, align 2
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func signext i16 @_Z16sub_group_rotatesi(i16 noundef signext, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func signext i16 @_Z26sub_group_clustered_rotatesij(i16 noundef signext, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateUShort(ptr addrspace(1) noundef align 2 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i16, align 2
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i16 0, ptr %v, align 2
+  %0 = load i16, ptr %v, align 2
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func zeroext i16 @_Z16sub_group_rotateti(i16 noundef zeroext %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %1, i32 0
+  store i16 %call, ptr addrspace(1) %arrayidx, align 2
+  %2 = load i16, ptr %v, align 2
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt16]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func zeroext i16 @_Z26sub_group_clustered_rotatetij(i16 noundef zeroext %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i16, ptr addrspace(1) %3, i32 1
+  store i16 %call1, ptr addrspace(1) %arrayidx2, align 2
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func zeroext i16 @_Z16sub_group_rotateti(i16 noundef zeroext, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func zeroext i16 @_Z26sub_group_clustered_rotatetij(i16 noundef zeroext, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateInt(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i32, align 4
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i32 0, ptr %v, align 4
+  %0 = load i32, ptr %v, align 4
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func i32 @_Z16sub_group_rotateii(i32 noundef %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %1, i32 0
+  store i32 %call, ptr addrspace(1) %arrayidx, align 4
+  %2 = load i32, ptr %v, align 4
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func i32 @_Z26sub_group_clustered_rotateiij(i32 noundef %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %3, i32 1
+  store i32 %call1, ptr addrspace(1) %arrayidx2, align 4
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i32 @_Z16sub_group_rotateii(i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i32 @_Z26sub_group_clustered_rotateiij(i32 noundef, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateUInt(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i32, align 4
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i32 0, ptr %v, align 4
+  %0 = load i32, ptr %v, align 4
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func i32 @_Z16sub_group_rotateji(i32 noundef %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %1, i32 0
+  store i32 %call, ptr addrspace(1) %arrayidx, align 4
+  %2 = load i32, ptr %v, align 4
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt32]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func i32 @_Z26sub_group_clustered_rotatejij(i32 noundef %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %3, i32 1
+  store i32 %call1, ptr addrspace(1) %arrayidx2, align 4
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i32 @_Z16sub_group_rotateji(i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i32 @_Z26sub_group_clustered_rotatejij(i32 noundef, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateLong(ptr addrspace(1) noundef align 8 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i64, align 8
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i64 0, ptr %v, align 8
+  %0 = load i64, ptr %v, align 8
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func i64 @_Z16sub_group_rotateli(i64 noundef %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %1, i32 0
+  store i64 %call, ptr addrspace(1) %arrayidx, align 8
+  %2 = load i64, ptr %v, align 8
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func i64 @_Z26sub_group_clustered_rotatelij(i64 noundef %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i64, ptr addrspace(1) %3, i32 1
+  store i64 %call1, ptr addrspace(1) %arrayidx2, align 8
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z16sub_group_rotateli(i64 noundef, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z26sub_group_clustered_rotatelij(i64 noundef, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateULong(ptr addrspace(1) noundef align 8 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca i64, align 8
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store i64 0, ptr %v, align 8
+  %0 = load i64, ptr %v, align 8
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func i64 @_Z16sub_group_rotatemi(i64 noundef %0, i32 noundef 2) #2
+  %1 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %1, i32 0
+  store i64 %call, ptr addrspace(1) %arrayidx, align 8
+  %2 = load i64, ptr %v, align 8
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyInt64]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]] %[[ConstInt4]]
+  %call1 = call spir_func i64 @_Z26sub_group_clustered_rotatemij(i64 noundef %2, i32 noundef 2, i32 noundef 4) #2
+  %3 = load ptr addrspace(1), ptr %dst.addr, align 4
+  %arrayidx2 = getelementptr inbounds i64, ptr addrspace(1) %3, i32 1
+  store i64 %call1, ptr addrspace(1) %arrayidx2, align 8
+  ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z16sub_group_rotatemi(i64 noundef, i32 noundef) #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z26sub_group_clustered_rotatemij(i64 noundef, i32 noundef, i32 noundef) #1
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define dso_local spir_kernel void @testRotateFloat(ptr addrspace(1) noundef align 4 %dst) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !6 {
+entry:
+  %dst.addr = alloca ptr addrspace(1), align 4
+  %v = alloca float, align 4
+  store ptr addrspace(1) %dst, ptr %dst.addr, align 4
+  store float 0.000000e+00, ptr %v, align 4
+  %0 = load float, ptr %v, align 4
+  ; CHECK: OpGroupNonUniformRotateKHR %[[TyFloat]] %[[ScopeSubgroup]] %[[#]] %[[ConstInt2]]
+  %call = call spir_func float @_Z16sub_group...
[truncated]

%v = alloca i8, align 1
store ptr addrspace(1) %dst, ptr %dst.addr, align 4
store i8 0, ptr %v, align 1
%0 = load i8, ptr %v, align 1
Member

Overall the patch looks good to me. However, one small nit: please name all virtual registers in the LIT tests.
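For illustration, a named-register variant of one CHECK pair from the test could read as follows (the OpLoad line and the Val binding are an assumption about the emitted code, mirroring the test's existing %[[TyInt8:.*]]-style bindings):

; CHECK: %[[Val:.*]] = OpLoad %[[TyInt8]]
; CHECK: OpGroupNonUniformRotateKHR %[[TyInt8]] %[[ScopeSubgroup]] %[[Val]] %[[ConstInt2]]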

@VyacheslavLevytskyy VyacheslavLevytskyy merged commit fddf23c into llvm:main Feb 22, 2024
3 of 5 checks passed