diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 8e14fb03127fc..03bfeacc2071f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -1569,16 +1569,57 @@ Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) { return BrI; } -Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { - if (I.getSourceElementType() == IntegerType::getInt8Ty(CurrF->getContext()) && - TM->getSubtargetImpl()->isLogicalSPIRV()) { - Instruction *Result = buildLogicalAccessChainFromGEP(I); - if (Result) - return Result; +static bool isFirstIndexZero(const GetElementPtrInst *GEP) { + if (GEP->getNumIndices() == 0) + return false; + if (const auto *CI = dyn_cast<ConstantInt>(GEP->getOperand(1))) { + return CI->getZExtValue() == 0; + } + return false; +} +Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { IRBuilder<> B(I.getParent()); B.SetInsertPoint(&I); + + if (TM->getSubtargetImpl()->isLogicalSPIRV() && !isFirstIndexZero(&I)) { + // Logical SPIR-V cannot use the OpPtrAccessChain instruction. If the first + // index of the GEP is not 0, then we need to try to adjust it. + // + // If the GEP is doing byte addressing, try to rebuild the full access chain + // from the type of the pointer. + if (I.getSourceElementType() == + IntegerType::getInt8Ty(CurrF->getContext())) { + return buildLogicalAccessChainFromGEP(I); + } + + // Look for the array-to-pointer decay. If this is the pattern + // we can adjust the types, and prepend a 0 to the indices. 
+ Value *PtrOp = I.getPointerOperand(); + Type *SrcElemTy = I.getSourceElementType(); + Type *DeducedPointeeTy = deduceElementType(PtrOp, true); + + if (auto *ArrTy = dyn_cast<ArrayType>(DeducedPointeeTy)) { + if (ArrTy->getElementType() == SrcElemTy) { + SmallVector<Value *> NewIndices; + Type *FirstIdxType = I.getOperand(1)->getType(); + NewIndices.push_back(ConstantInt::get(FirstIdxType, 0)); + for (Value *Idx : I.indices()) + NewIndices.push_back(Idx); + + SmallVector<Type *, 2> Types = {I.getType(), I.getPointerOperandType()}; + SmallVector<Value *, 4> Args; + Args.push_back(B.getInt1(I.isInBounds())); + Args.push_back(I.getPointerOperand()); + Args.append(NewIndices.begin(), NewIndices.end()); + + auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args}); + replaceAllUsesWithAndErase(B, &I, NewI); + return NewI; + } + } + } + SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()}; SmallVector<Value *, 4> Args; Args.push_back(B.getInt1(I.isInBounds())); @@ -1772,16 +1813,12 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, Value *Pointer = GEPI->getPointerOperand(); Type *OpTy = nullptr; - // Knowing the accessed type is mandatory for logical SPIR-V. Sadly, - // the GEP source element type should not be used for this purpose, and - // the alternative type-scavenging method is not working. - // Physical SPIR-V can work around this, but not logical, hence still - // try to rely on the broken type scavenging for logical. - bool IsRewrittenGEP = - GEPI->getSourceElementType() == IntegerType::getInt8Ty(I->getContext()); - if (IsRewrittenGEP && TM->getSubtargetImpl()->isLogicalSPIRV()) { - Value *Src = getPointerRoot(Pointer); - OpTy = GR->findDeducedElementType(Src); + // Logical SPIR-V is not allowed to use Op*PtrAccessChain instructions. If + // the first index is 0, then we can trivially lower to OpAccessChain. If + // not we need to try to rewrite the GEP. We avoid adding a pointer cast at + // this time, and will rewrite the GEP when visiting it. 
+ if (TM->getSubtargetImpl()->isLogicalSPIRV() && !isFirstIndexZero(GEPI)) { + return; + } // In all cases, fall back to the GEP type if type scavenging failed. diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index d3fc08eb56cb3..69606c10fb224 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -467,6 +467,7 @@ static bool isConstReg(MachineRegisterInfo *MRI, MachineInstr *OpDef, switch (Opcode) { case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: return true; case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: @@ -3088,6 +3089,11 @@ bool SPIRVInstructionSelector::selectGEP(Register ResVReg, .addUse(GR.getSPIRVTypeID(ResType)) // Object to get a pointer to. .addUse(I.getOperand(3).getReg()); + assert((Opcode == SPIRV::OpPtrAccessChain || + Opcode == SPIRV::OpInBoundsPtrAccessChain || + (getImm(I.getOperand(4), MRI) && foldImm(I.getOperand(4), MRI) == 0)) && + "Cannot translate GEP to OpAccessChain. First index must be 0."); + + // Adding indices. 
const unsigned StartingIndex = (Opcode == SPIRV::OpAccessChain || Opcode == SPIRV::OpInBoundsAccessChain) diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index 4ce871b6f5e5d..81c7596530ee2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -104,9 +104,13 @@ class SPIRVLegalizePointerCast : public FunctionPass { Value *loadFirstValueFromAggregate(IRBuilder<> &B, Type *ElementType, Value *Source, LoadInst *BadLoad) { SmallVector Types = {BadLoad->getPointerOperandType(), - BadLoad->getPointerOperandType()}; - SmallVector Args{/* isInBounds= */ B.getInt1(false), Source, - B.getInt32(0), B.getInt32(0)}; + Source->getType()}; + SmallVector Args{/* isInBounds= */ B.getInt1(false), Source}; + + Type *AggregateType = GR->findDeducedElementType(Source); + assert(AggregateType && "Could not deduce aggregate type"); + buildGEPIndexChain(B, ElementType, AggregateType, Args); + auto *GEP = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args}); GR->buildAssignPtr(B, ElementType, GEP); @@ -201,34 +205,20 @@ class SPIRVLegalizePointerCast : public FunctionPass { auto *SAT = dyn_cast(FromTy); auto *SVT = dyn_cast(FromTy); - auto *SST = dyn_cast(FromTy); auto *DVT = dyn_cast(ToTy); B.SetInsertPoint(LI); - // Destination is the element type of Source, and source is an array -> - // Loading 1st element. + // Destination is the element type of some member of FromTy. For example, + // loading the 1st element of an array: // - float a = array[0]; - if (SAT && SAT->getElementType() == ToTy) - Output = loadFirstValueFromAggregate(B, SAT->getElementType(), - OriginalOperand, LI); - // Destination is the element type of Source, and source is a vector -> - // Vector to scalar. 
- // - float a = vector.x; - else if (!DVT && SVT && SVT->getElementType() == ToTy) { - Output = loadFirstValueFromAggregate(B, SVT->getElementType(), - OriginalOperand, LI); - } + if (isTypeFirstElementAggregate(ToTy, FromTy)) + Output = loadFirstValueFromAggregate(B, ToTy, OriginalOperand, LI); // Destination is a smaller vector than source or different vector type. // - float3 v3 = vector4; // - float4 v2 = int4; else if (SVT && DVT) Output = loadVectorFromVector(B, SVT, DVT, OriginalOperand); - // Destination is the scalar type stored at the start of an aggregate. - // - struct S { float m }; - // - float v = s.m; - else if (SST && SST->getTypeAtIndex(0u) == ToTy) - Output = loadFirstValueFromAggregate(B, ToTy, OriginalOperand, LI); else if (SAT && DVT && SAT->getElementType() == DVT->getElementType()) Output = loadVectorFromArray(B, DVT, OriginalOperand); else @@ -334,7 +324,7 @@ class SPIRVLegalizePointerCast : public FunctionPass { Value *storeToFirstValueAggregate(IRBuilder<> &B, Value *Src, Value *Dst, Type *DstPointeeType, Align Alignment) { SmallVector Types = {Dst->getType(), Dst->getType()}; - SmallVector Args{/* isInBounds= */ B.getInt1(true), Dst}; + SmallVector Args{/* isInBounds= */ B.getInt1(true), Dst}; buildGEPIndexChain(B, Src->getType(), DstPointeeType, Args); auto *GEP = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args}); GR->buildAssignPtr(B, Src->getType(), GEP); diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-array.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-array.ll new file mode 100644 index 0000000000000..5d45178715d70 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-array.ll @@ -0,0 +1,77 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[FLOAT:[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: %[[VEC4:[0-9]+]] = OpTypeVector %[[FLOAT]] 4 +; 
CHECK-DAG: %[[PTR_VEC4:[0-9]+]] = OpTypePointer Uniform %[[VEC4]] +; CHECK-DAG: %[[INT:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[PTR_INT:[0-9]+]] = OpTypePointer Uniform %[[INT]] +; CHECK-DAG: %[[INT64:[0-9]+]] = OpTypeInt 64 0 +; CHECK-DAG: %[[CONST_4:[0-9]+]] = OpConstant %[[INT]] 4{{$}} + +; CHECK-DAG: %[[ARRAY:[0-9]+]] = OpTypeArray %[[VEC4]] %[[CONST_4]] +; CHECK-DAG: %[[PTR_ARRAY:[0-9]+]] = OpTypePointer Uniform %[[ARRAY]] + +; CHECK-DAG: %[[STRUCT_INNER:[0-9]+]] = OpTypeStruct %[[ARRAY]] %[[INT]] +; CHECK-DAG: %[[STRUCT_CBUFFER:[0-9]+]] = OpTypeStruct %[[STRUCT_INNER]] +; CHECK-DAG: %[[PTR_CBUFFER:[0-9]+]] = OpTypePointer Uniform %[[STRUCT_CBUFFER]] + +; CHECK-DAG: OpDecorate %[[ARRAY]] ArrayStride 16 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_INNER]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_INNER]] 1 Offset 64 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_CBUFFER]] 0 Offset 0 +; CHECK-DAG: OpDecorate %[[STRUCT_CBUFFER]] Block + +; CHECK-DAG: %[[ZERO:[0-9]+]] = OpConstant %[[INT]] 0{{$}} +; CHECK-DAG: %[[ONE:[0-9]+]] = OpConstant %[[INT]] 1{{$}} + +; CHECK: %[[CBUFFER:[0-9]+]] = OpVariable %[[PTR_CBUFFER]] Uniform + +%__cblayout_MyCBuffer = type <{ [4 x <4 x float>], i32 }> + +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison +@colors = external hidden local_unnamed_addr addrspace(12) global [4 x <4 x float>], align 16 +@index = external hidden local_unnamed_addr addrspace(12) global i32, align 4 +@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 +@.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 + +declare target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32, i32, i32, i32, ptr) + +define void @main() #1 { +entry: +; Get pointers to the two elements of the cbuffer +; CHECK: %[[COPY:[0-9]+]] = OpCopyObject %[[PTR_CBUFFER]] %[[CBUFFER]] +; CHECK: 
%[[PTR_ARRAY_ACCESS:[0-9]+]] = OpAccessChain %[[PTR_ARRAY]] %[[COPY]] %[[ZERO]] %[[ZERO]] +; CHECK: %[[PTR_INT_ACCESS:[0-9]+]] = OpAccessChain %[[PTR_INT]] %[[COPY]] %[[ZERO]] %[[ONE]] + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + + %0 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + +; CHECK: %[[VAL_INT:[0-9]+]] = OpLoad %[[INT]] %[[PTR_INT_ACCESS]] Aligned 4 + %1 = load i32, ptr addrspace(12) @index, align 4 + +; CHECK: %[[VAL_INT64:[0-9]+]] = OpSConvert %[[INT64]] %[[VAL_INT]] + %idxprom.i = sext i32 %1 to i64 + +; CHECK: %[[PTR_ELEM:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_ARRAY_ACCESS]] %[[VAL_INT64]] + %arrayidx.i = getelementptr inbounds <4 x float>, ptr addrspace(12) @colors, i64 %idxprom.i + +; CHECK: %[[VAL_ELEM:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_ELEM]] Aligned 16 + %2 = load <4 x float>, ptr addrspace(12) %arrayidx.i, align 16 + +; CHECK: OpStore {{%[0-9]+}} %[[VAL_ELEM]] Aligned 16 + %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) %0, i32 0) + store <4 x float> %2, ptr addrspace(11) %3, align 16 + ret void +} + +declare target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32, i32, i32, i32, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1), i32) + +attributes #1 = { 
"hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @MyCBuffer.cb, ptr addrspace(12) @colors, ptr addrspace(12) @index} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-simple.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-simple.ll new file mode 100644 index 0000000000000..1dd2c92bca09d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-simple.ll @@ -0,0 +1,73 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[FLOAT:[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: %[[VEC4:[0-9]+]] = OpTypeVector %[[FLOAT]] 4 +; CHECK-DAG: %[[PTR_FLOAT:[0-9]+]] = OpTypePointer Uniform %[[FLOAT]] +; CHECK-DAG: %[[PTR_VEC4:[0-9]+]] = OpTypePointer Uniform %[[VEC4]] +; CHECK-DAG: %[[STRUCT:[0-9]+]] = OpTypeStruct %[[VEC4]] %[[FLOAT]] +; CHECK-DAG: %[[CBUFFER_TYPE:[0-9]+]] = OpTypeStruct %[[STRUCT]] +; CHECK-DAG: %[[PTR_CBUFFER:[0-9]+]] = OpTypePointer Uniform %[[CBUFFER_TYPE]] +; CHECK-DAG: %[[INT:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[ZERO:[0-9]+]] = OpConstant %[[INT]] 0{{$}} +; CHECK-DAG: %[[ONE:[0-9]+]] = OpConstant %[[INT]] 1{{$}} + +; CHECK-DAG: OpMemberDecorate %[[STRUCT]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT]] 1 Offset 16 +; CHECK-DAG: OpMemberDecorate %[[CBUFFER_TYPE]] 0 Offset 0 +; CHECK-DAG: OpDecorate %[[CBUFFER_TYPE]] Block + +; CHECK-DAG: %[[CBUFFER:[0-9]+]] = OpVariable %[[PTR_CBUFFER]] Uniform + +%__cblayout_MyCBuffer = type <{ <4 x float>, float }> + +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison +@color = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16 +@factor = external hidden local_unnamed_addr addrspace(12) global float, align 4 +@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 +@.str = private unnamed_addr 
constant [7 x i8] c"output\00", align 1 + +declare target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32, i32, i32, i32, ptr) + +define void @main() #1 { +entry: +; CHECK: %[[COPY:[0-9]+]] = OpCopyObject %[[PTR_CBUFFER]] %[[CBUFFER]] +; CHECK: %[[PTR_VEC4_ACCESS:[0-9]+]] = OpAccessChain %[[PTR_VEC4]] %[[COPY]] %[[ZERO]] %[[ZERO]] +; CHECK: %[[PTR_FLOAT_ACCESS:[0-9]+]] = OpAccessChain %[[PTR_FLOAT]] %[[COPY]] %[[ZERO]] %[[ONE]] + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + + %0 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call i32 @llvm.spv.thread.id.i32(i32 0) + %2 = tail call i32 @llvm.spv.thread.id.i32(i32 1) + %conv.i = uitofp i32 %1 to float + %conv2.i = uitofp i32 %2 to float + %3 = insertelement <4 x float> , float %conv.i, i64 0 + %vecinit5.i = insertelement <4 x float> %3, float %conv2.i, i64 1 + +; CHECK: %[[VAL_VEC4:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_VEC4_ACCESS]] Aligned 16 + %4 = load <4 x float>, ptr addrspace(12) @color, align 16 + %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %vecinit5.i, %4 + +; CHECK: %[[VAL_FLOAT:[0-9]+]] = OpLoad %[[FLOAT]] %[[PTR_FLOAT_ACCESS]] Aligned 4 + %5 = load float, ptr addrspace(12) @factor, align 4 + + %splat.splatinsert.i = insertelement <4 x float> poison, float %5, i64 0 + %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> poison, <4 x i32> zeroinitializer + %mul6.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %mul.i, %splat.splat.i + 
%6 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) %0, i32 0) + store <4 x float> %mul6.i, ptr addrspace(11) %6, align 16 + ret void +} + +declare i32 @llvm.spv.thread.id.i32(i32) + +declare target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32, i32, i32, i32, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1), i32) + +attributes #1 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @MyCBuffer.cb, ptr addrspace(12) @color, ptr addrspace(12) @factor} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-struct.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-struct.ll new file mode 100644 index 0000000000000..60512fe3ed718 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer-struct.ll @@ -0,0 +1,158 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[FLOAT:[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: %[[VEC4:[0-9]+]] = OpTypeVector %[[FLOAT]] 4 +; CHECK-DAG: %[[PTR_VEC4:[0-9]+]] = OpTypePointer Uniform %[[VEC4]] +; CHECK-DAG: %[[INT:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[ZERO:[0-9]+]] = OpConstant %[[INT]] 0{{$}} + +; CHECK-DAG: %[[STRUCT_MATRIX:[0-9]+]] = OpTypeStruct %[[VEC4]] %[[VEC4]] %[[VEC4]] %[[VEC4]] +; CHECK-DAG: %[[PTR_MATRIX:[0-9]+]] = OpTypePointer Uniform %[[STRUCT_MATRIX]] +; CHECK-DAG: %[[PTR_FLOAT:[0-9]+]] = OpTypePointer Uniform %[[FLOAT]] + +; CHECK-DAG: %[[STRUCT_MYSTRUCT:[0-9]+]] = OpTypeStruct %[[STRUCT_MATRIX]] %[[STRUCT_MATRIX]] %[[STRUCT_MATRIX]] + +; CHECK-DAG: %[[PTR_MYSTRUCT:[0-9]+]] = OpTypePointer Uniform 
%[[STRUCT_MYSTRUCT]] +; CHECK-DAG: %[[STRUCT_INNER:[0-9]+]] = OpTypeStruct %[[STRUCT_MYSTRUCT]] %[[FLOAT]] + +; CHECK-DAG: %[[STRUCT_CBUFFER:[0-9]+]] = OpTypeStruct %[[STRUCT_INNER]] +; CHECK-DAG: %[[PTR_CBUFFER:[0-9]+]] = OpTypePointer Uniform %[[STRUCT_CBUFFER]] +; CHECK-DAG: %[[INT64:[0-9]+]] = OpTypeInt 64 0 + +; CHECK-DAG: OpMemberDecorate %[[STRUCT_CBUFFER]] 0 Offset 0 +; CHECK-DAG: OpDecorate %[[STRUCT_CBUFFER]] Block +; CHECK-DAG: OpMemberDecorate %[[STRUCT_INNER]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_INNER]] 1 Offset 192 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MYSTRUCT]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MYSTRUCT]] 1 Offset 64 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MYSTRUCT]] 2 Offset 128 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MATRIX]] 0 Offset 0 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MATRIX]] 1 Offset 16 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MATRIX]] 2 Offset 32 +; CHECK-DAG: OpMemberDecorate %[[STRUCT_MATRIX]] 3 Offset 48 + +; CHECK-DAG: %[[ONE:[0-9]+]] = OpConstant %[[INT]] 1{{$}} +; CHECK-DAG: %[[ZERO_64:[0-9]+]] = OpConstant %[[INT64]] 0{{$}} +; CHECK-DAG: %[[ONE_64:[0-9]+]] = OpConstant %[[INT64]] 1{{$}} +; CHECK-DAG: %[[TWO_64:[0-9]+]] = OpConstant %[[INT64]] 2{{$}} +; CHECK-DAG: %[[THREE_64:[0-9]+]] = OpConstant %[[INT64]] 3{{$}} + +; CHECK: %[[CBUFFER:[0-9]+]] = OpVariable %[[PTR_CBUFFER]] Uniform + +%__cblayout_MyCBuffer = type <{ %MyStruct, float }> +%MyStruct = type <{ %MyMatrix, %MyMatrix, %MyMatrix }> +%MyMatrix = type <{ <4 x float>, <4 x float>, <4 x float>, <4 x float> }> + +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison +@transforms = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 1 +@blend = external hidden local_unnamed_addr addrspace(12) global float, align 4 +@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 +@.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 
+ +declare target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32, i32, i32, i32, ptr) + +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define void @main() #3 { +entry: +; CHECK: %[[COPY:[0-9]+]] = OpCopyObject %[[PTR_CBUFFER]] %[[CBUFFER]] +; CHECK: %[[PTR_STRUCT:[0-9]+]] = OpAccessChain %[[PTR_MYSTRUCT]] %[[COPY]] %[[ZERO]] %[[ZERO]] +; CHECK: %[[PTR_FLOAT_VAL:[0-9]+]] = OpAccessChain %[[PTR_FLOAT]] %[[COPY]] %[[ZERO]] %[[ONE]] + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_s___cblayout_MyCBuffers_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + + %0 = tail call target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) + %1 = tail call i32 @llvm.spv.thread.id.i32(i32 0) + %2 = tail call i32 @llvm.spv.thread.id.i32(i32 1) + %conv.i = uitofp i32 %1 to float + %conv2.i = uitofp i32 %2 to float + %3 = insertelement <4 x float> poison, float %conv.i, i64 0 + +; CHECK: %[[PTR_M0_V0:[0-9]+]] = OpAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ZERO]] %[[ZERO]] +; CHECK: %[[VAL_M0_V0:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M0_V0]] Aligned 16 + %4 = load <4 x float>, ptr addrspace(12) @transforms, align 16 + +; CHECK: %[[PTR_M0_V1:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ZERO_64]] %[[ONE_64]] +; CHECK: %[[VAL_M0_V1:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M0_V1]] Aligned 16 + %5 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 16), align 16 + +; CHECK: %[[PTR_M0_V3:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ZERO_64]] 
%[[THREE_64]] +; CHECK: %[[VAL_M0_V3:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M0_V3]] Aligned 16 + %6 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 48), align 16 + + %splat.splat.i18.i = shufflevector <4 x float> %3, <4 x float> poison, <4 x i32> zeroinitializer + %7 = insertelement <4 x float> poison, float %conv2.i, i64 0 + %splat.splat2.i19.i = shufflevector <4 x float> %7, <4 x float> poison, <4 x i32> zeroinitializer + %mul3.i20.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %splat.splat2.i19.i, %5 + %8 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i18.i, <4 x float> nofpclass(nan inf) %4, <4 x float> %mul3.i20.i) + %9 = fadd reassoc nnan ninf nsz arcp afn <4 x float> %8, %6 +; CHECK: %[[PTR_M1:[0-9]+]] = OpInBoundsAccessChain %[[PTR_MATRIX]] %[[PTR_STRUCT]] %[[ONE_64]] +; CHECK: %[[PTR_M1_V0:[0-9]+]] = OpAccessChain %[[PTR_VEC4]] %[[PTR_M1]] %[[ZERO]] +; CHECK: %[[VAL_M1_V0:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M1_V0]] Aligned 16 + %10 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 64), align 16 +; CHECK: %[[PTR_M1_V1:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ONE_64]] %[[ONE_64]] +; CHECK: %[[VAL_M1_V1:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M1_V1]] Aligned 16 + %11 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 80), align 16 +; CHECK: %[[PTR_M1_V2:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ONE_64]] %[[TWO_64]] +; CHECK: %[[VAL_M1_V2:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M1_V2]] Aligned 16 + %12 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 96), align 16 +; CHECK: %[[PTR_M1_V3:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[ONE_64]] %[[THREE_64]] +; CHECK: %[[VAL_M1_V3:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M1_V3]] Aligned 16 + 
%13 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 112), align 16 + %splat.splat.i13.i = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> zeroinitializer + %splat.splat2.i14.i = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> + %mul3.i15.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %splat.splat2.i14.i, %11 + %14 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i13.i, <4 x float> nofpclass(nan inf) %10, <4 x float> %mul3.i15.i) + %splat.splat5.i16.i = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> + %15 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat5.i16.i, <4 x float> nofpclass(nan inf) %12, <4 x float> %14) + %splat.splat7.i17.i = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> + %16 = tail call reassoc nnan ninf nsz arcp afn noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat7.i17.i, <4 x float> nofpclass(nan inf) %13, <4 x float> %15) +; CHECK: %[[PTR_M2:[0-9]+]] = OpInBoundsAccessChain %[[PTR_MATRIX]] %[[PTR_STRUCT]] %[[TWO_64]] +; CHECK: %[[PTR_M2_V0:[0-9]+]] = OpAccessChain %[[PTR_VEC4]] %[[PTR_M2]] %[[ZERO]] +; CHECK: %[[VAL_M2_V0:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M2_V0]] Aligned 16 + %17 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 128), align 16 +; CHECK: %[[PTR_M2_V1:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[TWO_64]] %[[ONE_64]] +; CHECK: %[[VAL_M2_V1:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M2_V1]] Aligned 16 + %18 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 144), align 16 +; CHECK: %[[PTR_M2_V2:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[TWO_64]] %[[TWO_64]] +; CHECK: %[[VAL_M2_V2:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M2_V2]] Aligned 16 + %19 = load <4 x float>, ptr 
addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 160), align 16 +; CHECK: %[[PTR_M2_V3:[0-9]+]] = OpInBoundsAccessChain %[[PTR_VEC4]] %[[PTR_STRUCT]] %[[TWO_64]] %[[THREE_64]] +; CHECK: %[[VAL_M2_V3:[0-9]+]] = OpLoad %[[VEC4]] %[[PTR_M2_V3]] Aligned 16 + %20 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @transforms, i64 176), align 16 + %splat.splat.i.i = shufflevector <4 x float> %16, <4 x float> poison, <4 x i32> zeroinitializer + %splat.splat2.i.i = shufflevector <4 x float> %16, <4 x float> poison, <4 x i32> + %mul3.i.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %splat.splat2.i.i, %18 + %21 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> nofpclass(nan inf) %17, <4 x float> %mul3.i.i) + %splat.splat5.i.i = shufflevector <4 x float> %16, <4 x float> poison, <4 x i32> + %22 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat5.i.i, <4 x float> nofpclass(nan inf) %19, <4 x float> %21) + %splat.splat7.i.i = shufflevector <4 x float> %16, <4 x float> poison, <4 x i32> + %23 = tail call reassoc nnan ninf nsz arcp afn noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat7.i.i, <4 x float> nofpclass(nan inf) %20, <4 x float> %22) + %24 = load float, ptr addrspace(12) @blend, align 4 +; CHECK: %[[VAL_FLOAT:[0-9]+]] = OpLoad %[[FLOAT]] %[[PTR_FLOAT_VAL]] Aligned 4 +; CHECK: %[[SPLAT_INS:[0-9]+]] = OpCompositeInsert %[[VEC4]] %[[VAL_FLOAT]] {{.*}} 0 +; CHECK: %[[SPLAT:[0-9]+]] = OpVectorShuffle %[[VEC4]] %[[SPLAT_INS]] {{.*}} 0 0 0 0 +; CHECK: %[[RES:[0-9]+]] = OpFMul %[[VEC4]] {{%[0-9]+}} %[[SPLAT]] + %splat.splatinsert.i = insertelement <4 x float> poison, float %24, i64 0 + %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> poison, <4 x i32> zeroinitializer + %mul.i = fmul reassoc nnan ninf nsz arcp afn <4 x float> %23, %splat.splat.i + %25 
= tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) %0, i32 0) + store <4 x float> %mul.i, ptr addrspace(11) %25, align 16 +; CHECK: OpStore {{%[0-9]+}} %[[RES]] Aligned 16 + ret void +} + +declare i32 @llvm.spv.thread.id.i32(i32) + +declare target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v4f32_12_1t(i32, i32, i32, i32, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4f32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x float>], 12, 1), i32) + +attributes #1 = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +attributes #3 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } +attributes #4 = { mustprogress nofree nosync nounwind willreturn memory(none) } + +!hlsl.cbs = !{!0} + +!0 = !{ptr @MyCBuffer.cb, ptr addrspace(12) @transforms, ptr addrspace(12) @blend} \ No newline at end of file