Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 142 additions & 10 deletions llvm/lib/Target/DirectX/DXILResourceAccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
Expand All @@ -20,6 +21,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

#define DEBUG_TYPE "dxil-resource-access"
Expand All @@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
APInt Scaled = ConstantOffset.udiv(ScalarSize);
return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
}

auto IndexIt = GEP->idx_begin();
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
"GEP is not indexing through pointer");
++IndexIt;
Value *Offset = *IndexIt;
assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
return Offset;
unsigned NumIndices = GEP->getNumIndices();

// If we have a single index we're indexing into a top level array. This
// generally only happens with cbuffers.
if (NumIndices == 1)
return *GEP->idx_begin();

// If we have two indices, this should be a simple access through a pointer.
if (NumIndices == 2) {
auto IndexIt = GEP->idx_begin();
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
"GEP is not indexing through pointer");
++IndexIt;
Value *Offset = *IndexIt;
assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
return Offset;
}

llvm_unreachable("Unhandled GEP structure for resource access");
}

static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
Expand Down Expand Up @@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
LI->replaceAllUsesWith(V);
}

namespace {
/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
struct CBufferRowIntrin {
Intrinsic::ID IID;
Type *RetTy;
unsigned int EltSize;
unsigned int NumElts;

CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
assert(Ty == Ty->getScalarType() && "Expected scalar type");

switch (DL.getTypeSizeInBits(Ty)) {
case 16:
IID = Intrinsic::dx_resource_load_cbufferrow_8;
RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
EltSize = 2;
NumElts = 8;
break;
case 32:
IID = Intrinsic::dx_resource_load_cbufferrow_4;
RetTy = StructType::get(Ty, Ty, Ty, Ty);
EltSize = 4;
NumElts = 4;
break;
case 64:
IID = Intrinsic::dx_resource_load_cbufferrow_2;
RetTy = StructType::get(Ty, Ty);
EltSize = 8;
NumElts = 2;
break;
default:
llvm_unreachable("Only 16, 32, and 64 bit types supported");
}
}
};
} // namespace

static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
const DataLayout &DL = LI->getDataLayout();

Type *Ty = LI->getType();
assert(!isa<StructType>(Ty) && "Structs not handled yet");
CBufferRowIntrin Intrin(DL, Ty->getScalarType());

StringRef Name = LI->getName();
Value *Handle = II->getOperand(0);

IRBuilder<> Builder(LI);

ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
assert(GlobalOffset && "CBuffer getpointer index must be constant");

unsigned int FixedOffset = GlobalOffset->getZExtValue();
// If we have a further constant offset we can just fold it in to the fixed
// offset.
if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since Offset is not optional would dyn_cast_or_null be better here?

Suggested change
if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
if (auto *ConstOffset = dyn_cast_or_null<ConstantInt>(Offset)) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dyn_cast_or_null and dyn_cast_if_present are the same thing. There are comments to the effect of or_null being deprecated from 2022, but it doesn't appear that anybody is really working towards that. I can switch to or_null for familiarity sake if you prefer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave that up to you. I just had to look up dyn_cast_if_present because I have not seen it before. I had no idea dyn_cast_or_null was suggested to be deprecated.

FixedOffset += ConstOffset->getZExtValue();
Offset = nullptr;
}

Value *CurrentRow = ConstantInt::get(
Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
unsigned int CurrentIndex =
(FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;

assert(!(CurrentIndex && Offset) &&
"Dynamic indexing into elements of cbuffer rows is not supported");
if (Offset)
CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is Offset in bytes or rows? Is it being added to CurrentRow here, but on line 245 is it added to FixedOffset, which is in bytes.


auto *CBufLoad = Builder.CreateIntrinsic(
Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
auto *Elt =
Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");

unsigned int Remaining =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming the only valid return values for Dl.getTypeSizeInBits(Ty) are 64,32, and 16. It looks like all possible values of Remaining are going to be zero.

Was there something else causing this check to be here?

((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
if (Remaining == 0) {
// We only have a single element, so we're done.
Value *Result = Elt;

// However, if we loaded a <1 x T>, then we need to adjust the type.
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are poisonvalues being used here instead of something like undef?

Builder.getInt32(0), Name);
}
LI->replaceAllUsesWith(Result);
return;
}

// Walk each element and extract it, wrapping to new rows as needed.
SmallVector<Value *> Extracts{Elt};
while (Remaining--) {
CurrentIndex %= Intrin.NumElts;

if (CurrentIndex == 0) {
CurrentRow = Builder.CreateAdd(CurrentRow,
ConstantInt::get(Builder.getInt32Ty(), 1));
CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
{Handle, CurrentRow}, nullptr,
Name + ".load");
}

Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
Name + ".extract"));
}

// Finally, we build up the original loaded value.
Value *Result = PoisonValue::get(Ty);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be undef too?

for (int I = 0, E = Extracts.size(); I < E; ++I)
Result = Builder.CreateInsertElement(
Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
LI->replaceAllUsesWith(Result);
}

static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
Expand All @@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
return createRawLoad(II, LI, Offset);
case dxil::ResourceKind::CBuffer:
return createCBufferLoad(II, LI, Offset, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
Expand All @@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::TextureCubeArray:
case dxil::ResourceKind::FeedbackTexture2D:
case dxil::ResourceKind::FeedbackTexture2DArray:
case dxil::ResourceKind::CBuffer:
case dxil::ResourceKind::TBuffer:
// TODO: handle these
reportFatalUsageError("Load not yet implemented for resource type");
return;
case dxil::ResourceKind::Sampler:
case dxil::ResourceKind::RTAccelerationStructure:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
;
; Tests for indexed types in dynamically indexed arrays in cbuffers.
;
; struct S {
; float x[2];
; uint q;
; };
; cbuffer CB : register(b0) {
; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3
; S v[3]; // offset 48, size 24 (+8) * 3
; }
%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
%__cblayout_CB = type <{
<{
[2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
<3 x i32>
}>,
target("dx.Padding", 4),
<{
[2 x <{ %S, target("dx.Padding", 8) }>], %S
}>
}>

@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison

; CHECK: define void @f
define void @f(ptr %dst, i32 %idx) {
entry:
%CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4

; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4

;; w[2].z
;
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
; CHECK: store i32 [[X]], ptr %dst
%w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
%w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40
%w_load = load i32, ptr addrspace(2) %w_gep, align 4
store i32 %w_load, ptr %dst, align 4

;; v[2].q
;
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
; CHECK: store i32 [[X]], ptr [[PTR]]
%v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
%v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84
%v_load = load i32, ptr addrspace(2) %v_gep, align 4
%v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
store i32 %v_load, ptr %v.i, align 4

ret void
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
;
; Test for when we have indices into both the array and the vector: ie, s[1][3]

; cbuffer CB : register(b0) {
; uint4 s[2]; // offset 0, size 16 * 2
; }
%__cblayout_CB = type <{ [2 x <4 x i32>] }>

@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison

; CHECK: define void @f
define void @f(ptr %dst) {
entry:
%CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4

; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4

;; s[1].z
;
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
; CHECK: store i32 [[X]], ptr %dst
%i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
%i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28
%i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4
store i32 %i8_vecext, ptr %dst, align 4

;; s[1].z
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
;; s[1].z
;; s[1].w

The code is accessing 4th element of the s[1] vector (here and in the second case below as well), and the test description also mentions s[1][3].

;
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
;;
;; It would be nice to avoid the redundant vector creation here, but that's
;; outside of the scope of this pass.
;;
; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3
; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3
; CHECK: store i32 [[X_EXT]], ptr %dst
%typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
%typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1
%typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16
%typed_vecext = extractelement <4 x i32> %typed_load, i32 3
store i32 %typed_vecext, ptr %dst, align 4

ret void
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s

; cbuffer CB : register(b0) {
; float a1[3];
; }
%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>

@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison

; CHECK: define void @f
define void @f(ptr %dst) {
entry:
%CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4

;; a1[1]
;; Note that the valid GEPs of a1 are `0, 0, 0`, `0, 0, 1`, and `0, 1`.
;
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
; CHECK: store float [[X]], ptr %dst
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
%a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
%a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1
%a1 = load float, ptr addrspace(2) %a1_gep, align 4
store float %a1, ptr %dst, align 32

ret void
}
Loading