llvm · bogner · Oct 21, 2025 · Oct 23, 2025 · Oct 23, 2025 · hekota
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
 #include "DirectX.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/User.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
 #define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
   APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
   if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
     APInt Scaled = ConstantOffset.udiv(ScalarSize);
-    return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+    return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
   }
 
-  auto IndexIt = GEP->idx_begin();
-  assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
-         "GEP is not indexing through pointer");
-  ++IndexIt;
-  Value *Offset = *IndexIt;
-  assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
-  return Offset;
+  unsigned NumIndices = GEP->getNumIndices();
+
+  // If we have a single index we're indexing into a top level array. This
+  // generally only happens with cbuffers.
+  if (NumIndices == 1)
+    return *GEP->idx_begin();
+
+  // If we have two indices, this should be a simple access through a pointer.
+  if (NumIndices == 2) {
+    auto IndexIt = GEP->idx_begin();
+    assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+           "GEP is not indexing through pointer");
+    ++IndexIt;
+    Value *Offset = *IndexIt;
+    assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+    return Offset;
+  }
+
+  llvm_unreachable("Unhandled GEP structure for resource access");
 }
 
 static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
   LI->replaceAllUsesWith(V);
 }
 
+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+struct CBufferRowIntrin {
+  Intrinsic::ID IID;
+  Type *RetTy;
+  unsigned int EltSize;
+  unsigned int NumElts;
+
+  CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+    assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+    switch (DL.getTypeSizeInBits(Ty)) {
+    case 16:
+      IID = Intrinsic::dx_resource_load_cbufferrow_8;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+      EltSize = 2;
+      NumElts = 8;
+      break;
+    case 32:
+      IID = Intrinsic::dx_resource_load_cbufferrow_4;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty);
+      EltSize = 4;
+      NumElts = 4;
+      break;
+    case 64:
+      IID = Intrinsic::dx_resource_load_cbufferrow_2;
+      RetTy = StructType::get(Ty, Ty);
+      EltSize = 8;
+      NumElts = 2;
+      break;
+    default:
+      llvm_unreachable("Only 16, 32, and 64 bit types supported");
+    }
+  }
+};
+} // namespace
+
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+                              dxil::ResourceTypeInfo &RTI) {
+  const DataLayout &DL = LI->getDataLayout();
+
+  Type *Ty = LI->getType();
+  assert(!isa<StructType>(Ty) && "Structs not handled yet");
+  CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+  StringRef Name = LI->getName();
+  Value *Handle = II->getOperand(0);
+
+  IRBuilder<> Builder(LI);
+
+  ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+  assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+  unsigned int FixedOffset = GlobalOffset->getZExtValue();
+  // If we have a further constant offset we can just fold it in to the fixed
+  // offset.
+  if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
-  if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+  if (auto *ConstOffset = dyn_cast_or_null<ConstantInt>(Offset)) {
-  if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+  if (auto *ConstOffset = dyn_cast_or_null<ConstantInt>(Offset)) {
+    FixedOffset += ConstOffset->getZExtValue();
+    Offset = nullptr;
+  }
+
+  Value *CurrentRow = ConstantInt::get(
+      Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+  unsigned int CurrentIndex =
+      (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+  assert(!(CurrentIndex && Offset) &&
+         "Dynamic indexing into elements of cbuffer rows is not supported");
+  if (Offset)
+    CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+  auto *CBufLoad = Builder.CreateIntrinsic(
+      Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+  auto *Elt =
+      Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+  unsigned int Remaining =
+      ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+  if (Remaining == 0) {
+    // We only have a single element, so we're done.
+    Value *Result = Elt;
+
+    // However, if we loaded a <1 x T>, then we need to adjust the type.
+    if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+      assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+      Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+                                           Builder.getInt32(0), Name);
+    }
+    LI->replaceAllUsesWith(Result);
+    return;
+  }
+
+  // Walk each element and extract it, wrapping to new rows as needed.
+  SmallVector<Value *> Extracts{Elt};
+  while (Remaining--) {
+    CurrentIndex %= Intrin.NumElts;
+
+    if (CurrentIndex == 0) {
+      CurrentRow = Builder.CreateAdd(CurrentRow,
+                                     ConstantInt::get(Builder.getInt32Ty(), 1));
+      CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+                                         {Handle, CurrentRow}, nullptr,
+                                         Name + ".load");
+    }
+
+    Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+                                                  Name + ".extract"));
+  }
+
+  // Finally, we build up the original loaded value.
+  Value *Result = PoisonValue::get(Ty);
+  for (int I = 0, E = Extracts.size(); I < E; ++I)
+    Result = Builder.CreateInsertElement(
+        Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+  LI->replaceAllUsesWith(Result);
+}
+
 static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
                                 dxil::ResourceTypeInfo &RTI) {
   switch (RTI.getResourceKind()) {
@@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
   case dxil::ResourceKind::RawBuffer:
   case dxil::ResourceKind::StructuredBuffer:
     return createRawLoad(II, LI, Offset);
+  case dxil::ResourceKind::CBuffer:
+    return createCBufferLoad(II, LI, Offset, RTI);
   case dxil::ResourceKind::Texture1D:
   case dxil::ResourceKind::Texture2D:
   case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
   case dxil::ResourceKind::TextureCubeArray:
   case dxil::ResourceKind::FeedbackTexture2D:
   case dxil::ResourceKind::FeedbackTexture2DArray:
-  case dxil::ResourceKind::CBuffer:
   case dxil::ResourceKind::TBuffer:
-    // TODO: handle these
+    reportFatalUsageError("Load not yet implemented for resource type");
     return;
   case dxil::ResourceKind::Sampler:
   case dxil::ResourceKind::RTAccelerationStructure:

diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Tests for indexed types in dynamically indexed arrays in cbuffers.
+;
+; struct S {
+;   float x[2];
+;   uint q;
+; };
+; cbuffer CB : register(b0) {
+;   uint32_t3 w[3]; // offset  0, size 12 (+4) * 3
+;   S v[3];         // offset 48, size 24 (+8) * 3
+; }
+%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
+%__cblayout_CB = type <{
+  <{
+    [2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
+    <3 x i32>
+  }>,
+  target("dx.Padding", 4),
+  <{
+    [2 x <{ %S, target("dx.Padding", 8) }>], %S
+  }>
+}>
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst, i32 %idx) {
+entry:
+  %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+  ;; w[2].z
+  ;
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+  ; CHECK: store i32 [[X]], ptr %dst
+  %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+  %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40
+  %w_load = load i32, ptr addrspace(2) %w_gep, align 4
+  store i32 %w_load, ptr %dst, align 4
+
+  ;; v[2].q
+  ;
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+  ; CHECK: store i32 [[X]], ptr [[PTR]]
+  %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+  %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84
+  %v_load = load i32, ptr addrspace(2) %v_gep, align 4
+  %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+  store i32 %v_load, ptr %v.i, align 4
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Test for when we have indices into both the array and the vector: ie, s[1][3]
+
+; cbuffer CB : register(b0) {
+;   uint4 s[2]; // offset   0,  size 16        * 2
+; }
+%__cblayout_CB = type <{ [2 x <4 x i32>] }>
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+  ;; s[1].z
+  ;
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+  ; CHECK: store i32 [[X]], ptr %dst
+  %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+  %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28
+  %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4
+  store i32 %i8_vecext, ptr %dst, align 4
+
+  ;; s[1].z
-  ;; s[1].z
+  ;; s[1].w
-  ;; s[1].z
+  ;; s[1].w
+  ;
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+  ;;
+  ;; It would be nice to avoid the redundant vector creation here, but that's
+  ;; outside of the scope of this pass.
+  ;;
+  ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3
+  ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3
+  ; CHECK: store i32 [[X_EXT]], ptr %dst
+  %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+  %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1
+  %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16
+  %typed_vecext = extractelement <4 x i32> %typed_load, i32 3
+  store i32 %typed_vecext, ptr %dst, align 4
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+;   float a1[3];
+; }
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
+
+@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+  ;; a1[1]
+  ;; Note that the valid GEPs of a1 are `0, 0, 0`, `0, 0, 1`, and `0, 1`.
+  ;
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
+  %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+  %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1
+  %a1 = load float, ptr addrspace(2) %a1_gep, align 4
+  store float %a1, ptr %dst, align 32
+
+  ret void
+}