[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains #168096

Icohedron · 2025-11-14T18:13:23Z

The DXIL data scalarizer only needs to change vectors into arrays. It does not need to change the types of GEPs to match the pointer type. This PR simplifies the visitGetElementPtrInst method to do just that while also accounting for nested GEPs from ConstantExprs. (Before this PR, there were still vector types lingering in nested GEPs with ConstantExprs.)
The equivalentArrayTypeFromVector function was awkwardly placed near the top of the file and away from the other helper functions. The function is now moved next to the other helper functions.
Removed an unnecessary || condition from isVectorOrArrayOfVectors

Related tests have also been cleaned up, and the test CHECKs have been modified to account for the new simplified behavior.

llvmbot · 2025-11-14T18:13:44Z

@llvm/pr-subscribers-backend-directx

Author: Deric C. (Icohedron)

Changes

The DXIL data scalarizer only needs to change vectors into arrays. It does not need to change the types of GEPs to match the pointer type. This PR simplifies the visitGetElementPtrInst method to do just that while also accounting for nested GEPs from ConstantExprs. (Before this PR, there were still vector types lingering in nested GEPs with ConstantExprs.)
The equivalentArrayTypeFromVector function was awkwardly placed near the top of the file and away from the other helper functions. The function is now moved next to the other helper functions.
Removed an unnecessary || condition from isVectorOrArrayOfVectors

Related tests have also been cleaned up, and the test CHECKs have been modified to account for the new simplified behavior.

Full diff: https://github.com/llvm/llvm-project/pull/168096.diff

4 Files Affected:

(modified) llvm/lib/Target/DirectX/DXILDataScalarization.cpp (+48-83)
(modified) llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll (+21-14)
(modified) llvm/test/CodeGen/DirectX/scalarize-alloca.ll (+3-3)
(modified) llvm/test/CodeGen/DirectX/scalarize-global.ll (+3-3)

diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 9f1616f6960fe..5f18c37ef1125 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -29,20 +29,6 @@ static const int MaxVecSize = 4;
 
 using namespace llvm;
 
-// Recursively creates an array-like version of a given vector type.
-static Type *equivalentArrayTypeFromVector(Type *T) {
-  if (auto *VecTy = dyn_cast<VectorType>(T))
-    return ArrayType::get(VecTy->getElementType(),
-                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
-  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
-    Type *NewElementType =
-        equivalentArrayTypeFromVector(ArrayTy->getElementType());
-    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
-  }
-  // If it's not a vector or array, return the original type.
-  return T;
-}
-
 class DXILDataScalarizationLegacy : public ModulePass {
 
 public:
@@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
 static bool isVectorOrArrayOfVectors(Type *T) {
   if (isa<VectorType>(T))
     return true;
-  if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
-    return isa<VectorType>(ArrType->getElementType()) ||
-           isVectorOrArrayOfVectors(ArrType->getElementType());
+  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(T))
+    return isVectorOrArrayOfVectors(ArrayTy->getElementType());
   return false;
 }
 
+// Recursively creates an array-like version of a given vector type.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+  if (auto *VecTy = dyn_cast<VectorType>(T))
+    return ArrayType::get(VecTy->getElementType(),
+                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
+  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+    Type *NewElementType =
+        equivalentArrayTypeFromVector(ArrayTy->getElementType());
+    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+  }
+  // If it's not a vector or array, return the original type.
+  return T;
+}
+
 bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
   Type *AllocatedType = AI.getAllocatedType();
   if (!isVectorOrArrayOfVectors(AllocatedType))
@@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
   IRBuilder<> Builder(&AI);
   Type *NewType = equivalentArrayTypeFromVector(AllocatedType);
   AllocaInst *ArrAlloca =
-      Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
+      Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized");
   ArrAlloca->setAlignment(AI.getAlign());
   AI.replaceAllUsesWith(ArrAlloca);
   AI.eraseFromParent();
@@ -303,78 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   GEPOperator *GOp = cast<GEPOperator>(&GEPI);
   Value *PtrOperand = GOp->getPointerOperand();
-  Type *NewGEPType = GOp->getSourceElementType();
-
-  // Unwrap GEP ConstantExprs to find the base operand and element type
-  while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
-             dyn_cast<ConstantExpr>(PtrOperand))) {
-    GOp = GEPCE;
-    PtrOperand = GEPCE->getPointerOperand();
-    NewGEPType = GEPCE->getSourceElementType();
-  }
-
-  Type *const OrigGEPType = NewGEPType;
-  Value *const OrigOperand = PtrOperand;
-
-  if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
-    NewGEPType = NewGlobal->getValueType();
-    PtrOperand = NewGlobal;
-  } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
-    Type *AllocatedType = Alloca->getAllocatedType();
-    if (isa<ArrayType>(AllocatedType) &&
-        AllocatedType != GOp->getResultElementType())
-      NewGEPType = AllocatedType;
-  } else
-    return false; // Only GEPs into an alloca or global variable are considered
-
-  // Defer changing i8 GEP types until dxil-flatten-arrays
-  if (OrigGEPType->isIntegerTy(8))
-    NewGEPType = OrigGEPType;
-
-  // If the original type is a "sub-type" of the new type, then ensure the gep
-  // correctly zero-indexes the extra dimensions to keep the offset calculation
-  // correct.
-  // Eg:
-  //  i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
-  //
-  // So then:
-  //   gep [4 x i32] %idx
-  //     -> gep [8 x [4 x i32]], i32 0, i32 %idx
-  //   gep i32 %idx
-  //     -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
-  uint32_t MissingDims = 0;
-  Type *SubType = NewGEPType;
-
-  // The new type will be in its array version; so match accordingly.
-  Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
-
-  while (SubType != GEPArrType) {
-    MissingDims++;
-
-    ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
-    if (!ArrType) {
-      assert(SubType == GEPArrType &&
-             "GEP uses an DXIL invalid sub-type of alloca/global variable");
-      break;
-    }
-
-    SubType = ArrType->getElementType();
+  Type *GEPType = GOp->getSourceElementType();
+
+  // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that
+  // it can be visited
+  if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+      PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+    GetElementPtrInst *OldGEPI =
+        cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction());
+    OldGEPI->insertBefore(GEPI.getIterator());
+
+    IRBuilder<> Builder(&GEPI);
+    SmallVector<Value *> Indices(GEPI.indices());
+    Value *NewGEP =
+        Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices,
+                          GEPI.getName(), GEPI.getNoWrapFlags());
+    assert(isa<GetElementPtrInst>(NewGEP) &&
+           "Expected newly-created GEP to be an instruction");
+    GetElementPtrInst *NewGEPI = cast<GetElementPtrInst>(NewGEP);
+
+    GEPI.replaceAllUsesWith(NewGEPI);
+    GEPI.eraseFromParent();
+    visitGetElementPtrInst(*OldGEPI);
+    visitGetElementPtrInst(*NewGEPI);
+    return true;
   }
 
-  bool NeedsTransform = OrigOperand != PtrOperand ||
-                        OrigGEPType != NewGEPType || MissingDims != 0;
+  Type *NewGEPType = equivalentArrayTypeFromVector(GEPType);
+  Value *NewPtrOperand = PtrOperand;
+  if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
+    NewPtrOperand = NewGlobal;
 
+  bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType;
   if (!NeedsTransform)
     return false;
 
   IRBuilder<> Builder(&GEPI);
-  SmallVector<Value *, MaxVecSize> Indices;
-
-  for (uint32_t I = 0; I < MissingDims; I++)
-    Indices.push_back(Builder.getInt32(0));
-  llvm::append_range(Indices, GOp->indices());
-
-  Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
+  SmallVector<Value *, MaxVecSize> Indices(GOp->idx_begin(), GOp->idx_end());
+  Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices,
                                     GOp->getName(), GOp->getNoWrapFlags());
 
   GOp->replaceAllUsesWith(NewGEP);
diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
index 156a8e7c5c386..def886f933d08 100644
--- a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
+++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
@@ -11,9 +11,10 @@ define void @CSMain() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
 ;
-; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
-; SCHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
-; SCHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [4 x i32]], ptr addrspace(3) [[GEP0]], i32 0, i32 2
+; SCHECK-NEXT:    [[LOAD:%.*]] = load <4 x i32>, ptr addrspace(3) [[GEP1]], align 16
+; SCHECK-NEXT:    store <4 x i32> [[LOAD]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
 ;
 ; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
 ; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
@@ -40,12 +41,13 @@ define void @Main() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
 ;
-; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
-; SCHECK-NEXT:    store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(3) [[GEP1]], align 16
+; SCHECK-NEXT:    store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
 ;
-; FCHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
-; FCHECK-NEXT:    store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
+; FCHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
+; FCHECK-NEXT:    store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
 ;
 ; CHECK-NEXT:    ret void
 entry:
@@ -57,10 +59,12 @@ entry:
 
 define void @global_nested_geps_3d() {
 ; CHECK-LABEL: define void @global_nested_geps_3d() {
-; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP1]], i32 0, i32 1
+; SCHECK-NEXT:    load i32, ptr [[GEP2]], align 4
 ;
-; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
+; FCHECK-NEXT:    load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
 ;
 ; CHECK-NEXT:    ret void
   %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
@@ -69,10 +73,13 @@ define void @global_nested_geps_3d() {
 
 define void @global_nested_geps_4d() {
 ; CHECK-LABEL: define void @global_nested_geps_4d() {
-; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
-; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; SCHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1
+; SCHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr [[GEP0]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP1]], i32 0, i32 1
+; SCHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP2]], i32 0, i32 1
+; SCHECK-NEXT:    load i32, ptr [[GEP3]], align 4
 ;
-; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
+; FCHECK-NEXT:    load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
 ;
 ; CHECK-NEXT:    ret void
   %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
index 475935d2eb135..85e3bb0185e44 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
@@ -48,7 +48,7 @@ define void @subtype_array_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -64,7 +64,7 @@ define void @subtype_vector_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -80,7 +80,7 @@ define void @subtype_scalar_test() {
   ; SCHECK:  [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
   ; FCHECK:  [[alloca_val:%.*]] = alloca [32 x i32], align 4
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr [[alloca_val]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll
index ca10f6ece5a85..c27dc4083bfd3 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-global.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll
@@ -11,7 +11,7 @@
 ; CHECK-LABEL: subtype_array_test
 define <4 x i32> @subtype_array_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -26,7 +26,7 @@ define <4 x i32> @subtype_array_test() {
 ; CHECK-LABEL: subtype_vector_test
 define <4 x i32> @subtype_vector_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -41,7 +41,7 @@ define <4 x i32> @subtype_vector_test() {
 ; CHECK-LABEL: subtype_scalar_test
 define <4 x i32> @subtype_scalar_test() {
   ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
-  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
+  ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
   ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
   ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
   ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]

…gep-cleanup

inbelic

Nice that we can keep it simple

Icohedron added 2 commits November 14, 2025 09:57

Simplify data scalarization GEP handling

7fb08b4

Data scalarize entire GEP chains

6132b19

Icohedron requested review from farzonl and inbelic November 14, 2025 18:13

Icohedron added the backend:DirectX label Nov 14, 2025

Icohedron added this to HLSL Support Nov 14, 2025

Merge remote-tracking branch 'upstream/main' into data-scalarization-…

29620ed

…gep-cleanup

inbelic approved these changes Nov 14, 2025

View reviewed changes

farzonl approved these changes Nov 18, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains #168096

[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains #168096

Icohedron commented Nov 14, 2025

Uh oh!

llvmbot commented Nov 14, 2025

Uh oh!

inbelic left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains #168096

Are you sure you want to change the base?

[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains #168096

Conversation

Icohedron commented Nov 14, 2025

Uh oh!

llvmbot commented Nov 14, 2025

Uh oh!

inbelic left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants