Skip to content

Emit array GEPs in memcpy/memset legalization #148886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 15, 2025

Conversation

Icohedron
Copy link
Contributor

Fixes #148089

@llvmbot
Copy link
Member

llvmbot commented Jul 15, 2025

@llvm/pr-subscribers-backend-directx

Author: Deric C. (Icohedron)

Changes

Fixes #148089


Full diff: https://github.com/llvm/llvm-project/pull/148886.diff

3 Files Affected:

  • (modified) llvm/lib/Target/DirectX/DXILLegalizePass.cpp (+11-11)
  • (modified) llvm/test/CodeGen/DirectX/legalize-memcpy.ll (+28-28)
  • (modified) llvm/test/CodeGen/DirectX/legalize-memset.ll (+10-10)
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 76a46c7a2b760..6eff77f8d51e5 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -347,7 +347,6 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
   if (ByteLength == 0)
     return;
 
-  LLVMContext &Ctx = Builder.getContext();
   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
 
   auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
@@ -392,10 +391,11 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
   assert(ByteLength % DstElemByteSize == 0 &&
          "memcpy length must be divisible by array element type");
   for (uint64_t I = 0; I < NumElemsToCopy; ++I) {
-    Value *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), I);
-    Value *SrcPtr = Builder.CreateInBoundsGEP(SrcElemTy, Src, Offset, "gep");
+    SmallVector<Value *, 2> Indices = {Builder.getInt32(0),
+                                       Builder.getInt32(I)};
+    Value *SrcPtr = Builder.CreateInBoundsGEP(SrcArrTy, Src, Indices, "gep");
     Value *SrcVal = Builder.CreateLoad(SrcElemTy, SrcPtr);
-    Value *DstPtr = Builder.CreateInBoundsGEP(DstElemTy, Dst, Offset, "gep");
+    Value *DstPtr = Builder.CreateInBoundsGEP(DstArrTy, Dst, Indices, "gep");
     Builder.CreateStore(SrcVal, DstPtr);
   }
 }
@@ -403,7 +403,6 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
 static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
                                 ConstantInt *SizeCI,
                                 DenseMap<Value *, Value *> &ReplacedValues) {
-  LLVMContext &Ctx = Builder.getContext();
   [[maybe_unused]] const DataLayout &DL =
       Builder.GetInsertBlock()->getModule()->getDataLayout();
   [[maybe_unused]] uint64_t OrigSize = SizeCI->getZExtValue();
@@ -444,8 +443,9 @@ static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
   }
 
   for (uint64_t I = 0; I < Size; ++I) {
-    Value *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), I);
-    Value *Ptr = Builder.CreateGEP(ElemTy, Dst, Offset, "gep");
+    Value *Zero = Builder.getInt32(0);
+    Value *Offset = Builder.getInt32(I);
+    Value *Ptr = Builder.CreateGEP(ArrTy, Dst, {Zero, Offset}, "gep");
     Builder.CreateStore(TypedVal, Ptr);
   }
 }
@@ -478,9 +478,9 @@ static void legalizeMemCpy(Instruction &I,
   ToRemove.push_back(CI);
 }
 
-static void removeMemSet(Instruction &I,
-                         SmallVectorImpl<Instruction *> &ToRemove,
-                         DenseMap<Value *, Value *> &ReplacedValues) {
+static void legalizeMemSet(Instruction &I,
+                           SmallVectorImpl<Instruction *> &ToRemove,
+                           DenseMap<Value *, Value *> &ReplacedValues) {
 
   CallInst *CI = dyn_cast<CallInst>(&I);
   if (!CI)
@@ -603,7 +603,7 @@ class DXILLegalizationPipeline {
     LegalizationPipeline[Stage1].push_back(legalizeGetHighLowi64Bytes);
     LegalizationPipeline[Stage1].push_back(legalizeFreeze);
     LegalizationPipeline[Stage1].push_back(legalizeMemCpy);
-    LegalizationPipeline[Stage1].push_back(removeMemSet);
+    LegalizationPipeline[Stage1].push_back(legalizeMemSet);
     LegalizationPipeline[Stage1].push_back(updateFnegToFsub);
     // Note: legalizeGetHighLowi64Bytes and
     // downcastI64toI32InsertExtractElements both modify extractelement, so they
diff --git a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
index 0cf9d83b3af25..55bdefc12aa77 100644
--- a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
@@ -6,9 +6,9 @@ define void @replace_int_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [1 x i32], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [1 x i32], align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x i32], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [1 x i32], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -23,17 +23,17 @@ define void @replace_3int_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [3 x i32], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [3 x i32], align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 [[TMP4]], ptr [[GEP3]], align 4
-; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[GEP4]], align 4
-; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP2]], i32 0, i32 2
 ; CHECK-NEXT:    store i32 [[TMP5]], ptr [[GEP5]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -48,13 +48,13 @@ define void @replace_mismatched_size_int_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x i32], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [3 x i32], align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 [[TMP4]], ptr [[GEP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -69,13 +69,13 @@ define void @replace_int16_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x i16], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x i16], align 2
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x i16], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x i16], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store i16 [[TMP3]], ptr [[GEP1]], align 2
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i16], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[GEP2]], align 2
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i16], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store i16 [[TMP4]], ptr [[GEP3]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -90,13 +90,13 @@ define void @replace_float_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x float], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x float], align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x float], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x float], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store float [[TMP3]], ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x float], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x float], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store float [[TMP4]], ptr [[GEP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -111,13 +111,13 @@ define void @replace_double_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x double], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x double], align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x double], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[GEP]], align 8
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x double], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store double [[TMP3]], ptr [[GEP1]], align 8
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x double], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[GEP2]], align 8
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x double], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store double [[TMP4]], ptr [[GEP3]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -132,13 +132,13 @@ define void @replace_half_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x half], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x half], align 2
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x half], ptr [[TMP1]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x half], ptr [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    store half [[TMP3]], ptr [[GEP1]], align 2
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x half], ptr [[TMP1]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[GEP2]], align 2
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x half], ptr [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    store half [[TMP4]], ptr [[GEP3]], align 2
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/CodeGen/DirectX/legalize-memset.ll b/llvm/test/CodeGen/DirectX/legalize-memset.ll
index e97817ba824ed..a73e7378cfb9f 100644
--- a/llvm/test/CodeGen/DirectX/legalize-memset.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-memset.ll
@@ -6,9 +6,9 @@ define void @replace_float_memset_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [2 x float], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
 ; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [2 x float], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
 ; CHECK-NEXT:    store float 0.000000e+00, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[ACCUM_I_FLAT]])
 ; CHECK-NEXT:    ret void
@@ -25,9 +25,9 @@ define void @replace_half_memset_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [2 x half], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
 ; CHECK-NEXT:    store half 0xH0000, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [2 x half], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
 ; CHECK-NEXT:    store half 0xH0000, ptr [[GEP1]], align 2
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
 ; CHECK-NEXT:    ret void
@@ -44,9 +44,9 @@ define void @replace_double_memset_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [2 x double], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP]], align 8
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [2 x double], ptr [[ACCUM_I_FLAT]], i32 0, i32 1
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP1]], align 8
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[ACCUM_I_FLAT]])
 ; CHECK-NEXT:    ret void
@@ -63,9 +63,9 @@ define void @replace_int16_memset_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CACHE_I:%.*]] = alloca [2 x i16], align 2
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CACHE_I]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 0
 ; CHECK-NEXT:    store i16 0, ptr [[GEP]], align 2
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 1
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [2 x i16], ptr [[CACHE_I]], i32 0, i32 1
 ; CHECK-NEXT:    store i16 0, ptr [[GEP1]], align 2
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CACHE_I]])
 ; CHECK-NEXT:    ret void
@@ -82,7 +82,7 @@ define void @replace_int_memset_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 0, ptr [[GEP]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
 ; CHECK-NEXT:    ret void
@@ -102,7 +102,7 @@ define void @replace_int_memset_to_var_test() #0 {
 ; CHECK-NEXT:    store i32 1, ptr [[I]], align 4
 ; CHECK-NEXT:    [[I8_LOAD:%.*]] = load i32, ptr [[I]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [1 x i32], ptr [[ACCUM_I_FLAT]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[I8_LOAD]], ptr [[GEP]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]])
 ; CHECK-NEXT:    ret void

@Icohedron Icohedron merged commit a9021e5 into llvm:main Jul 15, 2025
8 of 10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[DirectX] GEPs from memset and memcpy legalization fail to validate due to Explicit load/store type does not match pointee type of pointer operand
6 participants