[AMDGPU] Trim trailing undefs from the end of image and buffer stores

Remove undef values from the end of the vector operand in image and buffer store instructions. Also, instead of calling computeKnownFPClass, use only findScalarElement.

Continuation of: 88421ea ("Trim zero components from buffer and image stores")

Differential Revision: https://reviews.llvm.org/D152440
llvm · Jun 15, 2023 · 7047cb5 · 7047cb5
1 parent 974b1a6
commit 7047cb5
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 8 deletions.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -385,17 +385,20 @@ static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
   APInt DemandedElts = APInt::getAllOnes(VWidth);
 
   for (int i = VWidth - 1; i > 0; --i) {
-    APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
-    KnownFPClass KnownFPClass =
-        computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
-                            /*InterestedClasses=*/fcAllFlags,
-                            /*Depth=*/0, &IC.getTargetLibraryInfo(),
-                            &IC.getAssumptionCache(), I,
-                            &IC.getDominatorTree());
-    if (KnownFPClass.KnownFPClasses != fcPosZero)
+    auto *Elt = findScalarElement(UseV, i);
+    if (!Elt)
       break;
+
+    if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
+      if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
+        break;
+    } else {
+      break;
+    }
+
     DemandedElts.clearBit(i);
   }
+
   return DemandedElts;
 }
 

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll
@@ -84,6 +84,19 @@ define amdgpu_ps void @struct_tbuffer_store_insert_zeros_at_beginning(<4 x i32>
   ret void
 }
 
+define amdgpu_ps void @struct_tbuffer_store_insert_undefs(<4 x i32> inreg %a, float %vdata1, i32 %b) {
+; GCN-LABEL: @struct_tbuffer_store_insert_undefs(
+; GCN-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float poison, float 1.000000e+00>, float [[VDATA1:%.*]], i64 0
+; GCN-NEXT:    call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> [[TMP1]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i32 42, i32 0, i32 15)
+; GCN-NEXT:    ret void
+;
+  %newvdata1 = insertelement <4 x float> poison, float %vdata1, i32 0
+  %newvdata2 = insertelement <4 x float> %newvdata1, float 1.0, i32 1
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %newvdata2, <4 x i32> %a, i32 %b, i32 0, i32 42, i32 0, i32 15)
+  ret void
+}
+
+
 declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
 declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
 declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2