Skip to content

Commit

Permalink
[AMDGPU] Trim trailing undefs from the end of image and buffer store
Browse files Browse the repository at this point in the history
Remove undef values from the end of the vector operand in image and
buffer store instructions.
Also, instead of calling computeKnownFPClass, use only findScalarElement.

Continuation of: 88421ea Trim zero components from buffer and image stores

Differential Revision: https://reviews.llvm.org/D152440
  • Loading branch information
Mateja Marjanovic committed Jun 15, 2023
1 parent 974b1a6 commit 7047cb5
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
19 changes: 11 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,17 +385,20 @@ static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
APInt DemandedElts = APInt::getAllOnes(VWidth);

for (int i = VWidth - 1; i > 0; --i) {
APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
KnownFPClass KnownFPClass =
computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
/*InterestedClasses=*/fcAllFlags,
/*Depth=*/0, &IC.getTargetLibraryInfo(),
&IC.getAssumptionCache(), I,
&IC.getDominatorTree());
if (KnownFPClass.KnownFPClasses != fcPosZero)
auto *Elt = findScalarElement(UseV, i);
if (!Elt)
break;

if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
break;
} else {
break;
}

DemandedElts.clearBit(i);
}

return DemandedElts;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ define amdgpu_ps void @struct_tbuffer_store_insert_zeros_at_beginning(<4 x i32>
ret void
}

; Test: the stored <4 x float> is built from poison with only elements 0 and 1
; inserted, so elements 2 and 3 remain poison. The CHECK lines below expect the
; v4f32 tbuffer store to be rewritten as a v2f32 store of the two leading
; elements.
define amdgpu_ps void @struct_tbuffer_store_insert_undefs(<4 x i32> inreg %a, float %vdata1, i32 %b) {
; GCN-LABEL: @struct_tbuffer_store_insert_undefs(
; GCN-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float poison, float 1.000000e+00>, float [[VDATA1:%.*]], i64 0
; GCN-NEXT: call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> [[TMP1]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i32 42, i32 0, i32 15)
; GCN-NEXT: ret void
;
; Element 0 is the %vdata1 argument and element 1 is the constant 1.0; the
; trailing elements are never written.
%newvdata1 = insertelement <4 x float> poison, float %vdata1, i32 0
%newvdata2 = insertelement <4 x float> %newvdata1, float 1.0, i32 1
call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %newvdata2, <4 x i32> %a, i32 %b, i32 0, i32 42, i32 0, i32 15)
ret void
}


declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
Expand Down

0 comments on commit 7047cb5

Please sign in to comment.