Skip to content

Commit

Permalink
[DAG] Convert truncstore(extend(x)) back to store(x)
Browse files Browse the repository at this point in the history
Pulled out of D106237, this folds truncstore(extend(x)) back to store(x)
if the original store was legal. This can come up due to the order in
which we fold nodes. An existing X86 fold needs to be adjusted to prevent
infinite loops, so that it picks the operand of the trunc directly.

Differential Revision: https://reviews.llvm.org/D117901
  • Loading branch information
davemgreen committed Jan 22, 2022
1 parent 0283b07 commit b27e545
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
// Convert a truncating store of an extension into a standard store.
if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
Value.getOpcode() == ISD::SIGN_EXTEND ||
Value.getOpcode() == ISD::ANY_EXTEND) &&
Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemOperand());

APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
St->getValue().getOperand(0));
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}
Expand Down
15 changes: 7 additions & 8 deletions llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
Expand Up @@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s8, s2
; VI-NEXT: s_mov_b32 s9, s3
; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
; VI-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 offset:2
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:2
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:2
; VI-NEXT: v_lshrrev_b16_e32 v0, 8, v0
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 offset:1
; VI-NEXT: s_endpgm
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1
Expand Down

0 comments on commit b27e545

Please sign in to comment.