Skip to content

Commit

Permalink
[SDAG] Refine MMO size when converting masked load/store to normal lo…
Browse files Browse the repository at this point in the history
…ad/store

After D113888 / 32b6c17 the MMO size of a masked loads/store is
unknown. When we are converting back to a standard load/store because
the mask is known all ones, we can refine that to the correct size from
the size of the vector being loaded/stored.

Differential Revision: https://reviews.llvm.org/D114582
  • Loading branch information
davemgreen committed Dec 8, 2021
1 parent e6d0b85 commit 5d7efd4
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 3 deletions.
10 changes: 7 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -10067,7 +10067,9 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
!MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
MST->getBasePtr(), MST->getPointerInfo(),
MST->getOriginalAlign(), MachineMemOperand::MOStore,
MST->getAAInfo());

// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
Expand Down Expand Up @@ -10122,8 +10124,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// FIXME: Can we do this for indexed, expanding, or extending loads?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
!MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
SDValue NewLd = DAG.getLoad(
N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
MLD->getPointerInfo(), MLD->getOriginalAlign(),
MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
return CombineTo(N, NewLd, NewLd.getValue(1));
}

Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/X86/masked_loadstore_split.ll
@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -stop-after=finalize-isel | FileCheck %s

define void @split_masked_store(<8 x double>* %0) {
; CHECK-LABEL: name: split_masked_store
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
; CHECK-NEXT: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
; CHECK-NEXT: [[VMOVAPSYrm:%[0-9]+]]:vr256 = VMOVAPSYrm $rip, 1, $noreg, %const.0, $noreg :: (load (s256) from constant-pool)
; CHECK-NEXT: VMASKMOVPDYmr [[COPY]], 1, $noreg, 32, $noreg, killed [[VMOVAPSYrm]], [[AVX_SET0_]] :: (store unknown-size into %ir.0 + 32, align 8)
; CHECK-NEXT: VMOVUPDYmr [[COPY]], 1, $noreg, 0, $noreg, [[AVX_SET0_]] :: (store (s256) into %ir.0, align 8)
; CHECK-NEXT: RET 0
entry:
call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> zeroinitializer, <8 x double>* %0, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>)
ret void
}

define <8 x double> @split_masked_load(<8 x double>* %0) {
; CHECK-LABEL: name: split_masked_load
; CHECK: bb.0.entry:
; CHECK-NEXT: liveins: $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
; CHECK-NEXT: [[VMOVAPSYrm:%[0-9]+]]:vr256 = VMOVAPSYrm $rip, 1, $noreg, %const.0, $noreg :: (load (s256) from constant-pool)
; CHECK-NEXT: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm killed [[VMOVAPSYrm]], [[COPY]], 1, $noreg, 32, $noreg :: (load unknown-size from %ir.0 + 32, align 8)
; CHECK-NEXT: [[VMOVUPDYrm:%[0-9]+]]:vr256 = VMOVUPDYrm [[COPY]], 1, $noreg, 0, $noreg :: (load (s256) from %ir.0, align 8)
; CHECK-NEXT: $ymm0 = COPY [[VMOVUPDYrm]]
; CHECK-NEXT: $ymm1 = COPY [[VMASKMOVPDYrm]]
; CHECK-NEXT: RET 0, $ymm0, $ymm1
entry:
%x = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <8 x double> poison)
ret <8 x double> %x
}

declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)

0 comments on commit 5d7efd4

Please sign in to comment.