Skip to content

Commit

Permalink
[SelectionDAG] Widen scalable-vector loads/stores via VP_LOAD/VP_STORE
Browse files Browse the repository at this point in the history
This patch fixes a compiler crash when widening scalable-vector loads
and stores which end up breaking down to element-wise store operations.
It does so by providing a way for targets with support for
vector-predicated loads and stores to use those instead. By widening the
operation but maintaining the original effective operation length via
the EVL, only the intended vector elements are loaded or stored.

This method should in theory be possible and even preferred for
fixed-length vector types, but all fixed-length types can be broken down
into their elements, and regardless I have observed regressions in the
generated code when doing so. I believe this is simply due to
VP_LOAD/VP_STORE not being up to par with LOAD/STORE in terms of
optimization. It does improve performance on smaller self-contained
examples, however, so the potential is there.

While the only target that benefits from this is RISCV, the legalization
is generic and so was placed centrally.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D111248
  • Loading branch information
frasercrmck committed Nov 10, 2021
1 parent f0d997c commit 332318f
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 27 deletions.
92 changes: 73 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4178,23 +4178,48 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
else
Result = GenWidenVectorLoads(LdChain, LD);

if (!Result)
report_fatal_error("Unable to widen vector load");
if (Result) {
// If we generate a single load, we can use that for the chain. Otherwise,
// build a factor node to remember the multiple loads are independent and
// chain to that.
SDValue NewChain;
if (LdChain.size() == 1)
NewChain = LdChain[0];
else
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);

// If we generate a single load, we can use that for the chain. Otherwise,
// build a factor node to remember the multiple loads are independent and
// chain to that.
SDValue NewChain;
if (LdChain.size() == 1)
NewChain = LdChain[0];
else
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(N, 1), NewChain);

// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(N, 1), NewChain);
return Result;
}

return Result;
// Generate a vector-predicated load if it is custom/legal on the target. To
// avoid possible recursion, only do this if the widened mask type is legal.
// FIXME: Not all targets may support EVL in VP_LOAD. These will have been
// removed from the IR by the ExpandVectorPredication pass but we're
// reintroducing them here.
EVT LdVT = LD->getMemoryVT();
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
SDLoc DL(N);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
unsigned NumVTElts = LdVT.getVectorMinNumElements();
SDValue EVL =
DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
const auto *MMO = LD->getMemOperand();
return DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask,
EVL, MMO->getPointerInfo(), MMO->getAlign(),
MMO->getFlags(), MMO->getAAInfo());
}

report_fatal_error("Unable to widen vector load");
}

SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Expand Down Expand Up @@ -5035,13 +5060,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return TLI.scalarizeVectorStore(ST, DAG);

SmallVector<SDValue, 16> StChain;
if (!GenWidenVectorStores(StChain, ST))
report_fatal_error("Unable to widen vector store");
if (GenWidenVectorStores(StChain, ST)) {
if (StChain.size() == 1)
return StChain[0];

return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}

if (StChain.size() == 1)
return StChain[0];
// Generate a vector-predicated store if it is custom/legal on the target.
// To avoid possible recursion, only do this if the widened mask type is
// legal.
// FIXME: Not all targets may support EVL in VP_STORE. These will have been
// removed from the IR by the ExpandVectorPredication pass but we're
// reintroducing them here.
SDValue StVal = ST->getValue();
EVT StVT = StVal.getValueType();
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
if (WideVT.isScalableVector() &&
TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
// Widen the value.
SDLoc DL(N);
StVal = GetWidenedVector(StVal);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
unsigned NumVTElts = StVT.getVectorMinNumElements();
SDValue EVL =
DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
const auto *MMO = ST->getMemOperand();
return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
EVL, MMO->getPointerInfo(), MMO->getAlign(),
MMO->getFlags(), MMO->getAAInfo());
}

return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
report_fatal_error("Unable to widen vector store");
}

SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
Expand Down
25 changes: 21 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
; RUN: not --crash llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
; RUN: not --crash llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

; Check that we are able to legalize scalable-vector loads that require widening.

; FIXME: LLVM can't yet widen scalable-vector loads.

define <vscale x 3 x i8> @load_nxv3i8(<vscale x 3 x i8>* %ptr) {
; CHECK-LABEL: load_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%v = load <vscale x 3 x i8>, <vscale x 3 x i8>* %ptr
ret <vscale x 3 x i8> %v
}

define <vscale x 5 x half> @load_nxv5f16(<vscale x 5 x half>* %ptr) {
; CHECK-LABEL: load_nxv5f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: slli a2, a1, 2
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%v = load <vscale x 5 x half>, <vscale x 5 x half>* %ptr
ret <vscale x 5 x half> %v
}
25 changes: 21 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
; RUN: not --crash llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
; RUN: not --crash llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

; Check that we are able to legalize scalable-vector stores that require widening.

; FIXME: LLVM can't yet widen scalable-vector stores.

define void @store_nxv3i8(<vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr) {
; CHECK-LABEL: store_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: slli a2, a1, 1
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr
ret void
}

define void @store_nxv7f64(<vscale x 7 x double> %val, <vscale x 7 x double>* %ptr) {
; CHECK-LABEL: store_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: slli a2, a1, 3
; CHECK-NEXT: sub a1, a2, a1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
store <vscale x 7 x double> %val, <vscale x 7 x double>* %ptr
ret void
}

0 comments on commit 332318f

Please sign in to comment.