[RISCV] Legalize misaligned unmasked vp.load/vp.store to vle8/vse8. #167745
Conversation
If vector-unaligned-mem support is not enabled, we should not generate vector loads/stores that are not aligned to their element size. We already do this for non-VP vector loads/stores. This code has been in our downstream for about a year and a half, after we found the vectorizer generating misaligned loads/stores. I don't think that is unique to our downstream, but I'm not sure.
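For illustration, a small IR example in the same spirit as the tests added below (the function name is made up, and a 32-bit element type is used here only to show the scaling; the patch's own tests use nxv1i64). Without unaligned-vector-mem, a vp.load like this is now emitted as a vle8.v whose VL is the EVL multiplied by 4 (the element size in bytes), with the result bitcast back to i32 elements, instead of a misaligned vle32.v:

declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @misaligned_unmasked_vpload(ptr %p, i32 zeroext %evl) {
  ; Build an all-ones (unmasked) mask, the same way the tests below do.
  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  ; align 1 is below the 4-byte element alignment, so the load is legalized
  ; through an equivalently-sized i8 vector.
  %v = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 1 %p, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}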
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Full diff: https://github.com/llvm/llvm-project/pull/167745.diff
5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a081d54d0726..10ab1ec97794d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6853,6 +6853,97 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
Store->getMemOperand()->getFlags());
}
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Load = cast<VPLoadSDNode>(Op);
+ assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Load->getMemoryVT(),
+ *Load->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Load->getMask();
+
+ // FIXME: Handle masked loads somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV load type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Load->getVectorLength();
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+ DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+ Load->getPointerInfo(), Load->getBaseAlign(),
+ Load->getMemOperand()->getFlags(), AAMDNodes());
+ return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Store = cast<VPStoreSDNode>(Op);
+ assert(Store && Store->getValue().getValueType().isVector() &&
+ "Expected vector store");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Store->getMemoryVT(),
+ *Store->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Store->getMask();
+
+ // FIXME: Handle masked stores somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue StoredVal = Store->getValue();
+ MVT VT = StoredVal.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV store type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Store->getVectorLength();
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+ Store->getBaseAlign());
+
+ return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+ DAG.getUNDEF(Store->getBasePtr().getValueType()),
+ DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+ NewVT, MMO, ISD::UNINDEXED);
+}
+
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -8408,13 +8499,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
}
- case ISD::MLOAD:
case ISD::VP_LOAD:
+ if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MLOAD:
return lowerMaskedLoad(Op, DAG);
case ISD::VP_LOAD_FF:
return lowerLoadFF(Op, DAG);
- case ISD::MSTORE:
case ISD::VP_STORE:
+ if (SDValue V = expandUnalignedVPStore(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MSTORE:
return lowerMaskedStore(Op, DAG);
case ISD::VECTOR_COMPRESS:
return lowerVectorCompress(Op, DAG);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5cc427c867cfd..616664306bcab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering {
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index c3fe6b335d3da..2b800c449953b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK-LABEL: shuffle1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 252
-; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vmv.v.i v0, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index edfa4a7560949..3a26af0279d50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -612,6 +612,19 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
ret <vscale x 16 x double> %lo
}
+define <vscale x 1 x i64> @unaligned_vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(<vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret <vscale x 1 x i64> %load
+}
+
define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
; CHECK-LABEL: vpload_all_active_nxv8i8:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 3b406656a4dd6..982ec218e4688 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -511,6 +511,19 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
ret void
}
+define void @unaligned_vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, <vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret void
+}
+
define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
; CHECK-LABEL: vpstore_all_active_nxv8i8:
; CHECK: # %bb.0:
Ping
preames
left a comment
LGTM
Doing this for masked vp.load/store requires widening the mask as well, which is harder to do.
NOTE: Because we have to scale the VL, this will introduce additional vsetvli instructions, and the VL optimizer will not be effective at optimizing any arithmetic that is consumed by the store.
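As a hypothetical illustration of that note (not taken from the patch's tests): the add below would normally be able to run at the store's EVL with SEW=64, but after this legalization the misaligned store runs at 8 * EVL with SEW=8, so an extra vsetvli is needed and the VL optimizer cannot shrink the add's VL based on the byte-typed store.

declare void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64>, ptr, <vscale x 1 x i1>, i32)

define void @misaligned_vpstore_of_sum(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y, ptr %p, i32 zeroext %evl) {
  ; Build an all-ones (unmasked) mask, as in the tests above.
  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  ; The add is consumed only by the misaligned store. The store is lowered with
  ; VL = 8 * %evl at SEW=8, while the add uses SEW=64, so their vsetvli
  ; configurations no longer match.
  %sum = add <vscale x 1 x i64> %x, %y
  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %sum, ptr align 1 %p, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}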