diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 363c71d84694f..9084f727538ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9903,8 +9903,6 @@ SDValue SelectionDAG::getLoadVP(
     MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
     MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
     const MDNode *Ranges, bool IsExpanding) {
-  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
   MMOFlags |= MachineMemOperand::MOLoad;
   assert((MMOFlags & MachineMemOperand::MOStore) == 0);
   // If we don't have a PtrInfo, infer the trivial frame index case to simplify
@@ -9926,6 +9924,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
                                 SDValue Offset, SDValue Mask, SDValue EVL,
                                 EVT MemVT, MachineMemOperand *MMO,
                                 bool IsExpanding) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  assert(Mask.getValueType().getVectorElementCount() ==
+             VT.getVectorElementCount() &&
+         "Vector width mismatch between mask and data");
+
   bool Indexed = AM != ISD::UNINDEXED;
   assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
 
@@ -10021,6 +10024,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
                                  ISD::MemIndexedMode AM, bool IsTruncating,
                                  bool IsCompressing) {
   assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  assert(Mask.getValueType().getVectorElementCount() ==
+             Val.getValueType().getVectorElementCount() &&
+         "Vector width mismatch between mask and data");
+
   bool Indexed = AM != ISD::UNINDEXED;
   assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
   SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a081d54d0726..42badefc5d839 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6853,6 +6853,99 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                       Store->getMemOperand()->getFlags());
 }
 
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  auto *Load = cast<VPLoadSDNode>(Op);
+  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Load->getMemoryVT(),
+                                     *Load->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Load->getMask();
+
+  // FIXME: Handle masked loads somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV load type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
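+  // The EVL operand counts elements of the original element type; scale it by
+  // the element size in bytes so the byte-typed load covers the same bytes.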
+  SDValue VL = Load->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+  SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+                            DAG.getAllOnesConstant(DL, MaskVT), VL,
+                            Load->getPointerInfo(), Load->getBaseAlign(),
+                            Load->getMemOperand()->getFlags(), AAMDNodes());
+  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto *Store = cast<VPStoreSDNode>(Op);
+  assert(Store && Store->getValue().getValueType().isVector() &&
+         "Expected vector store");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Store->getMemoryVT(),
+                                     *Store->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Store->getMask();
+
+  // FIXME: Handle masked stores somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue StoredVal = Store->getValue();
+  MVT VT = StoredVal.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV store type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
+  SDValue VL = Store->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
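+  // Build a MachineMemOperand for the same location and flags, but sized for
+  // the byte-typed vector being stored.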
+  LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+      Store->getBaseAlign());
+
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+  return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+                        DAG.getUNDEF(Store->getBasePtr().getValueType()),
+                        DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
+                        ISD::UNINDEXED);
+}
+
 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
   assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -8408,13 +8501,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
     return Op;
   }
-  case ISD::MLOAD:
   case ISD::VP_LOAD:
+    if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MLOAD:
     return lowerMaskedLoad(Op, DAG);
   case ISD::VP_LOAD_FF:
     return lowerLoadFF(Op, DAG);
-  case ISD::MSTORE:
   case ISD::VP_STORE:
+    if (SDValue V = expandUnalignedVPStore(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MSTORE:
     return lowerMaskedStore(Op, DAG);
   case ISD::VECTOR_COMPRESS:
     return lowerVectorCompress(Op, DAG);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5cc427c867cfd..616664306bcab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+  SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index c3fe6b335d3da..2b800c449953b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 252
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v10, v10, 1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index edfa4a7560949..3a26af0279d50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -612,6 +612,19 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
   ret <vscale x 16 x double> %lo
 }
 
+define <vscale x 1 x i64> @unaligned_vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(<vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret <vscale x 1 x i64> %load
+}
+
 define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
 ; CHECK-LABEL: vpload_all_active_nxv8i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 3b406656a4dd6..982ec218e4688 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -511,6 +511,19 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x
   ret void
 }
 
+define void @unaligned_vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, <vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret void
+}
+
 define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
 ; CHECK-LABEL: vpstore_all_active_nxv8i8:
 ; CHECK:       # %bb.0: