-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[IA] Add masked.load/store support for shuffle (de)interleave load/store #150241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This completes the basic support for masked.load and masked.store in InterleavedAccess. The backend support was already added via the intrinsic lowering path and the common code structure (in RISCV at least). Note that this isn't enough to enable in LV yet. We still need support for recognizing an interleaved mask via a shufflevector in getMask.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) Changes: This completes the basic support for masked.load and masked.store in InterleavedAccess. The backend support was already added via the intrinsic lowering path and the common code structure (in RISCV at least). Note that this isn't enough to enable in LV yet. We still need support for recognizing an interleaved mask via a shufflevector in getMask. Full diff: https://github.com/llvm/llvm-project/pull/150241.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1b691881d67dd..65565b98508c7 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+static Value *getMaskOperand(IntrinsicInst *II) { // Mask operand of II.
+ switch (II->getIntrinsicID()) {
+ default: // Only the four load/store intrinsics below are supported.
+ llvm_unreachable("Unexpected intrinsic");
+ case Intrinsic::vp_load:
+ return II->getOperand(1); // presumably vp.load(ptr, mask, evl) - confirm
+ case Intrinsic::masked_load:
+ return II->getOperand(2); // masked.load(ptr, align, mask, passthru)
+ case Intrinsic::vp_store:
+ return II->getOperand(2); // presumably vp.store(val, ptr, mask, evl)
+ case Intrinsic::masked_store:
+ return II->getOperand(3); // masked.store(val, ptr, align, mask)
+ }
+}
+
// Return the corresponded deinterleaved mask, or nullptr if there is no valid
// mask.
static Value *getMask(Value *WideMask, unsigned Factor,
@@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
if (isa<ScalableVectorType>(Load->getType()))
return false;
- if (auto *LI = dyn_cast<LoadInst>(Load);
- LI && !LI->isSimple())
+ auto *LI = dyn_cast<LoadInst>(Load);
+ auto *II = dyn_cast<IntrinsicInst>(Load);
+ if (!LI && !II)
+ return false;
+
+ if (LI && !LI->isSimple())
return false;
// Check if all users of this load are shufflevectors. If we encounter any
@@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;
- Type *VecTy = FirstSVI->getType();
+ VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
// Check if other shufflevectors are also DE-interleaved of the same type
// and factor as the first shufflevector.
@@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
Value *Mask = nullptr;
- if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
- Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy));
+ if (LI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
+ Mask = getMask(getMaskOperand(II), Factor, VecTy);
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
+ << *Load << "\n");
}
// Try to create target specific intrinsics to replace the load and
@@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
bool InterleavedAccessImpl::lowerInterleavedStore(
Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
Value *StoredValue;
- if (auto *SI = dyn_cast<StoreInst>(Store)) {
+ auto *SI = dyn_cast<StoreInst>(Store);
+ auto *II = dyn_cast<IntrinsicInst>(Store);
+ if (SI) {
if (!SI->isSimple())
return false;
StoredValue = SI->getValueOperand();
- } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
- assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
- StoredValue = VPStore->getArgOperand(0);
} else {
- llvm_unreachable("unsupported store operation");
+ assert(II->getIntrinsicID() == Intrinsic::vp_store ||
+ II->getIntrinsicID() == Intrinsic::masked_store);
+ StoredValue = II->getArgOperand(0);
}
auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
@@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
"number of stored element should be a multiple of Factor");
Value *Mask = nullptr;
- if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+ if (SI) {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ } else {
+ // Check mask operand. Handle both all-true/false and interleaved mask.
unsigned LaneMaskLen = NumStoredElements / Factor;
- Mask = getMask(VPStore->getMaskParam(), Factor,
+ Mask = getMask(getMaskOperand(II), Factor,
ElementCount::getFixed(LaneMaskLen));
if (!Mask)
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
- << "\n");
-
- } else {
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
+ << *Store << "\n");
}
// Try to create target specific intrinsics to replace the store and
@@ -592,19 +615,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
assert(II);
// Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::vp_load:
- WideMask = II->getOperand(1);
- break;
- case Intrinsic::masked_load:
- WideMask = II->getOperand(2);
- break;
- }
-
- Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
+ Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
if (!Mask)
return false;
@@ -642,18 +653,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
Value *Mask = nullptr;
if (II) {
// Check mask operand. Handle both all-true/false and interleaved mask.
- Value *WideMask;
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::vp_store:
- WideMask = II->getOperand(2);
- break;
- case Intrinsic::masked_store:
- WideMask = II->getOperand(3);
- break;
- }
- Mask = getMask(WideMask, Factor,
+ Mask = getMask(getMaskOperand(II), Factor,
cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;
@@ -687,11 +687,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
using namespace PatternMatch;
for (auto &I : instructions(F)) {
if (match(&I, m_CombineOr(m_Load(m_Value()),
- m_Intrinsic<Intrinsic::vp_load>())))
+ m_Intrinsic<Intrinsic::vp_load>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_load>()))
Changed |= lowerInterleavedLoad(&I, DeadInsts);
if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
- m_Intrinsic<Intrinsic::vp_store>())))
+ m_Intrinsic<Intrinsic::vp_store>())) ||
+ match(&I, m_Intrinsic<Intrinsic::masked_store>()))
Changed |= lowerInterleavedStore(&I, DeadInsts);
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 7274e1bb59b92..26e324c47111f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -2002,3 +2002,34 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
%res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}
+
+define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) {
+; CHECK-LABEL: maskedload_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison)
+ %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+ %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
+ %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
+ %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
+ %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
+ %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
+ ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
+}
+
+define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: maskedstore_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg2e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true))
+ ret void
+}
|
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead. In tests, avoid using `undef`; if you need an operand with some unimportant value, add a new argument to the function and use that instead. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…ore (llvm#150241) This completes the basic support for masked.load and masked.store in InterleavedAccess. The backend support was already added via the intrinsic lowering path and the common code structure (in RISCV at least). Note that this isn't enough to enable in LV yet. We still need support for recognizing an interleaved mask via a shufflevector in getMask.
This completes the basic support for masked.load and masked.store in InterleavedAccess. The backend support was already added via the intrinsic lowering path and the common code structure (in RISCV at least).
Note that this isn't enough to enable in LV yet. We still need support for recognizing an interleaved mask via a shufflevector in getMask.