[MemCpyOptimizer] Support scalable vectors in performStackMoveOptzn #67632
Conversation
This changes performStackMoveOptzn to take a TypeSize instead of uint64_t to avoid an implicit conversion when called from processStoreOfLoad. performStackMoveOptzn will return false if the TypeSize is scalable.
@llvm/pr-subscribers-llvm-transforms. Full diff: https://github.com/llvm/llvm-project/pull/67632.diff (3 files affected):
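For context, here is a minimal sketch of the hazard the signature change avoids. It assumes LLVM's TypeSize API as of this PR, where the implicit integer conversion is only meaningful for fixed sizes; the function and variable names are made up for illustration:

```cpp
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Old-style signature: a TypeSize argument is implicitly converted to
// uint64_t at the call site, which is only valid for fixed sizes.
static bool takesPlainInt(uint64_t Size) { return Size != 0; }

void demo() {
  TypeSize Fixed = TypeSize::getFixed(16);       // 16 bytes
  TypeSize Scalable = TypeSize::getScalable(16); // 16 x vscale bytes

  (void)takesPlainInt(Fixed); // OK: a fixed size converts cleanly.
  // (void)takesPlainInt(Scalable); // Hazard: implicitly converting a
  //                                // scalable size is diagnosed at runtime;
  //                                // this is the path processStoreOfLoad
  //                                // could trigger before this patch.

  // New-style signature keeps the TypeSize, so the pass can bail out
  // explicitly, as the initial version of this patch does:
  if (Scalable.isScalable())
    return;
}
```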
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 3e8a5bf6a5bd56e..6c809bc881d050d 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -83,7 +83,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
- uint64_t Size, BatchAAResults &BAA);
+ TypeSize Size, BatchAAResults &BAA);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4db9d1b6d309afd..f1d0864477586a1 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1428,8 +1428,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
AllocaInst *DestAlloca,
- AllocaInst *SrcAlloca, uint64_t Size,
+ AllocaInst *SrcAlloca, TypeSize Size,
BatchAAResults &BAA) {
+ // We can't optimize scalable types.
+ if (Size.isScalable())
+ return false;
+
LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
<< *Store << "\n");
@@ -1766,8 +1770,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
if (Len == nullptr)
return false;
- if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
- BAA)) {
+ if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
+ TypeSize::getFixed(Len->getZExtValue()), BAA)) {
// Avoid invalidating the iterator.
BBI = M->getNextNonDebugInstruction()->getIterator();
eraseInstruction(M);
diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll b/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll
index 84b06f6071ff69b..821da24d44e73b3 100644
--- a/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll
@@ -87,3 +87,19 @@ define void @callslotoptzn(<vscale x 4 x float> %val, ptr %out) {
declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> , <vscale x 4 x ptr> , i32, <vscale x 4 x i1>)
+
+; Make sure we don't crash calling performStackMoveOptzn from processStoreOfLoad.
+define void @load_store(<vscale x 4 x i32> %x) {
+ %src = alloca <vscale x 4 x i32>
+ %dest = alloca <vscale x 4 x i32>
+ store <vscale x 4 x i32> %x, ptr %src
+ %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+ %src.val = load <vscale x 4 x i32>, ptr %src
+ store <vscale x 4 x i32> %src.val, ptr %dest
+
+ %2 = call i32 @use_nocapture(ptr nocapture %dest)
+ ret void
+}
+
+declare i32 @use_nocapture(ptr nocapture)
                                          BatchAAResults &BAA) {
+  // We can't optimize scalable types.
+  if (Size.isScalable())
+    return false;
Could you instead replace the Size != SrcSize->getFixedValue() with Size != SrcSize below? (And the same with DestSize.)
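If I read the suggestion right, the point is that TypeSize equality compares both the minimum size and the scalable flag, so the direct comparison stays correct once scalable sizes are let through. A rough sketch of the assumed semantics:

```cpp
#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

void compareSketch() {
  TypeSize FixedA = TypeSize::getFixed(64);
  TypeSize FixedB = TypeSize::getFixed(64);
  TypeSize Scal = TypeSize::getScalable(64);

  assert(FixedA == FixedB); // equal fixed sizes compare equal
  assert(FixedA != Scal);   // same minimum value, different scalability,
                            // so a direct Size != SrcSize check still
                            // rejects mismatched pairs
}
```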
LGTM
@@ -1766,8 +1765,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
   if (Len == nullptr)
     return false;
-  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
-                            BAA)) {
+  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
As a possible follow-up, extending the length matching from ConstantInt to also match a vscale expression here, forming a scalable TypeSize, would seem relatively straightforward. I think that would allow this optimization to trigger for scalable allocas and scalable memcpys.
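A hypothetical sketch of that follow-up, assuming PatternMatch's m_VScale matcher is available in this form (the helper name is made up and is not part of this PR):

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/TypeSize.h"
#include <optional>
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: recognize either a ConstantInt length or a
// `C * vscale` expression and fold it into a TypeSize.
static std::optional<TypeSize> matchMemCpyLength(Value *Len) {
  if (auto *CI = dyn_cast<ConstantInt>(Len))
    return TypeSize::getFixed(CI->getZExtValue());
  const APInt *C;
  // m_c_Mul matches the multiply operands in either order.
  if (match(Len, m_c_Mul(m_VScale(), m_APInt(C))))
    return TypeSize::getScalable(C->getZExtValue());
  return std::nullopt;
}
```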
[MemCpyOptimizer] Support scalable vectors in performStackMoveOptzn (#67632)

This changes performStackMoveOptzn to take a TypeSize instead of uint64_t to avoid an implicit conversion when called from processStoreOfLoad. performStackMoveOptzn has been updated to allow scalable types in the rest of its code.