diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 3e8a5bf6a5bd5..6c809bc881d05 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -83,7 +83,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
   bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
   bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
                              AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
-                             uint64_t Size, BatchAAResults &BAA);
+                             TypeSize Size, BatchAAResults &BAA);
   void eraseInstruction(Instruction *I);
   bool iterateOnFunction(Function &F);
 
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4db9d1b6d309a..9c19c2973438c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1428,7 +1428,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
 // allocas that aren't captured.
 bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
                                           AllocaInst *DestAlloca,
-                                          AllocaInst *SrcAlloca, uint64_t Size,
+                                          AllocaInst *SrcAlloca, TypeSize Size,
                                           BatchAAResults &BAA) {
   LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
                     << *Store << "\n");
@@ -1442,13 +1442,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   // Check that copy is full with static size.
   const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
   std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
-  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+  if (!SrcSize || Size != *SrcSize) {
     LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
     return false;
   }
   std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
-  if (!DestSize || DestSize->isScalable() ||
-      Size != DestSize->getFixedValue()) {
+  if (!DestSize || Size != *DestSize) {
     LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
     return false;
   }
@@ -1766,8 +1765,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
   if (Len == nullptr)
     return false;
-  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
-                            BAA)) {
+  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
+                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
     // Avoid invalidating the iterator.
     BBI = M->getNextNonDebugInstruction()->getIterator();
     eraseInstruction(M);
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index dee630f470d00..6089c0a4d7cf5 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -113,6 +113,33 @@ define void @load_store() {
   ret void
 }
 
+; Test scalable vectors.
+define void @load_store_scalable(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: define void @load_store_scalable
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X]], ptr [[SRC]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca <vscale x 4 x i32>
+  %dest = alloca <vscale x 4 x i32>
+  call void @llvm.lifetime.start.p0(i64 -1, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 -1, ptr nocapture %dest)
+  store <vscale x 4 x i32> %x, ptr %src
+  %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+  %src.val = load <vscale x 4 x i32>, ptr %src
+  store <vscale x 4 x i32> %src.val, ptr %dest
+
+  %2 = call i32 @use_nocapture(ptr nocapture %dest)
+
+  call void @llvm.lifetime.end.p0(i64 -1, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 -1, ptr nocapture %dest)
+  ret void
+}
+
 ; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid.
 define void @align_up() {
 ; CHECK-LABEL: define void @align_up() {