diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 537890d9025fe..4351a55ea1d30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -342,9 +342,13 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) { Worklist.insert(Inst); } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) { Worklist.insert(Inst); + if (!collectUsersRecursive(*Inst)) + return false; } else if (Inst->isLifetimeStartOrEnd()) { continue; } else { + // TODO: For arbitrary uses with address space mismatches, should we check + // if we can introduce a valid addrspacecast? LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n'); return false; } @@ -406,20 +410,18 @@ void PointerReplacer::replace(Instruction *I) { NewSI->takeName(SI); WorkMap[SI] = NewSI; } else if (auto *MemCpy = dyn_cast(I)) { - auto *SrcV = getReplacement(MemCpy->getRawSource()); - // The pointer may appear in the destination of a copy, but we don't want to - // replace it. - if (!SrcV) { - assert(getReplacement(MemCpy->getRawDest()) && - "destination not in replace list"); - return; - } + auto *DestV = MemCpy->getRawDest(); + auto *SrcV = MemCpy->getRawSource(); + + if (auto *DestReplace = getReplacement(DestV)) + DestV = DestReplace; + if (auto *SrcReplace = getReplacement(SrcV)) + SrcV = SrcReplace; IC.Builder.SetInsertPoint(MemCpy); auto *NewI = IC.Builder.CreateMemTransferInst( - MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(), - SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(), - MemCpy->isVolatile()); + MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV, + MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile()); AAMDNodes AAMD = MemCpy->getAAMetadata(); if (AAMD) NewI->setAAMetadata(AAMD); @@ -432,16 +434,17 @@ void PointerReplacer::replace(Instruction *I) { assert(isEqualOrValidAddrSpaceCast( ASC, V->getType()->getPointerAddressSpace()) && "Invalid address space cast!"); - auto *NewV = V; + if (V->getType()->getPointerAddressSpace() != ASC->getType()->getPointerAddressSpace()) { auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), ""); NewI->takeName(ASC); IC.InsertNewInstWith(NewI, ASC->getIterator()); - NewV = NewI; + WorkMap[ASC] = NewI; + } else { + WorkMap[ASC] = V; } - IC.replaceInstUsesWith(*ASC, NewV); - IC.eraseInstFromFunction(*ASC); + } else { llvm_unreachable("should never reach here"); } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll new file mode 100644 index 0000000000000..346c64f096ba3 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s + +; It is illegal to pass ptr addrspace(4) in %arg by addrspacecasting +; to ptr addrspace(5) to pass off to the stack argument. A temporary +; alloca and memcpy is necessary. +define void @issue68120_invalid_addrspacecast_introduced_0(ptr addrspace(4) byref([56 x i8]) %arg) { +; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_0( +; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false) +; CHECK-NEXT: call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [56 x i8], addrspace(5) + %alloca1 = alloca [56 x i8], addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false) + call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca, i64 56, i1 false) + %addrspacecast.alloca1 = addrspacecast ptr addrspace(5) %alloca1 to ptr + %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca1 to ptr addrspace(5) + call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5) + ret void +} + +; Further reduced variant that already eliminated one of the copies +define void @issue68120_invalid_addrspacecast_introduced_1(ptr addrspace(4) byref([56 x i8]) %arg) { +; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_1( +; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false) +; CHECK-NEXT: call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [56 x i8], addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false) + %addrspacecast.alloca = addrspacecast ptr addrspace(5) %alloca to ptr + %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca to ptr addrspace(5) + call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5) + ret void +} + +define void @issue68120_use_cast_to_as0(ptr addrspace(4) byref([56 x i8]) %arg) { +; CHECK-LABEL: define void @issue68120_use_cast_to_as0( +; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [56 x i8], align 1, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ALLOCA]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false) +; CHECK-NEXT: [[ADDRSPACECAST_ALLOCA:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; CHECK-NEXT: call void @other_func(ptr [[ADDRSPACECAST_ALLOCA]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [56 x i8], addrspace(5) + call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false) + %addrspacecast.alloca = addrspacecast ptr addrspace(5) %alloca to ptr + %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca to ptr addrspace(5) + call void @other_func(ptr %addrspacecast.alloca) + ret void +} + +define void @issue68120_uses_invalid_addrspacecast(ptr addrspace(4) byref([56 x i8]) %arg) { +; CHECK-LABEL: define void @issue68120_uses_invalid_addrspacecast( +; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false) +; CHECK-NEXT: call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [56 x i8], addrspace(5) + %alloca1 = alloca [56 x i8], addrspace(5) + %illegal.cast = addrspacecast ptr addrspace(4) %arg to ptr addrspace(5) + call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca, ptr addrspace(5) %illegal.cast, i64 56, i1 false) + call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca, i64 56, i1 false) + %addrspacecast.alloca1 = addrspacecast ptr addrspace(5) %alloca1 to ptr + %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca1 to ptr addrspace(5) + call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5) + ret void +} + + +declare void @byval_func(ptr addrspace(5) byval([56 x i8])) +declare void @other_func(ptr) + diff --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll index a7aa3a8ef1beb..c783b101251d5 100644 --- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll +++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll @@ -429,9 +429,13 @@ entry: declare i8 @readonly_callee(ptr readonly nocapture) +; FIXME: This should be able to fold to call i8 @readonly_callee(ptr nonnull @g1) define i8 @call_readonly_remove_alloca() { ; CHECK-LABEL: @call_readonly_remove_alloca( -; CHECK-NEXT: [[V:%.*]] = call i8 @readonly_callee(ptr nonnull @g1) +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1, addrspace(1) +; CHECK-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 1 dereferenceable(32) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false) +; CHECK-NEXT: [[P:%.*]] = addrspacecast ptr addrspace(1) [[ALLOCA]] to ptr +; CHECK-NEXT: [[V:%.*]] = call i8 @readonly_callee(ptr [[P]]) ; CHECK-NEXT: ret i8 [[V]] ; %alloca = alloca [32 x i8], addrspace(1)