diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index ddc747a2ca297d..bc01f8b5d11496 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1252,12 +1252,18 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
       }
 
       // Otherwise, replaces the use with flat(NewV).
-      if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+      if (Instruction *VInst = dyn_cast<Instruction>(V)) {
         // Don't create a copy of the original addrspacecast.
         if (U == V && isa<AddrSpaceCastInst>(V))
           continue;
 
-        BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
+        // Insert the addrspacecast after NewV.
+        BasicBlock::iterator InsertPos;
+        if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
+          InsertPos = std::next(NewVInst->getIterator());
+        else
+          InsertPos = std::next(VInst->getIterator());
+
         while (isa<PHINode>(InsertPos))
           ++InsertPos;
         U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/insert-pos-assert.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/insert-pos-assert.ll
new file mode 100644
index 00000000000000..a097219b6c937a
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/insert-pos-assert.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; Addrspacecasts must be inserted after the instructions that define their uses.
+
+%struct.s0 = type { i32*, i32 }
+%struct.s1 = type { %struct.s0 }
+
+@global0 = protected addrspace(4) externally_initialized global %struct.s1 zeroinitializer
+
+declare i32 @func(i32* %arg)
+
+define i32 @insert_pos_assert() {
+; CHECK-LABEL: @insert_pos_assert(
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32*, i32* addrspace(4)* getelementptr inbounds ([[STRUCT_S1:%.*]], [[STRUCT_S1]] addrspace(4)* @global0, i32 0, i32 0, i32 0), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast i32* [[LOAD0]] to i32 addrspace(1)*
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast i32 addrspace(1)* [[TMP1]] to i32*
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32 addrspace(5)* [[ALLOCA]], align 4
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[SEXT]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @func(i32* [[GEP]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %alloca = alloca i32, align 4, addrspace(5)
+  %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
+  %load0 = load i32*, i32* addrspace(4)* getelementptr inbounds (%struct.s1, %struct.s1 addrspace(4)* @global0, i32 0, i32 0, i32 0)
+  %load1 = load i32, i32* %cast
+  %sext = sext i32 %load1 to i64
+  %gep = getelementptr inbounds i32, i32* %load0, i64 %sext
+  %call = call i32 @func(i32* %gep)
+  ret i32 %call
+}