diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ff7149044d199..deb8ee2d88055 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21479,7 +21479,18 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
            "new ScheduleData already in scheduling region");
     SD->init(SchedulingRegionID, I);
 
+    auto CanIgnoreLoad = [](const Instruction *I) {
+      const auto *LI = dyn_cast<LoadInst>(I);
+      // If there is a simple load marked as invariant, we can ignore it.
+      // But in the (unlikely) case of a non-simple invariant load,
+      // we must not ignore it.
+      return LI && LI->isSimple() &&
+             LI->getMetadata(LLVMContext::MD_invariant_load);
+    };
+
     if (I->mayReadOrWriteMemory() &&
+        // A simple invariant load does not depend on other memory accesses.
+        !CanIgnoreLoad(I) &&
         (!isa<IntrinsicInst>(I) ||
          (cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
           cast<IntrinsicInst>(I)->getIntrinsicID() !=
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/invariant-load-no-alias-store.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/invariant-load-no-alias-store.ll
new file mode 100644
index 0000000000000..87537c05573ae
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/invariant-load-no-alias-store.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes="function(slp-vectorizer)" -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 %s -S | FileCheck %s
+
+define void @test(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT:    [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
+; CHECK-NEXT:    [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[A0PTR]], align 2, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[B0PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc <2 x half> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store <2 x half> [[TMP2]], ptr addrspace(1) [[P0]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p0 = getelementptr half, ptr addrspace(1) %base, i32 0
+  %p1 = getelementptr half, ptr addrspace(1) %base, i32 1
+  ; First pair of invariant loads, one from otherA and one from otherB.
+  %A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
+  %B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
+  %A0 = load half, ptr addrspace(1) %A0PTR, align 2, !invariant.load !0
+  %B0 = load half, ptr addrspace(1) %B0PTR, align 2, !invariant.load !0
+  %add0 = fadd reassoc half %A0, %B0
+  store half %add0, ptr addrspace(1) %p0, align 2
+  %A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
+  %B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
+  %A1 = load half, ptr addrspace(1) %A1PTR, align 2, !invariant.load !0
+  %B1 = load half, ptr addrspace(1) %B1PTR, align 2, !invariant.load !0
+  %add1 = fadd reassoc half %A1, %B1
+  store half %add1, ptr addrspace(1) %p1, align 2
+  ret void
+}
+
+
+define void @aliastest(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
+; CHECK-LABEL: define void @aliastest(
+; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 1
+; CHECK-NEXT:    [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
+; CHECK-NEXT:    [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
+; CHECK-NEXT:    [[A0:%.*]] = load half, ptr addrspace(1) [[A0PTR]], align 2
+; CHECK-NEXT:    [[B0:%.*]] = load half, ptr addrspace(1) [[B0PTR]], align 2
+; CHECK-NEXT:    [[ADD0:%.*]] = fadd reassoc half [[A0]], [[B0]]
+; CHECK-NEXT:    store half [[ADD0]], ptr addrspace(1) [[P0]], align 2
+; CHECK-NEXT:    [[A1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 1
+; CHECK-NEXT:    [[B1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 1
+; CHECK-NEXT:    [[A1:%.*]] = load half, ptr addrspace(1) [[A1PTR]], align 2
+; CHECK-NEXT:    [[B1:%.*]] = load half, ptr addrspace(1) [[B1PTR]], align 2
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc half [[A1]], [[B1]]
+; CHECK-NEXT:    store half [[ADD1]], ptr addrspace(1) [[P1]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p0 = getelementptr half, ptr addrspace(1) %base, i32 0
+  %p1 = getelementptr half, ptr addrspace(1) %base, i32 1
+  ; First pair of loads (no !invariant.load), one from otherA and one from otherB.
+  %A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
+  %B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
+  %A0 = load half, ptr addrspace(1) %A0PTR, align 2
+  %B0 = load half, ptr addrspace(1) %B0PTR, align 2
+  %add0 = fadd reassoc half %A0, %B0
+  store half %add0, ptr addrspace(1) %p0, align 2
+  %A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
+  %B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
+  %A1 = load half, ptr addrspace(1) %A1PTR, align 2
+  %B1 = load half, ptr addrspace(1) %B1PTR, align 2
+  %add1 = fadd reassoc half %A1, %B1
+  store half %add1, ptr addrspace(1) %p1, align 2
+  ret void
+}
+
+define void @voltest(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
+; CHECK-LABEL: define void @voltest(
+; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 1
+; CHECK-NEXT:    [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
+; CHECK-NEXT:    [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
+; CHECK-NEXT:    [[A0:%.*]] = load volatile half, ptr addrspace(1) [[A0PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT:    [[B0:%.*]] = load volatile half, ptr addrspace(1) [[B0PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT:    [[ADD0:%.*]] = fadd reassoc half [[A0]], [[B0]]
+; CHECK-NEXT:    store half [[ADD0]], ptr addrspace(1) [[P0]], align 2
+; CHECK-NEXT:    [[A1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 1
+; CHECK-NEXT:    [[B1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 1
+; CHECK-NEXT:    [[A1:%.*]] = load volatile half, ptr addrspace(1) [[A1PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT:    [[B1:%.*]] = load volatile half, ptr addrspace(1) [[B1PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc half [[A1]], [[B1]]
+; CHECK-NEXT:    store half [[ADD1]], ptr addrspace(1) [[P1]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p0 = getelementptr half, ptr addrspace(1) %base, i32 0
+  %p1 = getelementptr half, ptr addrspace(1) %base, i32 1
+  ; First pair of volatile invariant loads, one from otherA and one from otherB.
+  %A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
+  %B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
+  %A0 = load volatile half, ptr addrspace(1) %A0PTR, align 2, !invariant.load !0
+  %B0 = load volatile half, ptr addrspace(1) %B0PTR, align 2, !invariant.load !0
+  %add0 = fadd reassoc half %A0, %B0
+  store half %add0, ptr addrspace(1) %p0, align 2
+  %A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
+  %B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
+  %A1 = load volatile half, ptr addrspace(1) %A1PTR, align 2, !invariant.load !0
+  %B1 = load volatile half, ptr addrspace(1) %B1PTR, align 2, !invariant.load !0
+  %add1 = fadd reassoc half %A1, %B1
+  store half %add1, ptr addrspace(1) %p1, align 2
+  ret void
+}
+
+
+attributes #0 = { nounwind }
+
+!0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.
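
Note (not part of the patch): the scheduling change reduces to a single predicate on loads. The minimal standalone sketch below restates that predicate outside the scheduler, assuming the usual LLVM headers; the function name canSkipMemoryDeps is illustrative, not an API in the tree.

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    // Mirrors the CanIgnoreLoad lambda added above: only a load that is both
    // simple (neither volatile nor atomic) and tagged with !invariant.load may
    // be treated as having no dependencies on other memory accesses.
    static bool canSkipMemoryDeps(const Instruction *I) {
      const auto *LI = dyn_cast<LoadInst>(I);
      return LI && LI->isSimple() &&
             LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
    }

This is consistent with the three tests: @test (simple invariant loads) vectorizes into <2 x half> operations, while @aliastest (no !invariant.load) and @voltest (volatile invariant loads) keep the scalar schedule because the loads still participate in memory-dependence tracking.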