-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[GVN] Teach GVN simple masked load/store forwarding #157689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
11ac166
c335ed6
0d7107f
8f7fe4a
a3a6f9f
6693704
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2287,6 +2287,35 @@ bool GVNPass::processLoad(LoadInst *L) { | |
return true; | ||
} | ||
|
||
// Attempt to process masked loads which have loaded from | ||
// masked stores with the same mask | ||
bool GVNPass::processMaskedLoad(IntrinsicInst *I) { | ||
if (!MD) | ||
return false; | ||
MemDepResult Dep = MD->getDependency(I); | ||
Instruction *DepInst = Dep.getInst(); | ||
if (!DepInst || !Dep.isLocal() || !Dep.isDef()) | ||
return false; | ||
|
||
Value *Mask = I->getOperand(2); | ||
Value *Passthrough = I->getOperand(3); | ||
Value *StoreVal; | ||
if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(), | ||
m_Specific(Mask))) || | ||
StoreVal->getType() != I->getType()) | ||
return false; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a dependency is local then it's either a Clobber or a Def, and it looks like the code below is assuming a Def. Would be good to clarify this. If so, I think you have to bail out for the clobber case as that's more complicated - see AnalyzeLoadAvailability. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a bail-out if it's not a Def |
||
// Remove the load but generate a select for the passthrough | ||
Value *OpToForward = llvm::SelectInst::Create(Mask, StoreVal, Passthrough, "", | ||
I->getIterator()); | ||
|
||
ICF->removeUsersOf(I); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't if ordering matters, but I noticed that in processLoads we do this the other way around:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
I->replaceAllUsesWith(OpToForward); | ||
salvageAndRemoveInstruction(I); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we also need to call
and
similar to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if (MSSAU)
MSSAU->removeMemoryAccess(L); This is indirectly called by salvageAndRemoveInstruction(I)
//...
removeInstruction(I) I've added the Pointer stuff though. |
||
++NumGVNLoad; | ||
return true; | ||
} | ||
|
||
/// Return a pair the first field showing the value number of \p Exp and the | ||
/// second field showing whether it is a value number newly created. | ||
std::pair<uint32_t, bool> | ||
|
@@ -2734,6 +2763,10 @@ bool GVNPass::processInstruction(Instruction *I) { | |
return false; | ||
} | ||
|
||
if (match(I, m_Intrinsic<Intrinsic::masked_load>()) && | ||
processMaskedLoad(cast<IntrinsicInst>(I))) | ||
return true; | ||
|
||
// For conditional branches, we can perform simple conditional propagation on | ||
// the condition value itself. | ||
if (BranchInst *BI = dyn_cast<BranchInst>(I)) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -passes=gvn -S -enable-gvn-memdep=true < %s | FileCheck %s | ||
; RUN: opt -passes=gvn -S -enable-gvn-memdep=false < %s | FileCheck %s --check-prefix=MEMDEPFALSE | ||
|
||
define <4 x float> @forward_binop_with_sel(ptr %0, ptr %1, i32 %a, i32 %b, <4 x float> %passthrough) { | ||
; CHECK-LABEL: @forward_binop_with_sel( | ||
; CHECK-NEXT: [[MASK:%.*]] = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[A:%.*]], i32 [[B:%.*]]) | ||
; CHECK-NEXT: [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0:%.*]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) | ||
; CHECK-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 | ||
; CHECK-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) | ||
; CHECK-NEXT: [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]] | ||
; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr [[TMP1:%.*]], i32 1, <4 x i1> [[MASK]]) | ||
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[MASK]], <4 x float> [[FMUL]], <4 x float> [[PASSTHROUGH:%.*]] | ||
; CHECK-NEXT: ret <4 x float> [[TMP3]] | ||
; | ||
; MEMDEPFALSE-LABEL: @forward_binop_with_sel( | ||
; MEMDEPFALSE-NEXT: [[MASK:%.*]] = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[A:%.*]], i32 [[B:%.*]]) | ||
; MEMDEPFALSE-NEXT: [[LOAD_0_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0:%.*]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) | ||
; MEMDEPFALSE-NEXT: [[GEP_0_16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 | ||
; MEMDEPFALSE-NEXT: [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer) | ||
; MEMDEPFALSE-NEXT: [[FMUL:%.*]] = fmul <4 x float> [[LOAD_0_0]], [[LOAD_0_16]] | ||
; MEMDEPFALSE-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[FMUL]], ptr [[TMP1:%.*]], i32 1, <4 x i1> [[MASK]]) | ||
; MEMDEPFALSE-NEXT: [[LOAD_1_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 1, <4 x i1> [[MASK]], <4 x float> [[PASSTHROUGH:%.*]]) | ||
; MEMDEPFALSE-NEXT: ret <4 x float> [[LOAD_1_0]] | ||
; | ||
%mask = tail call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %a, i32 %b) | ||
%load.0.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) | ||
%gep.0.16 = getelementptr i8, ptr %0, i32 16 | ||
%load.0.16 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %gep.0.16, i32 1, <4 x i1> %mask, <4 x float> zeroinitializer) | ||
%fmul = fmul <4 x float> %load.0.0, %load.0.16 | ||
call void @llvm.masked.store.v4f32.p0(<4 x float> %fmul, ptr %1, i32 1, <4 x i1> %mask) | ||
%load.1.0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 1, <4 x i1> %mask, <4 x float> %passthrough) | ||
ret <4 x float> %load.1.0 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at processLoad it seems like you need an additional check:
Would be good to add a test for this too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added the test
masked-load-store-no-mem-dep.ll
to deal with this.