Skip to content

Commit

Permalink
[Attributor] Identify and remove no-op fences
Browse files Browse the repository at this point in the history
The logic and implementation follows the removal of no-op barriers. If
the fence is not making updates visible, either to the world or the
current thread, it is not needed. Said differently, the fences we remove
do not establish synchronization (happens-before) edges.
This allows us to eliminate some of the regression caused by:
  https://reviews.llvm.org/D145290
  • Loading branch information
jdoerfert committed Jun 6, 2023
1 parent d3777f2 commit cb17c48
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 5 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
Expand Down Expand Up @@ -5178,6 +5179,10 @@ struct AAExecutionDomain
getExecutionDomain(const CallBase &CB) const = 0;
virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0;

/// Helper function to determine if \p FI is a no-op given the information
/// about its execution from \p ExecDomainAA.
virtual bool isNoOpFence(const FenceInst &FI) const = 0;

/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
Expand Down
22 changes: 21 additions & 1 deletion llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4339,13 +4339,22 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {

Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (!isAssumedSideEffectFree(A, I)) {
if (!isa_and_nonnull<StoreInst>(I))
if (!isa_and_nonnull<StoreInst>(I) && !isa_and_nonnull<FenceInst>(I))
indicatePessimisticFixpoint();
else
removeAssumedBits(HAS_NO_EFFECT);
}
}

bool isDeadFence(Attributor &A, FenceInst &FI) {
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
IRPosition::function(*FI.getFunction()), *this, DepClassTy::NONE);
if (!ExecDomainAA || !ExecDomainAA->isNoOpFence(FI))
return false;
A.recordDependence(*ExecDomainAA, *this, DepClassTy::OPTIONAL);
return true;
}

bool isDeadStore(Attributor &A, StoreInst &SI,
SmallSetVector<Instruction *, 8> *AssumeOnlyInst = nullptr) {
// Lang ref now states volatile store is not UB/dead, let's skip them.
Expand Down Expand Up @@ -4399,6 +4408,9 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
if (isa_and_nonnull<StoreInst>(I))
if (isValidState())
return "assumed-dead-store";
if (isa_and_nonnull<FenceInst>(I))
if (isValidState())
return "assumed-dead-fence";
return AAIsDeadValueImpl::getAsStr();
}

Expand All @@ -4408,6 +4420,9 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
if (!isDeadStore(A, *SI))
return indicatePessimisticFixpoint();
} else if (auto *FI = dyn_cast_or_null<FenceInst>(I)) {
if (!isDeadFence(A, *FI))
return indicatePessimisticFixpoint();
} else {
if (!isAssumedSideEffectFree(A, I))
return indicatePessimisticFixpoint();
Expand Down Expand Up @@ -4443,6 +4458,11 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
return ChangeStatus::CHANGED;
}
if (auto *FI = dyn_cast<FenceInst>(I)) {
assert(isDeadFence(A, *FI));
A.deleteAfterManifest(*FI);
return ChangeStatus::CHANGED;
}
if (isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I)) {
A.deleteAfterManifest(*I);
return ChangeStatus::CHANGED;
Expand Down
41 changes: 41 additions & 0 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
Expand All @@ -46,6 +47,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/Attributor.h"
Expand Down Expand Up @@ -2641,6 +2643,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
return Changed;
}

bool isNoOpFence(const FenceInst &FI) const override {
return getState().isValidState() && !NonNoOpFences.count(&FI);
}

/// Merge barrier and assumption information from \p PredED into the successor
/// \p ED.
void
Expand Down Expand Up @@ -2811,6 +2817,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
R = V;
return !Eq;
}

/// Collection of fences known to be non-no-opt. All fences not in this set
/// can be assumed no-opt.
SmallPtrSet<const FenceInst *, 8> NonNoOpFences;
};

void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
Expand Down Expand Up @@ -2980,6 +2990,33 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
continue;
}

if (auto *FI = dyn_cast<FenceInst>(&I)) {
if (!ED.EncounteredNonLocalSideEffect) {
// An aligned fence without non-local side-effects is a no-op.
if (ED.IsReachedFromAlignedBarrierOnly)
continue;
// A non-aligned fence without non-local side-effects is a no-op
// if the ordering only publishes non-local side-effects (or less).
switch (FI->getOrdering()) {
case AtomicOrdering::NotAtomic:
continue;
case AtomicOrdering::Unordered:
continue;
case AtomicOrdering::Monotonic:
continue;
case AtomicOrdering::Acquire:
break;
case AtomicOrdering::Release:
continue;
case AtomicOrdering::AcquireRelease:
break;
case AtomicOrdering::SequentiallyConsistent:
break;
};
}
NonNoOpFences.insert(FI);
}

auto *CB = dyn_cast<CallBase>(&I);
bool IsNoSync = AA::isNoSyncInst(A, I, *this);
bool IsAlignedBarrier =
Expand Down Expand Up @@ -5206,6 +5243,10 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
continue;
}
if (auto *FI = dyn_cast<FenceInst>(&I)) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI));
continue;
}
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::assume) {
A.getOrCreateAAFor<AAPotentialValues>(
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/Transforms/OpenMP/barrier_removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ define void @pos_empty_8(i1 %c) "kernel" {
;
br i1 %c, label %t, label %f
t:
fence release
call void @llvm.amdgcn.s.barrier() "llvm.assume"="ompx_aligned_barrier"
br label %f
f:
Expand All @@ -133,23 +134,33 @@ define void @neg_empty_9(i1 %c) "kernel" {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; CHECK: t:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: f:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M]]
; CHECK: m:
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence release
; CHECK-NEXT: ret void
;
br i1 %c, label %t, label %f
t:
fence release
call void @llvm.amdgcn.s.barrier()
fence release
br label %m
f:
fence release
call void @llvm.amdgcn.s.barrier()
fence release
br label %m
m:
fence release
call void @llvm.amdgcn.s.barrier()
fence release
ret void
}
; FIXME: We should remove the barrier
Expand Down Expand Up @@ -345,21 +356,27 @@ define void @neg_mem() "kernel" {
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4
; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4
; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4
; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret void
;
%arg = load ptr, ptr @GPtr
%a = load i32, ptr @G1
fence seq_cst
call void @aligned_barrier()
store i32 %a, ptr %arg
fence release
call void @aligned_barrier()
%G2c = addrspacecast ptr addrspace(1) @G2 to ptr
%b = load i32, ptr %G2c
store i32 %b, ptr @G1
fence acquire
call void @aligned_barrier()
ret void
}
Expand Down Expand Up @@ -397,27 +414,43 @@ define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
; CHECK: m:
; CHECK-NEXT: ret void
;
fence acquire
call void @llvm.nvvm.barrier0()
fence release
call void @aligned_barrier()
fence seq_cst
br i1 %c0, label %t0, label %f0
t0:
fence seq_cst
call void @aligned_barrier()
fence seq_cst
br label %t0b
t0b:
fence seq_cst
call void @aligned_barrier()
fence seq_cst
br label %m
f0:
fence release
call void @aligned_barrier()
fence acquire
call void @llvm.nvvm.barrier0()
fence acquire
br i1 %c1, label %t1, label %f1
t1:
fence acquire
call void @aligned_barrier()
fence seq_cst
br label %m
f1:
fence seq_cst
call void @aligned_barrier()
fence acquire
br label %m
m:
fence seq_cst
call void @aligned_barrier()
fence seq_cst
ret void
}

Expand Down
6 changes: 2 additions & 4 deletions openmp/libomptarget/test/jit/empty_kernel_lvl2.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
// RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \
// RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \
// RUN: %libomptarget-run-generic
// TODO:
// RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST
// RUN: %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST
// RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
// RUN: -DTGT1_DIRECTIVE="target" \
// RUN: -DTGT2_DIRECTIVE="target" \
// RUN: -DLOOP_DIRECTIVE="for"
// RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \
// RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \
// RUN: %libomptarget-run-generic
// TODO:
// RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND
// RUN: %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND
//
// RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
// RUN: -DTGT1_DIRECTIVE="target" \
Expand Down

0 comments on commit cb17c48

Please sign in to comment.