Skip to content

Commit

Permalink
[OpenMP][FIX] Restrict more unsound assmptions about threading
Browse files Browse the repository at this point in the history
Even if all loads and stores are in `nosync` functions we cannot
guarantee there is no synchronization going on between them. As such, we
cannot use CFG reasoning. We could check the entire module, or, what
happens now to minimize test churn, is to check if all accesses are in
the same function that is `nosync`. A follow up will undo some of the
regressions where possible.

Similarly, reachability cannot be used to exclude an access if the
access is not known to be executed by the same thread as the given
instruction.

The OpenMP-opt test was added for the latter problem.
  • Loading branch information
jdoerfert committed Dec 14, 2022
1 parent 36a879e commit 07c3753
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 69 deletions.
25 changes: 14 additions & 11 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Expand Up @@ -1076,25 +1076,23 @@ struct AAPointerInfoImpl
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
const bool NoSync = NoSyncAA.isAssumedNoSync();
bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();

// Helper to determine if we need to consider threading, which we cannot
// right now. However, if the function is (assumed) nosync or the thread
// executing all instructions is the main thread only we can ignore
// threading.
auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
if (NoSync)
return true;
if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
return true;
return false;
};

// Helper to determine if the access is executed by the same thread as the
// load, for now it is sufficient to avoid any potential threading effects
// as we cannot deal with them anyway.
auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
return CanIgnoreThreading(*Acc.getLocalInst());
// given instruction, for now it is sufficient to avoid any potential
// threading effects as we cannot deal with them anyway.
auto IsSameThreadAsInst = [&](const Access &Acc) -> bool {
return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst());
};

// TODO: Use inter-procedural reachability and dominance.
Expand Down Expand Up @@ -1180,10 +1178,14 @@ struct AAPointerInfoImpl
if (FindInterferingWrites && Dominates)
HasBeenWrittenTo = true;

// Track if all interesting accesses are in the same `nosync` function as
// the given instruction.
AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope;

// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
IsSameThreadAsLoad(Acc))
IsSameThreadAsInst(Acc))
DominatingWrites.insert(&Acc);

InterferingAccesses.push_back({&Acc, Exact});
Expand All @@ -1196,6 +1198,8 @@ struct AAPointerInfoImpl
// the worst case quadratic as we are looking for another write that will
// hide the effect of this one.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
if (!IsSameThreadAsInst(Acc))
return false;
if ((!Acc.isWriteOrAssumption() ||
!AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA,
&ExclusionSet, IsLiveInCalleeCB)) &&
Expand All @@ -1206,8 +1210,6 @@ struct AAPointerInfoImpl

if (!DT || !UseDominanceReasoning)
return false;
if (!IsSameThreadAsLoad(Acc))
return false;
if (!DominatingWrites.count(&Acc))
return false;
for (const Access *DomAcc : DominatingWrites) {
Expand All @@ -1227,7 +1229,8 @@ struct AAPointerInfoImpl
// succeeded for all or not.
unsigned NumInterferingAccesses = InterferingAccesses.size();
for (auto &It : InterferingAccesses) {
if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses ||
if (!AllInSameNoSyncFn ||
NumInterferingAccesses > MaxInterferingAccesses ||
!CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
Expand Down
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC

;; This function returns its second argument on all return statements
Expand Down
27 changes: 14 additions & 13 deletions llvm/test/Transforms/Attributor/internal-noalias.ll
Expand Up @@ -7,8 +7,8 @@ define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 {
; TUNIT-LABEL: define {{[^@]+}}@visible
; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4:[0-9]+]]
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4]]
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3:[0-9]+]]
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
; TUNIT-NEXT: ret i32 [[ADD]]
;
Expand Down Expand Up @@ -36,7 +36,7 @@ define private i32 @noalias_args(i32* %A, i32* %B) #0 {
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]]
; TUNIT-NEXT: ret i32 [[ADD2]]
;
Expand Down Expand Up @@ -94,8 +94,8 @@ define dso_local i32 @visible_local(i32* %A) #0 {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: store i32 5, i32* [[B]], align 4
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
; TUNIT-NEXT: ret i32 [[ADD]]
;
Expand Down Expand Up @@ -158,10 +158,11 @@ define i32 @visible_local_2() {
}

define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 {
; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable
; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR3:[0-9]+]] {
; TUNIT-NEXT: ret i32 undef
; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4
; TUNIT-NEXT: ret i32 [[T0]]
;
; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
Expand All @@ -180,8 +181,9 @@ define i32 @visible_local_3() {
; TUNIT-LABEL: define {{[^@]+}}@visible_local_3
; TUNIT-SAME: () #[[ATTR2]] {
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]]
; TUNIT-NEXT: ret i32 5
; TUNIT-NEXT: store i32 5, i32* [[B]], align 4
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]]
; TUNIT-NEXT: ret i32 [[CALL]]
;
; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@visible_local_3
Expand All @@ -203,9 +205,8 @@ attributes #1 = { argmemonly noinline nounwind uwtable willreturn}
; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }
; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable }
; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) }
; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable }
; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind }
; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn }
; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind }
; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn }
;.
; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable }
; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }
Expand Down
24 changes: 21 additions & 3 deletions llvm/test/Transforms/Attributor/value-simplify-assume.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC

@Gstatic_int1 = internal global i32 zeroinitializer, align 4
Expand Down Expand Up @@ -422,6 +422,9 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse {
; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -472,6 +475,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse {
; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -524,6 +528,9 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse {
; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -592,7 +599,9 @@ define i1 @assume_5c_nr(i1 %cond) norecurse {
; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr
; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]]
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -1037,6 +1046,9 @@ define i1 @assume_3(i1 %arg, i1 %cond) {
; TUNIT-LABEL: define {{[^@]+}}@assume_3
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -1087,6 +1099,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) {
; TUNIT-LABEL: define {{[^@]+}}@assume_4
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -1139,6 +1152,9 @@ define i1 @assume_5(i1 %arg, i1 %cond) {
; TUNIT-LABEL: define {{[^@]+}}@assume_5
; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1
; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down Expand Up @@ -1207,7 +1223,9 @@ define i1 @assume_5c(i1 %cond) {
; TUNIT-LABEL: define {{[^@]+}}@assume_5c
; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]]
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1
; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
; TUNIT: t:
; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1
Expand Down

0 comments on commit 07c3753

Please sign in to comment.