Skip to content

Commit

Permalink
[OpenMP][FIX] Ensure to determine aligned regions properly
Browse files Browse the repository at this point in the history
There were missing checks in the aligned region code, copy-paste errors
(= usage of the IsReachedFromAlignedBarrierOnly value instead of
IsReachingAlignedBarrierOnly value on the forward pass), and a missing
update of the call state for sync declarations and definitions.

Partially fixes #60425

(cherry picked from commit 578d507)
  • Loading branch information
jdoerfert authored and tstellar committed Feb 6, 2023
1 parent a2a85f2 commit bd00ad5
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 24 deletions.
24 changes: 17 additions & 7 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2682,7 +2682,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
const Instruction &I) const override {
assert(I.getFunction() == getAnchorScope() &&
"Instruction is out of scope!");
if (!isValidState() || isa<CallBase>(I))
if (!isValidState())
return false;

const Instruction *CurI;
Expand All @@ -2693,14 +2693,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
break;
}
const auto &It = CEDMap.find(CB);
if (It == CEDMap.end())
continue;
if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
if (!It->getSecond().IsReachingAlignedBarrierOnly)
return false;
break;
} while ((CurI = CurI->getNextNonDebugInstruction()));

if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
return false;

// Check backward until a call or the block beginning is reached.
Expand All @@ -2709,12 +2713,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
break;
}
const auto &It = CEDMap.find(CB);
if (It == CEDMap.end())
continue;
if (!AA::isNoSyncInst(A, *CB, *this)) {
if (It->getSecond().IsReachedFromAlignedBarrierOnly)
if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
break;
}
return false;
}

Expand Down Expand Up @@ -3010,7 +3018,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (EDAA.getState().isValidState()) {
const auto &CalleeED = EDAA.getFunctionExecutionDomain();
ED.IsReachedFromAlignedBarrierOnly =
CalleeED.IsReachedFromAlignedBarrierOnly;
CallED.IsReachedFromAlignedBarrierOnly =
CalleeED.IsReachedFromAlignedBarrierOnly;
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
ED.EncounteredNonLocalSideEffect |=
Expand All @@ -3025,8 +3034,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
continue;
}
}
ED.IsReachedFromAlignedBarrierOnly =
IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
if (!IsNoSync)
ED.IsReachedFromAlignedBarrierOnly =
CallED.IsReachedFromAlignedBarrierOnly = false;
AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
if (!IsNoSync)
Expand Down
93 changes: 76 additions & 17 deletions llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@ target triple = "amdgcn-amd-amdhsa"

@G = internal addrspace(3) global i32 undef, align 4
@H = internal addrspace(3) global i32 undef, align 4
@X = internal addrspace(3) global i32 undef, align 4
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1

; Make sure we do not delete the stores to @G without also replacing the load with `1`.
;.
; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
;.
define void @kernel() "kernel" {
Expand All @@ -30,20 +33,17 @@ define void @kernel() "kernel" {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i32 1, ptr addrspace(3) @G, align 4
; CHECK-NEXT: br label [[IF_MERGE:%.*]]
; CHECK: if.else:
; CHECK-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
; CHECK-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
; CHECK-NEXT: call void @barrier() #[[ATTR5]]
; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]]
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
; CHECK-NEXT: br label [[IF_MERGE]]
; CHECK: if.merge:
; CHECK-NEXT: call void @use1(i32 2) #[[ATTR5]]
; CHECK-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; CHECK: if.then2:
; CHECK-NEXT: store i32 2, ptr addrspace(3) @G, align 4
; CHECK-NEXT: call void @barrier() #[[ATTR5]]
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
Expand Down Expand Up @@ -87,31 +87,90 @@ define void @test_assume() {
ret void
}

; We can't ignore the sync, hence this might store 2 into %p
define void @kernel2(ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; CHECK-NEXT: call void @sync()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
; CHECK-NEXT: ret void
;
store i32 1, ptr addrspace(3) @X
call void @sync()
%v = load i32, ptr addrspace(3) @X
store i32 2, ptr addrspace(3) @X
store i32 %v, ptr %p
ret void
}

; We can't ignore the sync, hence this might store 2 into %p
define void @kernel3(ptr %p) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel3
; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: call void @sync_def.internalized()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel3
; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: call void @sync_def()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
; CGSCC-NEXT: ret void
;
store i32 1, ptr addrspace(3) @X
call void @sync_def()
%v = load i32, ptr addrspace(3) @X
store i32 2, ptr addrspace(3) @X
store i32 %v, ptr %p
ret void
}

define void @sync_def() {
; CHECK-LABEL: define {{[^@]+}}@sync_def() {
; CHECK-NEXT: call void @sync()
; CHECK-NEXT: ret void
;
call void @sync()
ret void
}

declare void @sync()
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
declare void @use1(i32) nosync norecurse nounwind nocallback
declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
declare void @__kmpc_target_deinit(ptr, i8) nocallback
declare void @llvm.assume(i1)

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2}
!nvvm.annotations = !{!2, !3, !4}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
!2 = !{ptr @kernel, !"kernel", i32 1}
!3 = !{ptr @kernel2, !"kernel", i32 1}
!4 = !{ptr @kernel3, !"kernel", i32 1}

;.
; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
; CHECK: attributes #[[ATTR5]] = { nounwind }
; CHECK: attributes #[[ATTR1]] = { "kernel" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
; CHECK: attributes #[[ATTR6]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CGSCC: {{.*}}
; TUNIT: {{.*}}

0 comments on commit bd00ad5

Please sign in to comment.