Skip to content

Commit

Permalink
[OpenMP] Use AAAssumptionInfo to get assumptions in OpenMPOpt
Browse files Browse the repository at this point in the history
This patch uses the abstract attributor introduced in D111054 to get the
assumption values instead of the `hasAssumption` function. This also
calls it so assumption information should propagate through the device
where applicable.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D111445
  • Loading branch information
jhuber6 committed Nov 9, 2021
1 parent b8a825b commit e52937e
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 34 deletions.
12 changes: 5 additions & 7 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Expand Up @@ -3844,13 +3844,11 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();

// Helper to lookup an assumption string.
auto HasAssumption = [](CallBase &CB, StringRef AssumptionStr) {
return hasAssumption(CB, AssumptionStr);
};
auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);

// Check for SPMD-mode assumptions.
if (HasAssumption(CB, "ompx_spmd_amenable")) {
if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
indicateOptimisticFixpoint();
}
Expand All @@ -3875,8 +3873,8 @@ struct AAKernelInfoCallSite : AAKernelInfo {

// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
if (!(HasAssumption(CB, "omp_no_openmp") ||
HasAssumption(CB, "omp_no_parallelism")))
if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
AssumptionAA.hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);

// If SPMDCompatibilityTracker is not fixed, we need to give up on the
Expand Down
75 changes: 48 additions & 27 deletions llvm/test/Transforms/OpenMP/spmdization.ll
Expand Up @@ -199,13 +199,11 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 {
; AMDGPU-DISABLED: worker_state_machine.is_active.check:
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check1:
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
Expand Down Expand Up @@ -247,13 +245,11 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 {
; NVPTX-DISABLED: worker_state_machine.is_active.check:
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED: worker_state_machine.parallel_region.check1:
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
Expand Down Expand Up @@ -322,7 +318,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
; AMDGPU: for.end:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: call void @indirection() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
Expand Down Expand Up @@ -350,7 +346,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
; NVPTX: for.end:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: call void @indirection() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
Expand Down Expand Up @@ -378,7 +374,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
; AMDGPU-DISABLED: for.end:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @indirection() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
Expand Down Expand Up @@ -406,7 +402,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
; NVPTX-DISABLED: for.end:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED-NEXT: call void @indirection() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -438,7 +434,32 @@ for.inc: ; preds = %for.body
br label %for.cond, !llvm.loop !13

for.end: ; preds = %for.cond
call void @spmd_amenable() #4
call void @indirection() #4
ret void
}

; Internal wrapper whose only action is to call @spmd_amenable and return.
; The check lines below expect, for every run prefix, that both this function
; and its inner call carry an attribute group (#[[ATTR1]]) which the attribute
; checks at the end of the file match as { "llvm.assume"="ompx_spmd_amenable" }.
; NOTE(review): presumably this exercises propagation of the assumption from
; the annotated call site in @__omp_outlined__ through this extra call level;
; confirm against the pass behavior.
define internal void @indirection() {
; AMDGPU-LABEL: define {{[^@]+}}@indirection
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR1]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@indirection
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR1]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@indirection
; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR1]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@indirection
; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR1]]
; NVPTX-DISABLED-NEXT: ret void
;
call void @spmd_amenable()
ret void
}

Expand Down Expand Up @@ -564,7 +585,7 @@ declare void @__kmpc_get_shared_variables(i8***)

declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)

declare void @spmd_amenable() #2
declare void @spmd_amenable()

declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3

Expand Down Expand Up @@ -887,7 +908,7 @@ for.end: ; preds = %for.cond

declare i8* @__kmpc_alloc_shared(i64) #3

declare void @use(i32* nocapture) #2
declare void @use(i32* nocapture)

define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
;
Expand Down Expand Up @@ -2257,39 +2278,39 @@ attributes #5 = { convergent }
!17 = distinct !{!17, !14}
;.
; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR4]] = { nounwind }
; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU: attributes #[[ATTR8]] = { convergent }
;.
; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" }
; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR4]] = { nounwind }
; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX: attributes #[[ATTR8]] = { convergent }
;.
; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU-DISABLED: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent }
;.
; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
Expand Down

0 comments on commit e52937e

Please sign in to comment.