156 changes: 78 additions & 78 deletions llvm/test/Transforms/OpenMP/barrier_removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ declare void @llvm.assume(i1)
;.
; CHECK: @[[GC1:[a-zA-Z0-9_$"\\.-]+]] = constant i32 42
; CHECK: @[[GC2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global i32 0
; CHECK: @[[GPTR4:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global i32 addrspace(4)* null
; CHECK: @[[GPTR4:[a-zA-Z0-9_$"\\.-]+]] = addrspace(4) global ptr addrspace(4) null
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i32 42
; CHECK: @[[GS:[a-zA-Z0-9_$"\\.-]+]] = addrspace(3) global i32 0
; CHECK: @[[GPTR:[a-zA-Z0-9_$"\\.-]+]] = global i32* null
; CHECK: @[[GPTR:[a-zA-Z0-9_$"\\.-]+]] = global ptr null
; CHECK: @[[PG1:[a-zA-Z0-9_$"\\.-]+]] = thread_local global i32 42
; CHECK: @[[PG2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(5) global i32 0
; CHECK: @[[GPTR5:[a-zA-Z0-9_$"\\.-]+]] = global i32 addrspace(5)* null
; CHECK: @[[GPTR5:[a-zA-Z0-9_$"\\.-]+]] = global ptr addrspace(5) null
; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32 42
; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global i32 0
;.
Expand Down Expand Up @@ -95,28 +95,28 @@ define void @neg_empty_2() {

@GC1 = constant i32 42
@GC2 = addrspace(4) global i32 0
@GPtr4 = addrspace(4) global i32 addrspace(4)* null
@GPtr4 = addrspace(4) global ptr addrspace(4) null
define void @pos_constant_loads() {
; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads() {
; CHECK-NEXT: [[ARG:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)** addrspacecast (i32 addrspace(4)* addrspace(4)* @GPtr4 to i32 addrspace(4)**), align 8
; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @GC2 to i32*), align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast i32 addrspace(4)* [[ARG]] to i32*
; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[ARGC]], align 4
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(4), ptr addrspacecast (ptr addrspace(4) @GPtr4 to ptr), align 8
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr
; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]]
; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
; CHECK-NEXT: call void @useI32(i32 [[E]])
; CHECK-NEXT: ret void
;
%GPtr4c = addrspacecast i32 addrspace(4)*addrspace(4)* @GPtr4 to i32 addrspace(4)**
%arg = load i32 addrspace(4)*, i32 addrspace(4)** %GPtr4c
%a = load i32, i32* @GC1
%GPtr4c = addrspacecast ptr addrspace(4) @GPtr4 to ptr
%arg = load ptr addrspace(4), ptr %GPtr4c
%a = load i32, ptr @GC1
call void @aligned_barrier()
%GC2c = addrspacecast i32 addrspace(4)* @GC2 to i32*
%b = load i32, i32* %GC2c
%GC2c = addrspacecast ptr addrspace(4) @GC2 to ptr
%b = load i32, ptr %GC2c
call void @aligned_barrier()
%argc = addrspacecast i32 addrspace(4)* %arg to i32*
%c = load i32, i32* %argc
%argc = addrspacecast ptr addrspace(4) %arg to ptr
%c = load i32, ptr %argc
call void @aligned_barrier()
%d = add i32 %a, %b
%e = add i32 %d, %c
Expand All @@ -125,29 +125,29 @@ define void @pos_constant_loads() {
}
@G = global i32 42
@GS = addrspace(3) global i32 0
@GPtr = global i32* null
@GPtr = global ptr null
; TODO: We could remove some of the barriers due to the lack of write effects.
define void @neg_loads() {
; CHECK-LABEL: define {{[^@]+}}@neg_loads() {
; CHECK-NEXT: [[ARG:%.*]] = load i32*, i32** @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, i32* @G, align 4
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G, align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @GS to i32*), align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @GS to ptr), align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[ARG]], align 4
; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARG]], align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
; CHECK-NEXT: call void @useI32(i32 [[E]])
; CHECK-NEXT: ret void
;
%arg = load i32*, i32** @GPtr
%a = load i32, i32* @G
%arg = load ptr, ptr @GPtr
%a = load i32, ptr @G
call void @aligned_barrier()
%GSc = addrspacecast i32 addrspace(3)* @GS to i32*
%b = load i32, i32* %GSc
%GSc = addrspacecast ptr addrspace(3) @GS to ptr
%b = load i32, ptr %GSc
call void @aligned_barrier()
%c = load i32, i32* %arg
%c = load i32, ptr %arg
call void @aligned_barrier()
%d = add i32 %a, %b
%e = add i32 %d, %c
Expand All @@ -156,58 +156,58 @@ define void @neg_loads() {
}
@PG1 = thread_local global i32 42
@PG2 = addrspace(5) global i32 0
@GPtr5 = global i32 addrspace(5)* null
@GPtr5 = global ptr addrspace(5) null
; pos_priv_mem: loads and stores separated by @aligned_barrier calls. The
; accesses touch the thread_local global @PG1, a local alloca (%loc), the
; addrspace(5) global @PG2, and the addrspace(5) pointer loaded from @GPtr5.
; The input body contains four @aligned_barrier calls; the expected output
; below keeps only one, the call that follows the load of @PG2.
; In this listing every typed-pointer line (i32*) is immediately followed by
; its opaque-pointer (ptr) replacement.
define void @pos_priv_mem() {
; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem() {
; CHECK-NEXT: [[ARG:%.*]] = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5, align 8
; CHECK-NEXT: [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 8
; CHECK-NEXT: [[LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = load i32, i32* @PG1, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[LOC]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(5)* @PG2 to i32*), align 4
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4
; CHECK-NEXT: store i32 [[A]], ptr [[LOC]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast i32 addrspace(5)* [[ARG]] to i32*
; CHECK-NEXT: store i32 [[B]], i32* [[ARGC]], align 4
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[LOC]], align 4
; CHECK-NEXT: store i32 [[V]], i32* @PG1, align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr
; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[LOC]], align 4
; CHECK-NEXT: store i32 [[V]], ptr @PG1, align 4
; CHECK-NEXT: ret void
;
%arg = load i32 addrspace(5)*, i32 addrspace(5)** @GPtr5
%arg = load ptr addrspace(5), ptr @GPtr5
%loc = alloca i32
%a = load i32, i32* @PG1
%a = load i32, ptr @PG1
call void @aligned_barrier()
store i32 %a, i32* %loc
%PG2c = addrspacecast i32 addrspace(5)* @PG2 to i32*
%b = load i32, i32* %PG2c
store i32 %a, ptr %loc
%PG2c = addrspacecast ptr addrspace(5) @PG2 to ptr
%b = load i32, ptr %PG2c
call void @aligned_barrier()
%argc = addrspacecast i32 addrspace(5)* %arg to i32*
store i32 %b, i32* %argc
%argc = addrspacecast ptr addrspace(5) %arg to ptr
store i32 %b, ptr %argc
call void @aligned_barrier()
%v = load i32, i32* %loc
store i32 %v, i32* @PG1
%v = load i32, ptr %loc
store i32 %v, ptr @PG1
call void @aligned_barrier()
ret void
}
@G1 = global i32 42
@G2 = addrspace(1) global i32 0
; neg_mem: loads @G1, stores that value through the pointer loaded from @GPtr,
; then copies the addrspace(1) global @G2 into @G1. The input places an
; @aligned_barrier call after each of these three steps.
; The expected output keeps the first two barriers and has no barrier between
; the final store to @G1 and the return.
; In this listing every typed-pointer line (i32*) is immediately followed by
; its opaque-pointer (ptr) replacement.
define void @neg_mem() {
; CHECK-LABEL: define {{[^@]+}}@neg_mem() {
; CHECK-NEXT: [[ARG:%.*]] = load i32*, i32** @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, i32* @G1, align 4
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: store i32 [[A]], i32* [[ARG]], align 4
; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @G2 to i32*), align 4
; CHECK-NEXT: store i32 [[B]], i32* @G1, align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4
; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4
; CHECK-NEXT: ret void
;
%arg = load i32*, i32** @GPtr
%a = load i32, i32* @G1
%arg = load ptr, ptr @GPtr
%a = load i32, ptr @G1
call void @aligned_barrier()
store i32 %a, i32* %arg
store i32 %a, ptr %arg
call void @aligned_barrier()
%G2c = addrspacecast i32 addrspace(1)* @G2 to i32*
%b = load i32, i32* %G2c
store i32 %b, i32* @G1
%G2c = addrspacecast ptr addrspace(1) @G2 to ptr
%b = load i32, ptr %G2c
store i32 %b, ptr @G1
call void @aligned_barrier()
ret void
}
Expand All @@ -231,18 +231,18 @@ define void @pos_multiple() {
; Module-level metadata for the barrier tests.
; !llvm.module.flags references !12 and !13, which carry the "openmp" and
; "openmp-device" entries (both with value 50).
; !nvvm.annotations references !0 through !11; each of those nodes pairs one
; test function with the string "kernel" and the value 1.
!llvm.module.flags = !{!12,!13}
!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11}

!0 = !{void ()* @pos_empty_1, !"kernel", i32 1}
!1 = !{void ()* @pos_empty_2, !"kernel", i32 1}
!2 = !{void ()* @pos_empty_3, !"kernel", i32 1}
!3 = !{void ()* @pos_empty_4, !"kernel", i32 1}
!4 = !{void ()* @pos_empty_5, !"kernel", i32 1}
!5 = !{void ()* @pos_empty_6, !"kernel", i32 1}
!6 = !{void ()* @neg_empty_7, !"kernel", i32 1}
!7 = !{void ()* @pos_constant_loads, !"kernel", i32 1}
!8 = !{void ()* @neg_loads, !"kernel", i32 1}
!9 = !{void ()* @pos_priv_mem, !"kernel", i32 1}
!10 = !{void ()* @neg_mem, !"kernel", i32 1}
!11 = !{void ()* @pos_multiple, !"kernel", i32 1}
!0 = !{ptr @pos_empty_1, !"kernel", i32 1}
!1 = !{ptr @pos_empty_2, !"kernel", i32 1}
!2 = !{ptr @pos_empty_3, !"kernel", i32 1}
!3 = !{ptr @pos_empty_4, !"kernel", i32 1}
!4 = !{ptr @pos_empty_5, !"kernel", i32 1}
!5 = !{ptr @pos_empty_6, !"kernel", i32 1}
!6 = !{ptr @neg_empty_7, !"kernel", i32 1}
!7 = !{ptr @pos_constant_loads, !"kernel", i32 1}
!8 = !{ptr @neg_loads, !"kernel", i32 1}
!9 = !{ptr @pos_priv_mem, !"kernel", i32 1}
!10 = !{ptr @neg_mem, !"kernel", i32 1}
!11 = !{ptr @pos_multiple, !"kernel", i32 1}
!12 = !{i32 7, !"openmp", i32 50}
!13 = !{i32 7, !"openmp-device", i32 50}
;.
Expand All @@ -253,16 +253,16 @@ define void @pos_multiple() {
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{void ()* @pos_empty_1, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{void ()* @pos_empty_2, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{void ()* @pos_empty_3, !"kernel", i32 1}
; CHECK: [[META5:![0-9]+]] = !{void ()* @pos_empty_4, !"kernel", i32 1}
; CHECK: [[META6:![0-9]+]] = !{void ()* @pos_empty_5, !"kernel", i32 1}
; CHECK: [[META7:![0-9]+]] = !{void ()* @pos_empty_6, !"kernel", i32 1}
; CHECK: [[META8:![0-9]+]] = !{void ()* @neg_empty_7, !"kernel", i32 1}
; CHECK: [[META9:![0-9]+]] = !{void ()* @pos_constant_loads, !"kernel", i32 1}
; CHECK: [[META10:![0-9]+]] = !{void ()* @neg_loads, !"kernel", i32 1}
; CHECK: [[META11:![0-9]+]] = !{void ()* @pos_priv_mem, !"kernel", i32 1}
; CHECK: [[META12:![0-9]+]] = !{void ()* @neg_mem, !"kernel", i32 1}
; CHECK: [[META13:![0-9]+]] = !{void ()* @pos_multiple, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
; CHECK: [[META5:![0-9]+]] = !{ptr @pos_empty_4, !"kernel", i32 1}
; CHECK: [[META6:![0-9]+]] = !{ptr @pos_empty_5, !"kernel", i32 1}
; CHECK: [[META7:![0-9]+]] = !{ptr @pos_empty_6, !"kernel", i32 1}
; CHECK: [[META8:![0-9]+]] = !{ptr @neg_empty_7, !"kernel", i32 1}
; CHECK: [[META9:![0-9]+]] = !{ptr @pos_constant_loads, !"kernel", i32 1}
; CHECK: [[META10:![0-9]+]] = !{ptr @neg_loads, !"kernel", i32 1}
; CHECK: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
; CHECK: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
; CHECK: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
;.
464 changes: 190 additions & 274 deletions llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll

Large diffs are not rendered by default.

35 changes: 17 additions & 18 deletions llvm/test/Transforms/OpenMP/parallel_deletion_cg_update.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
; CHECK-NEXT: CS<None> calls function 'dead_fork_call'
; CHECK-NEXT: CS<None> calls function '__kmpc_fork_call'
; CHECK-NEXT: CS<None> calls function 'live_fork_call'
; CHECK-NEXT: CS<None> calls function '.omp_outlined..1'
; CHECK-NEXT: CS<None> calls function 'd'
;
; CHECK: Call graph node for function: '.omp_outlined..0'<<{{.*}}>> #uses=0
;
; CHECK: Call graph node for function: '.omp_outlined..1'<<{{.*}}>> #uses=3
; CHECK: Call graph node for function: '.omp_outlined..1'<<{{.*}}>> #uses=2
; CHECK: CS<{{.*}}> calls function 'd'
;
; CHECK: Call graph node for function: '__kmpc_fork_call'<<{{.*}}>> #uses=3
Expand All @@ -29,10 +28,10 @@
; CHECK: CS<None> calls function '.omp_outlined..1'


; %struct.ident_t: four i32 fields followed by one pointer field.
; @0 is the ident_t instance handed as the first argument to the
; __kmpc_fork_call calls in this file; its pointer field refers to @.str.
; With opaque pointers the zero-index getelementptr into @.str is written as
; plain `ptr @.str`.
%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8

define dso_local void @dead_fork_call() {
entry:
Expand All @@ -43,7 +42,7 @@ if.then: ; preds = %entry

if.else: ; preds = %entry
call void @dead_fork_call2()
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..0)
br label %if.end

if.end: ; preds = %if.else, %if.then
Expand All @@ -52,33 +51,33 @@ if.end: ; preds = %if.else, %if.then

; dead_fork_call2: internal function whose only work is a __kmpc_fork_call
; that passes @0 and @.omp_outlined..1 (as the third argument), then returns.
; The opaque-pointer form passes @.omp_outlined..1 directly instead of through
; a constant bitcast.
define internal void @dead_fork_call2() {
entry:
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..1)
ret void
}

; .omp_outlined..0: internal function taking two noalias pointer arguments.
; It stores each argument into its own stack slot and returns; it makes no
; calls.
define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
define internal void @.omp_outlined..0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:
%.global_tid..addr = alloca i32*, align 8
%.bound_tid..addr = alloca i32*, align 8
store i32* %.global_tid., i32** %.global_tid..addr, align 8
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
%.global_tid..addr = alloca ptr, align 8
%.bound_tid..addr = alloca ptr, align 8
store ptr %.global_tid., ptr %.global_tid..addr, align 8
store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
ret void
}

; External variadic declaration of @__kmpc_fork_call. The fixed parameters are
; a pointer, an i32, and a pointer (the callers in this file pass @0, 0, and an
; outlined function). The !callback node !2 is defined outside this excerpt.
declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...)

; live_fork_call: dso_local function that issues a single __kmpc_fork_call
; with @0 and @.omp_outlined..1 (the outlined function that calls @d), then
; returns.
define dso_local void @live_fork_call() {
entry:
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined..1)
ret void
}

; .omp_outlined..1: internal function taking two noalias pointer arguments.
; It stores each argument into its own stack slot, then calls @d through a
; variadic call signature before returning. That call is what distinguishes it
; from @.omp_outlined..0.
define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
define internal void @.omp_outlined..1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:
%.global_tid..addr = alloca i32*, align 8
%.bound_tid..addr = alloca i32*, align 8
store i32* %.global_tid., i32** %.global_tid..addr, align 8
store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
%.global_tid..addr = alloca ptr, align 8
%.bound_tid..addr = alloca ptr, align 8
store ptr %.global_tid., ptr %.global_tid..addr, align 8
store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
call void (...) @d()
ret void
}
Expand Down