518 changes: 243 additions & 275 deletions llvm/test/Transforms/OpenMP/parallel_deletion.ll

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ source_filename = "parallel_deletion_remarks.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; %struct.ident_t and its constant instance @0. The last field of @0 points at
; the @.str string; @0 itself is the first argument of every __kmpc_fork_call
; in @delete_parallel.
; Diff view: each changed line is listed twice, typed-pointer form first and
; opaque-pointer (`ptr`) form second.
%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8

; void delete_parallel(void) {
; #pragma omp parallel
Expand All @@ -26,14 +26,14 @@ target triple = "x86_64-pc-linux-gnu"
; CHECK: remark: parallel_deletion_remarks.c:12:1: Removing parallel region with no side-effects.
; CHECK: remark: parallel_deletion_remarks.c:14:1: Removing parallel region with no side-effects.
; Test input: issues four __kmpc_fork_call invocations, one for each outlined
; function (@.omp_outlined., @.omp_outlined..2, @.omp_outlined..4,
; @.omp_outlined..6), each tagged with its own !dbg location (!18..!21).
; Diff view: the first four calls are the typed-pointer form (callee wrapped
; in a bitcast to the variadic signature); the next four are the same calls
; in opaque-pointer form, where the outlined function is passed directly.
define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 {
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !18
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)), !dbg !20
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)), !dbg !21
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.), !dbg !18
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2), !dbg !19
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..4), !dbg !20
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..6), !dbg !21
ret void, !dbg !22
}

; Variadic external function called by @delete_parallel; it carries !callback
; metadata (!23). Typed-pointer declaration first, opaque-pointer one second.
declare !callback !23 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr
declare !callback !23 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr

; Function Attrs: willreturn
declare !dbg !4 void @unknown_willreturn(...) #0
Expand All @@ -44,22 +44,22 @@ declare !dbg !7 void @readonly_willreturn(...) #1
; Function Attrs: readnone willreturn
declare !dbg !8 void @readnone_willreturn(...) #2

; Outlined function passed to __kmpc_fork_call in @delete_parallel. Its body
; only calls @unknown_willreturn; neither pointer argument is read (both are
; marked readnone).
define internal void @.omp_outlined.(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !25 {
define internal void @.omp_outlined.(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !25 {
call void (...) @unknown_willreturn(), !dbg !36
ret void, !dbg !36
}

; Outlined function passed to __kmpc_fork_call in @delete_parallel. Its body
; only calls @readonly_willreturn.
define internal void @.omp_outlined..2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !37 {
define internal void @.omp_outlined..2(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !37 {
call void (...) @readonly_willreturn(), !dbg !41
ret void, !dbg !41
}

; Outlined function passed to __kmpc_fork_call in @delete_parallel. Its body
; only calls @readnone_willreturn.
define internal void @.omp_outlined..4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !42 {
define internal void @.omp_outlined..4(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !42 {
call void (...) @readnone_willreturn(), !dbg !46
ret void, !dbg !46
}

; Outlined function passed to __kmpc_fork_call in @delete_parallel. The body
; is empty (returns immediately). Unlike the other outlined functions, its
; parameters are not marked readnone.
define internal void @.omp_outlined..6(i32* noalias nocapture %0, i32* noalias nocapture %1) !dbg !47 {
define internal void @.omp_outlined..6(ptr noalias nocapture %0, ptr noalias nocapture %1) !dbg !47 {
ret void, !dbg !51
}

Expand Down
66 changes: 33 additions & 33 deletions llvm/test/Transforms/OpenMP/parallel_level_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,74 +2,74 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"

; Module-level globals of the test input:
;   - three *_exec_mode byte constants, kept referenced via @llvm.compiler.used
;   - @G, the external byte that the helper functions store into
; Changed lines are listed twice: typed-pointer form, then `ptr` form.
%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@no_spmd_exec_mode = weak constant i8 1
@spmd_exec_mode = weak constant i8 0
@parallel_exec_mode = weak constant i8 0
@G = external global i8
@llvm.compiler.used = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"

;.
; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
;.
; Kernel that calls __kmpc_target_init / __kmpc_target_deinit with mode
; argument i8 1, and in between calls @none_spmd_helper and @mixed_helper.
; The CHECK lines expect the body to come out of the pass with the same four
; calls in the same order.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

; Kernel that calls __kmpc_target_init / __kmpc_target_deinit with mode
; argument i8 2, and in between calls @spmd_helper and @mixed_helper.
; The CHECK lines expect the same call sequence after the pass.
define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}

; Kernel that calls __kmpc_target_init / __kmpc_target_deinit with mode
; argument i8 2. In between it calls @spmd_helper and then
; @__kmpc_parallel_51 with all-null / zero arguments.
; The CHECK lines expect the same call sequence after the pass.
define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}

; Helper called from both @none_spmd and @spmd. It stores the result of
; __kmpc_parallel_level() into @G.
; The CHECK lines expect the runtime call to remain in the output (the stored
; value is still [[LEVEL]], not a constant).
define internal void @mixed_helper() {
; CHECK-LABEL: define {{[^@]+}}@mixed_helper() {
; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1
; CHECK-NEXT: store i8 [[LEVEL]], ptr @G, align 1
; CHECK-NEXT: ret void
;
%level = call i8 @__kmpc_parallel_level()
store i8 %level, i8* @G
store i8 %level, ptr @G
ret void
}

Expand Down Expand Up @@ -98,17 +98,17 @@ f:

; Helper called from @spmd and @parallel. The input stores the result of
; __kmpc_parallel_level() into @G.
; The CHECK lines expect the call to be gone after the pass and the store to
; write the constant 1 instead.
define internal void @spmd_helper() {
; CHECK-LABEL: define {{[^@]+}}@spmd_helper() {
; CHECK-NEXT: store i8 1, i8* @G, align 1
; CHECK-NEXT: store i8 1, ptr @G, align 1
; CHECK-NEXT: ret void
;
%level = call i8 @__kmpc_parallel_level()
store i8 %level, i8* @G
store i8 %level, ptr @G
ret void
}

define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) {
define internal void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51
; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8** [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]], ptr [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @parallel_helper()
; CHECK-NEXT: ret void
;
Expand All @@ -119,34 +119,34 @@ define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i
; Helper that stores the result of __kmpc_parallel_level() into @G.
; The CHECK lines expect the runtime call to remain in the output (the stored
; value is still [[LEVEL]], not a constant).
define internal void @parallel_helper() {
; CHECK-LABEL: define {{[^@]+}}@parallel_helper() {
; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1
; CHECK-NEXT: store i8 [[LEVEL]], ptr @G, align 1
; CHECK-NEXT: ret void
;
%level = call i8 @__kmpc_parallel_level()
store i8 %level, i8* @G
store i8 %level, ptr @G
ret void
}

; External declarations used by the test. The two __kmpc_target_* declarations
; reference attribute group #1, whose definition is not visible in this
; excerpt. Changed declarations are listed twice: typed-pointer form, then
; `ptr` form.
declare void @foo()
declare void @bar()
declare i8 @__kmpc_parallel_level()
declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1
declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
!2 = !{void ()* @none_spmd, !"kernel", i32 1}
!3 = !{void ()* @spmd, !"kernel", i32 1}
!4 = !{void ()* @parallel, !"kernel", i32 1}
!2 = !{ptr @none_spmd, !"kernel", i32 1}
!3 = !{ptr @spmd, !"kernel", i32 1}
!4 = !{ptr @parallel, !"kernel", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { alwaysinline }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{void ()* @none_spmd, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{void ()* @spmd, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{void ()* @parallel, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1}
;.
3,754 changes: 1,866 additions & 1,888 deletions llvm/test/Transforms/OpenMP/parallel_region_merging.ll

Large diffs are not rendered by default.

126 changes: 63 additions & 63 deletions llvm/test/Transforms/OpenMP/remove_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,57 +12,57 @@ target triple = "nvptx64"
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
; UTC_ARGS: --enable

@S = external local_unnamed_addr global i8*
@S = external local_unnamed_addr global ptr

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
;.
; Weak definition that takes three unnamed arguments and returns 0 without
; using them. Both check prefixes (CHECK and CHECK-DISABLED) expect the body
; to remain a single `ret i32 0`.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
declare void @__kmpc_target_deinit(ptr, i8)

; Kernel entry of the test input: __kmpc_target_init, then @foo, @bar,
; @convert_and_move_alloca and @unknown_no_openmp, then __kmpc_target_deinit.
; Differences between the input and the expected output, identical for both
; check prefixes:
;   - the input passes `i1 true` as the last __kmpc_target_init argument,
;     while the CHECK lines expect `i1 false`
;   - the calls to @foo, @bar and @convert_and_move_alloca are expected to
;     carry attribute group #[[ATTR0]]; the @unknown_no_openmp call is not
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR0]]
; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true)
call void @foo()
call void @bar()
call void @convert_and_move_alloca()
call void @unknown_no_openmp()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
ret void
}

Expand All @@ -80,87 +80,87 @@ define internal void @foo() {
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %0, i64 4)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !12
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
}

; Allocates 4 bytes with __kmpc_alloc_shared, hands the pointer to @share
; (which stores it into @S), then releases it with __kmpc_free_shared.
; Both check prefixes expect the alloc/free pair to remain in the output. The
; expected output additionally has `nofree` on the @share argument and
; attribute groups on the function and on all three calls.
define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]]
; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]]
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !13
call void @share(i8* %0), !dbg !13
call void @__kmpc_free_shared(i8* %0, i64 4)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !13
call void @share(ptr %0), !dbg !13
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
}

; Takes a pointer and does nothing with it (empty body). Both check prefixes
; expect the body to stay empty, with an attribute group (#[[ATTR2]]) on the
; function.
define internal void @use(i8* %x) {
define internal void @use(ptr %x) {
; CHECK-LABEL: define {{[^@]+}}@use
; CHECK-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@use
; CHECK-DISABLED-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-DISABLED-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: ret void
;
entry:
ret void
}

; Stores its pointer argument into the external global @S. Both check
; prefixes expect the store to remain, with `nofree` on the parameter, an
; explicit `align 8` on the store, and an attribute group (#[[ATTR3]]) on the
; function.
define internal void @share(i8* %x) {
define internal void @share(ptr %x) {
; CHECK-LABEL: define {{[^@]+}}@share
; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8* [[X]], i8** @S, align 8
; CHECK-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@share
; CHECK-DISABLED-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: store i8* [[X]], i8** @S, align 8
; CHECK-DISABLED-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-DISABLED-NEXT: ret void
;
entry:
store i8* %x, i8** @S
store ptr %x, ptr @S
ret void
}

; Allocates 4 bytes with __kmpc_alloc_shared, passes the pointer to @use, and
; frees it. The two check prefixes expect different results:
;   - CHECK: the alloc/free pair is gone; an `alloca i8, i64 4` appears
;     instead and @use is called with `undef`
;   - CHECK-DISABLED: the alloc/use/free sequence is kept as written
define void @unused() {
; CHECK-LABEL: define {{[^@]+}}@unused() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: call void @use(i8* undef)
; CHECK-NEXT: call void @use(ptr undef)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @use(i8* [[TMP0]])
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4)
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
; CHECK-DISABLED-NEXT: call void @use(ptr [[TMP0]])
; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4)
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %0, i64 4)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !14
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
}

Expand All @@ -172,15 +172,15 @@ define internal void @convert_and_move_alloca() {
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
; CHECK-NEXT: store i32 0, i32* [[IV_PTR]], align 4
; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
; CHECK: loopbody:
; CHECK-NEXT: [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4
; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK: loopinc:
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]], align 4
; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
; CHECK-NEXT: br label [[LOOPBODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
Expand All @@ -192,52 +192,52 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
; CHECK-DISABLED-NEXT: store i32 0, i32* [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK-DISABLED: loopinc:
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
; CHECK-DISABLED: exit:
; CHECK-DISABLED-NEXT: ret void
;
entry:
%iv_ptr = alloca i32, align 4
%ub_ptr = alloca i32, align 4
store i32 10, i32* %ub_ptr
store i32 10, ptr %ub_ptr
br label %initloop

initloop:
store i32 0, i32* %iv_ptr
%ub = load i32, i32* %ub_ptr
store i32 0, ptr %iv_ptr
%ub = load i32, ptr %ub_ptr
br label %loopbody

loopbody:
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !16
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %0, i64 4)
%iv = load i32, i32* %iv_ptr
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !16
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
%iv = load i32, ptr %iv_ptr
%1 = icmp eq i32 %iv, %ub
br i1 %1, label %exit, label %loopinc

loopinc:
%inc = add i32 %iv, 1
store i32 %inc, i32* %iv_ptr
store i32 %inc, ptr %iv_ptr
br label %loopbody

exit:
ret void
}

; Declarations of the shared-memory allocation pair and of an external
; function carrying the "llvm.assume"="omp_no_openmp" string attribute.
; The CHECK line for __kmpc_free_shared expects `allocptr nocapture` on its
; pointer parameter in the output; the input declaration has neither.
; CHECK: declare i8* @__kmpc_alloc_shared(i64)
declare i8* @__kmpc_alloc_shared(i64)
; CHECK: declare ptr @__kmpc_alloc_shared(i64)
declare ptr @__kmpc_alloc_shared(i64)

; CHECK: declare void @__kmpc_free_shared(i8* allocptr nocapture, i64)
declare void @__kmpc_free_shared(i8*, i64)
; CHECK: declare void @__kmpc_free_shared(ptr allocptr nocapture, i64)
declare void @__kmpc_free_shared(ptr, i64)

declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"

Expand All @@ -250,7 +250,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{void ()* @kernel, !"kernel", i32 1}
!5 = !{ptr @kernel, !"kernel", i32 1}
!6 = !{i32 7, !"openmp", i32 50}
!7 = !{i32 7, !"openmp-device", i32 50}
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
Expand Down Expand Up @@ -284,7 +284,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
; CHECK: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
Expand All @@ -296,7 +296,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
; CHECK-DISABLED: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
Expand Down
131 changes: 61 additions & 70 deletions llvm/test/Transforms/OpenMP/replace_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,91 +13,82 @@ target triple = "nvptx64"
; CHECK-LIMIT: remark: replace_globalization.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization
; UTC_ARGS: --enable

; Module-level globals of the test input:
;   - @S: external pointer-sized global that @use stores into
;   - @0 / @1: a string constant and the %struct.ident_t instance pointing at
;     it; @1 is passed to the __kmpc_target_init/deinit calls in the kernels
;   - three *_exec_mode byte constants (values 1, 1 and 2)
; Changed lines are listed twice: typed-pointer form, then `ptr` form.
%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@S = external local_unnamed_addr global i8*
@S = external local_unnamed_addr global ptr
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@foo_exec_mode = weak constant i8 1
@bar_exec_mode = weak constant i8 1
@baz_spmd_exec_mode = weak constant i8 2


; Kernel (mode argument i8 1) that allocates 4 bytes with
; __kmpc_alloc_shared unconditionally, passes the pointer to @use, and frees
; it before __kmpc_target_deinit.
; Diff view: the typed-pointer form routes %x through two bitcasts
; (i8* -> i32* -> i8*) before calling @use; the `ptr` form drops both casts
; and passes %x directly.
define dso_local void @foo() "kernel" {
entry:
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
%c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
%x_on_stack = bitcast i8* %x to i32*
%0 = bitcast i32* %x_on_stack to i8*
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %x, i64 4)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x, i64 4)
call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
}

; Kernel (mode argument i8 1) with two guarded shared allocations. Each guard
; compares %c, the __kmpc_target_init result, against -1:
;   - master1: 16 bytes (!dbg !11), passed to @use, then freed
;   - master2: 4 bytes (!dbg !12), passed to @use, then freed
; If either guard fails, control goes straight to %exit.
; Diff view: the typed-pointer form round-trips each allocation through a
; pair of bitcasts before calling @use; the `ptr` form passes %x / %y
; directly.
define void @bar() "kernel" {
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
br i1 %cmp, label %master1, label %exit
master1:
%x = call align 4 i8* @__kmpc_alloc_shared(i64 16), !dbg !11
%x_on_stack = bitcast i8* %x to [4 x i32]*
%a0 = bitcast [4 x i32]* %x_on_stack to i8*
call void @use(i8* %a0)
call void @__kmpc_free_shared(i8* %x, i64 16)
%x = call align 4 ptr @__kmpc_alloc_shared(i64 16), !dbg !11
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x, i64 16)
br label %next
next:
call void @unknown_no_openmp()
%b0 = icmp eq i32 %c, -1
br i1 %b0, label %master2, label %exit
master2:
%y = call align 4 i8* @__kmpc_alloc_shared(i64 4), !dbg !12
%y_on_stack = bitcast i8* %y to [4 x i32]*
%b1 = bitcast [4 x i32]* %y_on_stack to i8*
call void @use(i8* %b1)
call void @__kmpc_free_shared(i8* %y, i64 4)
%y = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12
call void @use(ptr %y)
call void @__kmpc_free_shared(ptr %y, i64 4)
br label %exit
exit:
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
}

; Kernel (mode argument i8 2) with one guarded shared allocation. When %c,
; the __kmpc_target_init result, equals -1, block master3 allocates 24 bytes
; (!dbg !12), passes the pointer to @use, and frees it. Otherwise control
; goes straight to %exit.
; Diff view: the typed-pointer form round-trips %z through two bitcasts
; before calling @use; the `ptr` form passes %z directly.
define void @baz_spmd() "kernel" {
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true)
%c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
br i1 %c0, label %master3, label %exit
master3:
%z = call align 4 i8* @__kmpc_alloc_shared(i64 24), !dbg !12
%z_on_stack = bitcast i8* %z to [6 x i32]*
%c1 = bitcast [6 x i32]* %z_on_stack to i8*
call void @use(i8* %c1)
call void @__kmpc_free_shared(i8* %z, i64 24)
%z = call align 4 ptr @__kmpc_alloc_shared(i64 24), !dbg !12
call void @use(ptr %z)
call void @__kmpc_free_shared(ptr %z, i64 24)
br label %exit
exit:
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
call void @__kmpc_target_deinit(ptr @1, i8 2)
ret void
}

; Stores its pointer argument into the external global @S. In this file @use
; is what each kernel passes its shared allocation to.
define void @use(i8* %x) {
define void @use(ptr %x) {
entry:
store i8* %x, i8** @S
store ptr %x, ptr @S
ret void
}

; Private definition of __kmpc_alloc_shared for this test, together with the
; two globals it reads. It ignores its i64 argument and returns the address
; of @stack (a 1024-byte addrspace(3) array), cast to the default address
; space and advanced by the i32 value loaded from @offset.
; Diff view: the typed-pointer form needs a bitcast before the addrspacecast;
; the `ptr` form addrspacecasts @stack directly.
@offset =global i32 undef
@stack = internal addrspace(3) global [1024 x i8] undef
define private i8* @__kmpc_alloc_shared(i64) {
%bc = bitcast [1024 x i8] addrspace(3) * @stack to i8 addrspace(3) *
%ac = addrspacecast i8 addrspace(3) * %bc to i8*
%l = load i32, i32* @offset
%gep = getelementptr i8, i8* %ac, i32 %l
ret i8* %gep
define private ptr @__kmpc_alloc_shared(i64) {
%ac = addrspacecast ptr addrspace(3) @stack to ptr
%l = load i32, ptr @offset
%gep = getelementptr i8, ptr %ac, i32 %l
ret ptr %gep
}

declare void @__kmpc_free_shared(i8*, i64)
declare void @__kmpc_free_shared(ptr, i64)

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()

Expand All @@ -106,11 +97,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
; The stub body ignores its three (unnamed) parameters and always returns 0.
; NOTE(review): the `define` line is present in both a typed-pointer
; (%struct.ident_t*) and an opaque-pointer (ptr) spelling; confirm which remains.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}

declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
declare void @__kmpc_target_deinit(ptr, i8)

declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"

Expand All @@ -125,17 +116,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
!7 = !{void ()* @foo, !"kernel", i32 1}
!8 = !{void ()* @bar, !"kernel", i32 1}
!13 = !{void ()* @baz_spmd, !"kernel", i32 1}
!7 = !{ptr @foo, !"kernel", i32 1}
!8 = !{ptr @bar, !"kernel", i32 1}
!13 = !{ptr @baz_spmd, !"kernel", i32 1}
!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!10 = !DISubroutineType(types: !2)
!11 = !DILocation(line: 5, column: 7, scope: !9)
!12 = !DILocation(line: 5, column: 14, scope: !9)
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
Expand All @@ -147,77 +138,77 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
; CHECK: master1:
; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]]
; CHECK-NEXT: br label [[NEXT:%.*]]
; CHECK: next:
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]]
; CHECK: master2:
; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK: master3:
; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: nofree norecurse nounwind memory(write)
; CHECK-LABEL: define {{[^@]+}}@use.internalized
; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8* [[X]], i8** @S, align 8
; CHECK-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@use
; CHECK-SAME: (i8* [[X:%.*]]) {
; CHECK-SAME: (ptr [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8* [[X]], i8** @S, align 8
; CHECK-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read)
; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]]
; CHECK-NEXT: ret i8* [[GEP]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr @offset, align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspacecast (ptr addrspace(3) @stack to ptr), i32 [[L]]
; CHECK-NEXT: ret ptr [[GEP]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
;.
Expand All @@ -236,9 +227,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META7:![0-9]+]] = !{void ()* @foo, !"kernel", i32 1}
; CHECK: [[META8:![0-9]+]] = !{void ()* @bar, !"kernel", i32 1}
; CHECK: [[META9:![0-9]+]] = !{void ()* @baz_spmd, !"kernel", i32 1}
; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1}
; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1}
; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1}
; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: !11)
; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2)
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/OpenMP/rtf_type_checking.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,45 @@

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, ptr @.str }, align 8

; Entry point of the test: makes a single call to @__kmpc_fork_call, passing the
; ident @0 and the outlined function @.omp_outlined., then returns 0.
; The @__kmpc_fork_call declaration in this file is annotated as having fewer
; arguments than the real variadic runtime function expects.
; NOTE(review): the call is present in both a typed-pointer and an opaque-pointer
; (ptr) spelling; confirm only one is meant to remain.
define i32 @main() {
entry:

call void (%struct.ident_t*, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
call void (ptr, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, ptr @.omp_outlined.)
ret i32 0
}

; Only the last runtime call will be matched, because the rest of the "runtime function" calls
; have some type mismatch compared to the real runtime function. See the check at bottom.
; Outlined body that calls a series of functions named like OpenMP runtime
; entry points. Per the comments on their declarations later in this file, all
; but @__kmpc_flush are declared with a signature that differs from the real
; runtime function.
; NOTE(review): each changed line is present in both a typed-pointer and an
; opaque-pointer (ptr) spelling; confirm only one is meant to remain.
define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:

; Typed-pointer spelling of the first three calls.
call void @__kmpc_master(%struct.ident_t* nonnull @0)
call void @__kmpc_end_master(%struct.ident_t* nonnull @0, i32 0, i32 0)
call void @__kmpc_barrier(%struct.ident_t* nonnull @1, float 0.0)
; Opaque-pointer spelling of the same calls:
;   @__kmpc_master     - declared with fewer arguments than expected.
;   @__kmpc_end_master - declared with more arguments than expected.
;   @__kmpc_barrier    - declared with a different argument type (float).
call void @__kmpc_master(ptr nonnull @0)
call void @__kmpc_end_master(ptr nonnull @0, i32 0, i32 0)
call void @__kmpc_barrier(ptr nonnull @1, float 0.0)
; Declared in this file with a void return type ("Different return type").
call void @omp_get_thread_num()
; The one call whose declaration is annotated as a proper use of the runtime function.
call void @__kmpc_flush(%struct.ident_t* nonnull @0)
call void @__kmpc_flush(ptr nonnull @0)
ret void
}
; Fewer arguments than expected in variadic function.
declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, void (i32*, i32*, ...)*, ...)
declare !callback !2 void @__kmpc_fork_call(ptr, ptr, ...)

; Fewer number of arguments in non variadic function.
declare void @__kmpc_master(%struct.ident_t*)
declare void @__kmpc_master(ptr)

; Bigger number of arguments in non variadic function.
declare void @__kmpc_end_master(%struct.ident_t*, i32, i32)
declare void @__kmpc_end_master(ptr, i32, i32)

; Different argument type than the expected.
declare void @__kmpc_barrier(%struct.ident_t*, float)
declare void @__kmpc_barrier(ptr, float)

; Proper use of runtime function.
declare void @__kmpc_flush(%struct.ident_t*)
declare void @__kmpc_flush(ptr)

; Different return type.
declare void @omp_get_thread_num()
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/OpenMP/single_threaded_execution.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
; REQUIRES: asserts
; ModuleID = 'single_threaded_exeuction.c'

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@0 = private unnamed_addr constant [1 x i8] c"\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i32 0, i32 0) }, align 8
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@kernel_exec_mode = weak constant i8 1


Expand All @@ -15,15 +15,15 @@
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
; Kernel with a diamond-shaped CFG: the result of @__kmpc_target_init is compared
; against -1, selecting if.then or if.else, and both fall through to if.end.
; The negative FileCheck lines directly above this function expect no
; "executed by a single thread" message for the if.else and if.end blocks.
; NOTE(review): the init/deinit calls are each present in a typed-pointer and an
; opaque-pointer (ptr) spelling; confirm only one is meant to remain.
define void @kernel() {
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false)
%call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
br label %if.end
if.else:
br label %if.end
if.end:
; The deinit call is given a null ident pointer rather than @1.
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

Expand Down Expand Up @@ -104,11 +104,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()

declare i32 @llvm.amdgcn.workitem.id.x()

declare void @__kmpc_kernel_prepare_parallel(i8*)
declare void @__kmpc_kernel_prepare_parallel(ptr)

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
declare i32 @__kmpc_target_init(ptr, i8, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
declare void @__kmpc_target_deinit(ptr, i8)

attributes #0 = { cold noinline }

Expand All @@ -123,7 +123,7 @@ attributes #0 = { cold noinline }
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
!7 = !{void ()* @kernel, !"kernel", i32 1}
!7 = !{ptr @kernel, !"kernel", i32 1}
!8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!10 = !DISubroutineType(types: !2)
1,333 changes: 634 additions & 699 deletions llvm/test/Transforms/OpenMP/spmdization.ll

Large diffs are not rendered by default.

70 changes: 34 additions & 36 deletions llvm/test/Transforms/OpenMP/spmdization_assumes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,78 +12,76 @@

target triple = "nvptx64"

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"

; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
; Kernel under test. The input IR (after the assertion lines) initializes the
; runtime with `i8 1, i1 true`, calls @__nv_sin, stores the result through %x,
; and then calls @__kmpc_parallel_51 before tearing down with `i8 1`.
; The expected output listed first differs from the input in these visible ways:
;   - the init/deinit mode argument is `i8 2` and the init flag is `i1 false`;
;   - the store to %x sits in a region.guarded block that is entered only when
;     @__kmpc_get_hardware_thread_id_in_block returns 0, and is followed by a
;     call to @__kmpc_barrier_simple_spmd.
; NOTE(review): each changed line is present in both a typed-pointer and an
; opaque-pointer (ptr) spelling; confirm only one is meant to remain.
define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR3]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR3]]
; CHECK-NEXT: [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[CALL_I]], double* [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
; Input IR starts here. %captured_vars_addrs is a zero-length array of pointers
; that is passed to @__kmpc_parallel_51 together with an argument count of 0.
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
ret void

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
%1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
%call.i = call double @__nv_sin(double 0x400921FB54442D18) #6
; Typed-pointer spelling: store, GEP to the first array slot, parallel call, deinit.
store double %call.i, double* %x, align 8, !tbaa !8
%2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3
; Opaque-pointer spelling: the alloca is passed directly, so no GEP is needed.
store double %call.i, ptr %x, align 8, !tbaa !8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3
call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr

; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 {
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
; CHECK-SAME: (i32* noalias nocapture [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-SAME: (ptr noalias nocapture [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
Expand All @@ -96,25 +94,25 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
entry:
%global_args = alloca i8**, align 8
call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3
%global_args = alloca ptr, align 8
call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3
ret void
}

declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr

; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3

; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4

declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr

; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5
Expand All @@ -133,7 +131,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
!llvm.ident = !{!7}

!0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
!1 = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
!1 = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"openmp", i32 50}
!4 = !{i32 7, !"openmp-device", i32 50}
Expand All @@ -155,7 +153,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
; CHECK: [[META1:![0-9]+]] = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
Expand Down
226 changes: 113 additions & 113 deletions llvm/test/Transforms/OpenMP/spmdization_guarding.ll

Large diffs are not rendered by default.

Large diffs are not rendered by default.

101 changes: 48 additions & 53 deletions llvm/test/Transforms/OpenMP/spmdization_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,53 +37,51 @@ target triple = "nvptx64"
;; }
;; }

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @0, i32 0, i32 0) }, align 8
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([72 x i8], [72 x i8]* @2, i32 0, i32 0) }, align 8
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([104 x i8], [104 x i8]* @4, i32 0, i32 0) }, align 8
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @6, i32 0, i32 0) }, align 8
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([75 x i8], [75 x i8]* @8, i32 0, i32 0) }, align 8
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([107 x i8], [107 x i8]* @10, i32 0, i32 0) }, align 8
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([63 x i8], [63 x i8]* @12, i32 0, i32 0) }, align 8
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"

; Function Attrs: convergent norecurse nounwind
; Kernel entry point. Calls __kmpc_target_init and only runs the user code when
; the result equals -1; every other result branches straight to common.ret.
; The user code calls @unknown, launches @__omp_outlined__2 (with its wrapper)
; through __kmpc_parallel_51 using ident @13, calls @unknown again, and finally
; calls __kmpc_target_deinit.
; NOTE(review): this span appears to interleave the typed-pointer lines
; (i8*, %struct.ident_t*) with their opaque-pointer (ptr) replacements, so
; several values are defined twice as shown — confirm against the actual test.
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18

common.ret: ; preds = %entry, %user_code.entry
ret void, !dbg !19

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
%1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3
call void @unknown() #6, !dbg !20
%2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
%3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
%4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3
%2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}

; Weak definition of __kmpc_target_init whose body ignores its three arguments
; and unconditionally returns 0.
; NOTE(review): two `define` header lines are shown (typed-pointer and
; opaque-pointer parameter); presumably only the `ptr` form belongs in the
; final file — confirm.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}

Expand All @@ -94,52 +92,49 @@ declare void @unknown() local_unnamed_addr #1
; Function Attrs: nounwind
; Allocates a zero-length captured-variables array, queries the global thread
; number for ident @13, and launches @__omp_outlined__2 (together with
; @__omp_outlined__2_wrapper) through __kmpc_parallel_51, then returns.
; NOTE(review): the typed-pointer and opaque-pointer variants of each
; instruction both appear below; presumably only one set is live — confirm.
define hidden void @known() local_unnamed_addr #2 !dbg !29 {
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
%1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13)
call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30
ret void, !dbg !31
}

; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3

declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr

; Function Attrs: norecurse nounwind
; Kernel entry point. Calls __kmpc_target_init with ident @7 and only runs the
; user code when the result equals -1; otherwise it branches to common.ret.
; The user code issues three __kmpc_parallel_51 launches of @__omp_outlined__2,
; each bracketed by llvm.lifetime.start/end on the captured-variables alloca,
; then calls @spmd_amenable and __kmpc_target_deinit.
; NOTE(review): the typed-pointer instruction sequence is followed by its
; opaque-pointer (ptr) replacement inside the same block, so value numbers
; repeat as shown — confirm against the actual test file.
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33

common.ret: ; preds = %entry, %user_code.entry
ret void, !dbg !34

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
%2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
%3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
%4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
%5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
%6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
%1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
%2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
%3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
%4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @spmd_amenable()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
entry:
call void @unknown() #6, !dbg !48
ret void, !dbg !49
Expand All @@ -148,21 +143,21 @@ entry:
; Function Attrs: convergent norecurse nounwind
; Wrapper passed alongside @__omp_outlined__2 to __kmpc_parallel_51. It reserves
; a stack slot, hands its address to __kmpc_get_shared_variables, then calls
; @unknown and returns. Neither argument (%0, %1) is read in the visible body.
; NOTE(review): both the typed-pointer (i8**) and opaque-pointer (ptr) forms of
; the alloca/call pair are shown; presumably only one is live — confirm.
define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
entry:
%global_args = alloca i8**, align 8
call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
%global_args = alloca ptr, align 8
call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51
call void @unknown() #6, !dbg !52
ret void, !dbg !51
}

declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr

declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5

declare void @spmd_amenable() #7

Expand All @@ -186,8 +181,8 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
!2 = !{}
!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
!7 = !{i32 7, !"Dwarf Version", i32 2}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

target triple = "amdgcn-amd-amdhsa"

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@G = internal addrspace(3) global i32 undef, align 4

Expand All @@ -16,84 +16,84 @@ define void @kernel() "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel
; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; TUNIT: if.then:
; TUNIT-NEXT: store i32 1, i32 addrspace(3)* @G, align 4
; TUNIT-NEXT: store i32 1, ptr addrspace(3) @G, align 4
; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
; TUNIT: if.else:
; TUNIT-NEXT: call void @barrier() #[[ATTR4:[0-9]+]]
; TUNIT-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR4]]
; TUNIT-NEXT: br label [[IF_MERGE]]
; TUNIT: if.merge:
; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; TUNIT: if.then2:
; TUNIT-NEXT: store i32 2, i32 addrspace(3)* @G, align 4
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @G, align 4
; TUNIT-NEXT: call void @barrier() #[[ATTR4]]
; TUNIT-NEXT: br label [[IF_END]]
; TUNIT: if.end:
; TUNIT-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
; TUNIT-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel
; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CGSCC: if.then:
; CGSCC-NEXT: store i32 1, i32 addrspace(3)* @G, align 4
; CGSCC-NEXT: store i32 1, ptr addrspace(3) @G, align 4
; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
; CGSCC: if.else:
; CGSCC-NEXT: call void @barrier()
; CGSCC-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
; CGSCC-NEXT: call void @use1(i32 [[L]])
; CGSCC-NEXT: br label [[IF_MERGE]]
; CGSCC: if.merge:
; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; CGSCC: if.then2:
; CGSCC-NEXT: store i32 2, i32 addrspace(3)* @G, align 4
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @G, align 4
; CGSCC-NEXT: call void @barrier()
; CGSCC-NEXT: br label [[IF_END]]
; CGSCC: if.end:
; CGSCC-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
; CGSCC-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
; CGSCC-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
%call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
store i32 1, i32 addrspace(3)* @G
store i32 1, ptr addrspace(3) @G
br label %if.merge
if.else:
call void @barrier();
%l = load i32, i32 addrspace(3)* @G
%l = load i32, ptr addrspace(3) @G
call void @use1(i32 %l)
br label %if.merge
if.merge:
br i1 %cmp, label %if.then2, label %if.end
if.then2:
store i32 2, i32 addrspace(3)* @G
store i32 2, ptr addrspace(3) @G
call void @barrier();
br label %if.end
if.end:
call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
call void @__kmpc_target_deinit(ptr undef, i8 1)
ret void
}

declare void @barrier() norecurse nounwind nocallback
declare void @use1(i32) nosync norecurse nounwind nocallback
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback
declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
declare void @__kmpc_target_deinit(ptr, i8) nocallback

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
!2 = !{void ()* @kernel, !"kernel", i32 1}
!2 = !{ptr @kernel, !"kernel", i32 1}

;.
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
Expand All @@ -109,7 +109,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
55 changes: 24 additions & 31 deletions llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,68 +6,61 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
@.__omp_offloading_heavyComputation.region_id = weak constant i8 0
@.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35]

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8

; CHECK-LABEL: {{[^@]+}}Successfully got offload values:
; CHECK-NEXT: offload_baseptrs: double* %a --- %size.addr = alloca i32, align 4 ---
; CHECK-NEXT: offload_ptrs: double* %a --- %size.addr = alloca i32, align 4 ---
; CHECK-NEXT: offload_baseptrs: ptr %a --- %size.addr = alloca i32, align 4 ---
; CHECK-NEXT: offload_ptrs: ptr %a --- %size.addr = alloca i32, align 4 ---
; CHECK-NEXT: offload_sizes: %0 = shl nuw nsw i64 %conv, 3 --- i64 4 ---

;int heavyComputation(double* a, unsigned size) {
;int heavyComputation(double* a, unsigned size) {
; int random = rand() % 7;
;
; //#pragma omp target data map(a[0:size], size)
; void* args[2];
; void* args[2];
; args[0] = &a;
; args[1] = &size;
; __tgt_target_data_begin(..., args, ...)
;
; #pragma omp target teams
; for (int i = 0; i < size; ++i) {
; a[i] = ++a[i] * 3.141624;
; a[i] = ++a[i] * 3.141624;
; }
;
; return random;
;}
; Populates the two-entry offload arrays and brackets the remainder computation
; with __tgt_target_data_begin_mapper / __tgt_target_data_end_mapper:
;   entry 0: base pointer and pointer are %a, size is zext(%size) << 3
;   entry 1: base pointer and pointer are %size.addr, size is the constant 4
; Returns the result of @rand() srem 7.
; NOTE(review): the typed-pointer instruction sequence (with bitcasts and
; index-0 GEPs) is followed by its opaque-pointer (ptr) replacement, so value
; numbers repeat as shown — confirm against the actual test file.
define dso_local i32 @heavyComputation(double* %a, i32 %size) {
define dso_local i32 @heavyComputation(ptr %a, i32 %size) {
entry:
%size.addr = alloca i32, align 4
%.offload_baseptrs = alloca [2 x i8*], align 8
%.offload_ptrs = alloca [2 x i8*], align 8
%.offload_baseptrs = alloca [2 x ptr], align 8
%.offload_ptrs = alloca [2 x ptr], align 8
%.offload_sizes = alloca [2 x i64], align 8

store i32 %size, i32* %size.addr, align 4
store i32 %size, ptr %size.addr, align 4
%call = tail call i32 (...) @rand()

%conv = zext i32 %size to i64
%0 = shl nuw nsw i64 %conv, 3
%1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
%2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
store double* %a, double** %2, align 8
%3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
%4 = bitcast [2 x i8*]* %.offload_ptrs to double**
store double* %a, double** %4, align 8
%5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
store i64 %0, i64* %5, align 8
%6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
%7 = bitcast i8** %6 to i32**
store i32* %size.addr, i32** %7, align 8
%8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
%9 = bitcast i8** %8 to i32**
store i32* %size.addr, i32** %9, align 8
%10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
store i64 4, i64* %10, align 8
call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null)
store ptr %a, ptr %.offload_baseptrs, align 8
store ptr %a, ptr %.offload_ptrs, align 8
store i64 %0, ptr %.offload_sizes, align 8
%1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1
store ptr %size.addr, ptr %1, align 8
%2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1
store ptr %size.addr, ptr %2, align 8
%3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1
store i64 4, ptr %3, align 8
call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null)
%rem = srem i32 %call, 7
call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null)
call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null)
ret i32 %rem
}

declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)

declare dso_local i32 @rand(...)

Expand Down