diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 94e97a891baed..8b2f5d2746f7f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11368,6 +11368,8 @@ def err_openmp_vla_in_task_untied : Error< def warn_omp_unterminated_declare_target : Warning< "expected '#pragma omp end declare target' at end of file to match '#pragma omp %0'">, InGroup; +def err_ompx_bare_no_grid : Error< + "'ompx_bare' clauses requires explicit grid size via 'num_teams' and 'thread_limit' clauses">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e400f248d15aa..3826994ef2126 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14658,6 +14658,19 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, } setFunctionHasBranchProtectedScope(); + const OMPClause *BareClause = nullptr; + bool HasThreadLimitAndNumTeamsClause = hasClauses(Clauses, OMPC_num_teams) && + hasClauses(Clauses, OMPC_thread_limit); + bool HasBareClause = llvm::any_of(Clauses, [&](const OMPClause *C) { + BareClause = C; + return C->getClauseKind() == OMPC_ompx_bare; + }); + + if (HasBareClause && !HasThreadLimitAndNumTeamsClause) { + Diag(BareClause->getBeginLoc(), diag::err_ompx_bare_no_grid); + return StmtError(); + } + return OMPTargetTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt); } diff --git a/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp index 9f8046acb0970..2e6f0a9ce0169 100644 --- a/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp @@ -10,7 +10,7 @@ template tx ftemplate(int n) { tx a = 0; - #pragma omp target teams ompx_bare + #pragma omp target teams ompx_bare num_teams(1) thread_limit(32) { a = 2; } diff --git a/clang/test/OpenMP/ompx_bare_messages.c b/clang/test/OpenMP/ompx_bare_messages.c index a1b3c38028528..19ceee5625fee 100644 --- a/clang/test/OpenMP/ompx_bare_messages.c +++ b/clang/test/OpenMP/ompx_bare_messages.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown %s - // RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-unknown %s - // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-unknown %s +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64 %s void foo() { } @@ -18,4 +18,7 @@ void bar() { #pragma omp target #pragma omp teams ompx_bare // expected-error {{unexpected OpenMP clause 'ompx_bare' in directive '#pragma omp teams'}} expected-note {{OpenMP extension clause 'ompx_bare' only allowed with '#pragma omp target teams'}} foo(); + +#pragma omp target teams ompx_bare // expected-error {{'ompx_bare' clauses requires explicit grid size via 'num_teams' and 'thread_limit' clauses}} + foo(); } diff --git a/clang/test/OpenMP/target_teams_ast_print.cpp b/clang/test/OpenMP/target_teams_ast_print.cpp index 5f1040be01a25..8eaf4cbf24933 100644 --- a/clang/test/OpenMP/target_teams_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_ast_print.cpp @@ -111,8 +111,8 @@ int main (int argc, char **argv) { // CHECK-NEXT: #pragma omp target teams a=2; // CHECK-NEXT: a = 2; -#pragma omp target teams ompx_bare -// CHECK-NEXT: #pragma omp target teams ompx_bare +#pragma omp target teams ompx_bare num_teams(1) thread_limit(32) +// CHECK-NEXT: #pragma omp target teams ompx_bare num_teams(1) thread_limit(32) a=3; // CHECK-NEXT: a = 3; #pragma omp target teams default(none), private(argc,b) num_teams(f) firstprivate(argv) reduction(| : c, d) reduction(* : e) thread_limit(f+g) diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index b7c7add229c14..8790a0fc87cbb 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -121,7 +121,7 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare + #pragma omp target teams ompx_bare num_teams(1) thread_limit(1) { a += 1; aa += 1; @@ -588,12 +588,12 @@ int bar(int n){ // CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 9 // CHECK1-NEXT: store i64 0, ptr [[TMP116]], align 8 // CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP117]], align 4 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP117]], align 4 // CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP118]], align 4 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP118]], align 4 // CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP119]], align 4 -// CHECK1-NEXT: [[TMP120:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.region_id, ptr [[KERNEL_ARGS21]]) +// CHECK1-NEXT: [[TMP120:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.region_id, ptr [[KERNEL_ARGS21]]) // CHECK1-NEXT: [[TMP121:%.*]] = icmp ne i32 [[TMP120]], 0 // CHECK1-NEXT: br i1 [[TMP121]], label [[OMP_OFFLOAD_FAILED22:%.*]], label [[OMP_OFFLOAD_CONT23:%.*]] // CHECK1: omp_offload.failed22: @@ -899,64 +899,64 @@ int bar(int n){ // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META25:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META25]] // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !25 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META25]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 +// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META25]] // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias !25 +// CHECK1-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META25]] // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8, !noalias !25 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias !25 +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 9 -// CHECK1-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias !25 +// CHECK1-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias !25 +// CHECK1-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META25]] // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias !25 +// CHECK1-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META25]] // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias !25 +// CHECK1-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META25]] // CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 [[TMP18]], i32 [[TMP19]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, ptr [[KERNEL_ARGS_I]]) // CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP12]], align 2 -// CHECK1-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias !25 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[AA_CASTED_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META25]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[AA_CASTED_I]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META25]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias [[META25]] // CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 -// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META25]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 8, !noalias [[META25]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP37]], i64 [[TMP39]], i64 [[TMP41]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK1: .omp_outlined..exit: @@ -1069,15 +1069,17 @@ int bar(int n){ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -2180,12 +2182,12 @@ int bar(int n){ // CHECK3-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 9 // CHECK3-NEXT: store i64 0, ptr [[TMP114]], align 8 // CHECK3-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP115]], align 4 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP115]], align 4 // CHECK3-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP116]], align 4 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP116]], align 4 // CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP117]], align 4 -// CHECK3-NEXT: [[TMP118:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.region_id, ptr [[KERNEL_ARGS21]]) +// CHECK3-NEXT: [[TMP118:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.region_id, ptr [[KERNEL_ARGS21]]) // CHECK3-NEXT: [[TMP119:%.*]] = icmp ne i32 [[TMP118]], 0 // CHECK3-NEXT: br i1 [[TMP119]], label [[OMP_OFFLOAD_FAILED22:%.*]], label [[OMP_OFFLOAD_CONT23:%.*]] // CHECK3: omp_offload.failed22: @@ -2493,64 +2495,64 @@ int bar(int n){ // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META26:![0-9]+]] +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK3-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP23]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP24]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP25]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP28]], align 4, !noalias !26 +// CHECK3-NEXT: store ptr null, ptr [[TMP28]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias !26 +// CHECK3-NEXT: store i64 0, ptr [[TMP29]], align 8, !noalias [[META26]] // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 9 -// CHECK3-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias !26 +// CHECK3-NEXT: store i64 1, ptr [[TMP30]], align 8, !noalias [[META26]] // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias !26 +// CHECK3-NEXT: store [3 x i32] [[TMP20]], ptr [[TMP31]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias !26 +// CHECK3-NEXT: store [3 x i32] [[TMP21]], ptr [[TMP32]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 0, ptr [[TMP33]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 [[TMP18]], i32 [[TMP19]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, ptr [[KERNEL_ARGS_I]]) // CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK3-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP12]], align 2 -// CHECK3-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias !26 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[AA_CASTED_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i16 [[TMP36]], ptr [[AA_CASTED_I]], align 2, !noalias [[META26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[AA_CASTED_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i32 [[TMP40]], ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META26]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias [[META26]] // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP37]], i32 [[TMP39]], i32 [[TMP41]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK3: .omp_outlined..exit: @@ -2663,15 +2665,17 @@ int bar(int n){ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -3613,16 +3617,18 @@ int bar(int n){ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP2]], i64 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -4172,16 +4178,18 @@ int bar(int n){ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP2]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // //