diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 3cb3c1014d73b..f735fa5643aec 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -6109,6 +6109,8 @@ class OMPTeamsGenericLoopDirective final : public OMPLoopDirective {
 class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
   friend class ASTStmtReader;
   friend class OMPExecutableDirective;
+  /// True if this loop directive's associated loop can be a parallel for.
+  bool CanBeParallelFor = false;
   /// Build directive with the given start and end location.
   ///
   /// \param StartLoc Starting location of the directive kind.
@@ -6131,6 +6133,9 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
                           llvm::omp::OMPD_target_teams_loop, SourceLocation(),
                           SourceLocation(), CollapsedNum) {}
 
+  /// Set whether the associated loop can be a parallel for.
+  void setCanBeParallelFor(bool ParFor) { CanBeParallelFor = ParFor; }
+
 public:
   /// Creates directive with a list of \p Clauses.
   ///
@@ -6145,7 +6150,7 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
   static OMPTargetTeamsGenericLoopDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool CanBeParallelFor);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -6159,6 +6164,10 @@ class OMPTargetTeamsGenericLoopDirective final : public OMPLoopDirective {
                                                          unsigned CollapsedNum,
                                                          EmptyShell);
 
+  /// Return true if the current loop directive's associated loop can be a
+  /// parallel for.
+  bool canBeParallelFor() const { return CanBeParallelFor; }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPTargetTeamsGenericLoopDirectiveClass;
   }
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 426b35848cb5c..d8519b2071e6d 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -2431,7 +2431,7 @@ OMPTeamsGenericLoopDirective::CreateEmpty(const ASTContext &C,
 OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+    const HelperExprs &Exprs, bool CanBeParallelFor) {
   auto *Dir = createDirective<OMPTargetTeamsGenericLoopDirective>(
       C, Clauses, AssociatedStmt,
       numLoopChildren(CollapsedNum, OMPD_target_teams_loop), StartLoc, EndLoc,
@@ -2473,6 +2473,7 @@ OMPTargetTeamsGenericLoopDirective *OMPTargetTeamsGenericLoopDirective::Create(
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
   Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
   Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+  Dir->setCanBeParallelFor(CanBeParallelFor);
   return Dir;
 }
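The AST changes above only store the decision; it is computed once in Sema (see the TeamsLoopChecker added to SemaOpenMP.cpp further down) and then consumed by CodeGen. At the source level, the distinction the flag records looks roughly like this (illustrative sketch only; saxpy and work are made-up names):

    // canBeParallelFor() == true: the loop nest contains no nested
    // 'loop bind(parallel)' and no calls to unknown functions, so the
    // directive can be lowered as 'target teams distribute parallel for'.
    void saxpy(int n, float a, float *x, float *y) {
    #pragma omp target teams loop
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
    }

    extern void work(int i); // opaque: might contain nested parallelism

    // canBeParallelFor() == false: the call to work() makes the body opaque,
    // so the directive is conservatively lowered as 'target teams distribute'.
    void driver(int n) {
    #pragma omp target teams loop
      for (int i = 0; i < n; ++i)
        work(i);
    }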
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index bc363313dec6f..dba5312ff8d05 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2656,11 +2656,12 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
   llvm::Value *Args[] = {
       emitUpdateLocation(CGF, Loc,
-                         isOpenMPDistributeDirective(DKind)
+                         isOpenMPDistributeDirective(DKind) ||
+                                 (DKind == OMPD_target_teams_loop)
                              ? OMP_IDENT_WORK_DISTRIBUTE
-                             : isOpenMPLoopDirective(DKind)
-                                   ? OMP_IDENT_WORK_LOOP
-                                   : OMP_IDENT_WORK_SECTIONS),
+                             : isOpenMPLoopDirective(DKind)
+                                   ? OMP_IDENT_WORK_LOOP
+                                   : OMP_IDENT_WORK_SECTIONS),
       getThreadID(CGF, Loc)};
   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
   if (isOpenMPDistributeDirective(DKind) &&
@@ -8885,7 +8886,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
     switch (D.getDirectiveKind()) {
     case OMPD_target:
-      // For now, just treat 'target teams loop' as if it's distributed.
+      // For now, treat 'target' with nested 'teams loop' as if it's
+      // distributed (target teams distribute).
       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
         return NestedDir;
       if (DKind == OMPD_teams) {
@@ -9369,7 +9371,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
         SizeEmitter) {
   OpenMPDirectiveKind Kind = D.getDirectiveKind();
   const OMPExecutableDirective *TD = &D;
-  // Get nested teams distribute kind directive, if any.
+  // Get nested teams distribute kind directive, if any. For now, treat
+  // 'target_teams_loop' as if it's really a target_teams_distribute.
   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
       Kind != OMPD_target_teams_loop)
     TD = getNestedDistributeDirective(CGM.getContext(), D);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 5baac8f0e3e26..59ba03c6b8625 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -646,7 +646,6 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_target:
   case OMPD_target_teams:
     return hasNestedSPMDDirective(Ctx, D);
-  case OMPD_target_teams_loop:
   case OMPD_target_parallel_loop:
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
@@ -658,6 +657,12 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
     return true;
   case OMPD_target_teams_distribute:
     return false;
+  case OMPD_target_teams_loop:
+    // Whether this is true or not depends on how the directive will
+    // eventually be emitted.
+    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
+      return TTLD->canBeParallelFor();
+    return false;
   case OMPD_parallel:
   case OMPD_for:
   case OMPD_parallel_for:
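With the flag in place, the SPMD query mirrors the emission decision: a 'target teams loop' is an SPMD kernel exactly when it will be emitted in its parallel form. Written out as explicit directives, the two lowerings chosen in CGStmtOpenMP.cpp below correspond to (illustration only; CodeGen still consumes the single 'target teams loop' AST node):

    #pragma omp target teams distribute parallel for  // canBeParallelFor() == true  -> SPMD mode
    #pragma omp target teams distribute               // canBeParallelFor() == false -> generic mode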
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index e6d504bcdeca5..3bf99366b69ce 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -24,6 +24,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/PrettyStackTrace.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -34,11 +35,14 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include <optional>
 
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
 
+#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
+
 static const VarDecl *getBaseDecl(const Expr *Ref);
 
 namespace {
@@ -1432,9 +1436,12 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
         *this, D.getBeginLoc(),
         isOpenMPWorksharingDirective(D.getDirectiveKind()));
   }
+  bool TeamsLoopCanBeParallel = false;
+  if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
+    TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
   bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                     isOpenMPParallelDirective(D.getDirectiveKind()) ||
-                    ReductionKind == OMPD_simd;
+                    TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
   bool SimpleReduction = ReductionKind == OMPD_simd;
   // Emit nowait reduction if nowait clause is present or directive is a
   // parallel directive (it always has implicit barrier).
@@ -7928,11 +7935,9 @@ void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
 void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
     const OMPTeamsGenericLoopDirective &S) {
   // To be consistent with current behavior of 'target teams loop', emit
-  // 'teams loop' as if its constituent constructs are 'distribute',
-  // 'parallel', and 'for'.
+  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
-                              S.getDistInc());
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
   };
 
   // Emit teams region as a standalone region.
@@ -7946,15 +7951,33 @@ void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
-  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
   emitPostUpdateForReductionClause(*this, S,
                                    [](CodeGenFunction &) { return nullptr; });
 }
 
-static void
-emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
-                                 const OMPTargetTeamsGenericLoopDirective &S,
-                                 PrePostActionTy &Action) {
+static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
+                                             std::string StatusMsg,
+                                             const OMPExecutableDirective &D) {
+#ifndef NDEBUG
+  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
+  if (IsDevice)
+    StatusMsg += ": DEVICE";
+  else
+    StatusMsg += ": HOST";
+  SourceLocation L = D.getBeginLoc();
+  auto &SM = CGF.getContext().getSourceManager();
+  PresumedLoc PLoc = SM.getPresumedLoc(L);
+  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
+  unsigned LineNo =
+      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
+  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
+#endif
+}
+
+static void emitTargetTeamsGenericLoopRegionAsParallel(
+    CodeGenFunction &CGF, PrePostActionTy &Action,
+    const OMPTargetTeamsGenericLoopDirective &S) {
   Action.Enter(CGF);
   // Emit 'teams loop' as if its constituent constructs are 'distribute',
   // 'parallel', and 'for'.
@@ -7974,19 +7997,50 @@ emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
-
+  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
+                  emitTargetTeamsLoopCodegenStatus(
+                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                               CodeGenTeams);
   emitPostUpdateForReductionClause(CGF, S,
                                    [](CodeGenFunction &) { return nullptr; });
 }
 
-/// Emit combined directive 'target teams loop' as if its constituent
-/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
+static void emitTargetTeamsGenericLoopRegionAsDistribute(
+    CodeGenFunction &CGF, PrePostActionTy &Action,
+    const OMPTargetTeamsGenericLoopDirective &S) {
+  Action.Enter(CGF);
+  // Emit 'teams loop' as if its constituent construct is 'distribute'.
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
+                  emitTargetTeamsLoopCodegenStatus(
+                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
+  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
+  emitPostUpdateForReductionClause(CGF, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
     const OMPTargetTeamsGenericLoopDirective &S) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+    if (S.canBeParallelFor())
+      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+    else
+      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
   };
   emitCommonOMPTargetDirective(*this, S, CodeGen);
 }
@@ -7996,7 +8050,10 @@ void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
     const OMPTargetTeamsGenericLoopDirective &S) {
   // Emit SPMD target parallel loop region as a standalone region.
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
+    if (S.canBeParallelFor())
+      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
+    else
+      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
   };
   llvm::Function *Fn;
   llvm::Constant *Addr;
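Since the status message above is guarded by DEBUG_WITH_TYPE and #ifndef NDEBUG, it is only available in assertion-enabled builds, through the usual named-debug-type machinery (e.g. clang -fopenmp ... -mllvm -debug-only=target-teams-loop-codegen). Given the dbgs() format in emitTargetTeamsLoopCodegenStatus, the output looks like the following (hypothetical file name and line numbers):

    target-teams-loop-codegen as parallel for: HOST: teams_loop.c: 12
    target-teams-loop-codegen as distribute: DEVICE: teams_loop.c: 27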
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0ba54a3a9cae3..c814535ad6bdb 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4478,6 +4478,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
                              Params);
     break;
   }
+  // For 'target teams loop', collect all captured regions so codegen can
+  // later decide the best IR to emit given the associated loop-nest.
   case OMPD_target_teams_loop:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd: {
@@ -6135,6 +6137,79 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack,
   }
 }
 
+namespace {
+/// A 'teams loop' with a nested 'loop bind(parallel)' or generic function
+/// call in the associated loop-nest cannot be a 'parallel for'.
+class TeamsLoopChecker final : public ConstStmtVisitor<TeamsLoopChecker> {
+  Sema &SemaRef;
+
+public:
+  bool teamsLoopCanBeParallelFor() const { return TeamsLoopCanBeParallelFor; }
+
+  // Is there a nested OpenMP loop bind(parallel)?
+  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
+    if (D->getDirectiveKind() == llvm::omp::Directive::OMPD_loop) {
+      if (const auto *C = D->getSingleClause<OMPBindClause>())
+        if (C->getBindKind() == OMPC_BIND_parallel) {
+          TeamsLoopCanBeParallelFor = false;
+          // No need to continue visiting.
+          return;
+        }
+    }
+    for (const Stmt *Child : D->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitCallExpr(const CallExpr *C) {
+    // Function calls inhibit parallel loop translation of 'target teams loop'
+    // unless the assume-no-nested-parallelism flag has been specified.
+    // OpenMP API runtime library calls do not inhibit parallel loop
+    // translation, regardless of the assume-no-nested-parallelism flag.
+    if (C) {
+      bool IsOpenMPAPI = false;
+      auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl());
+      if (FD) {
+        std::string Name = FD->getNameInfo().getAsString();
+        IsOpenMPAPI = Name.find("omp_") == 0;
+      }
+      TeamsLoopCanBeParallelFor =
+          IsOpenMPAPI || SemaRef.getLangOpts().OpenMPNoNestedParallelism;
+      if (!TeamsLoopCanBeParallelFor)
+        return;
+    }
+    for (const Stmt *Child : C->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitCapturedStmt(const CapturedStmt *S) {
+    if (!S)
+      return;
+    Visit(S->getCapturedDecl()->getBody());
+  }
+
+  void VisitStmt(const Stmt *S) {
+    if (!S)
+      return;
+    for (const Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
+  }
+  explicit TeamsLoopChecker(Sema &SemaRef)
+      : SemaRef(SemaRef), TeamsLoopCanBeParallelFor(true) {}
+
+private:
+  bool TeamsLoopCanBeParallelFor;
+};
+} // namespace
+
+static bool teamsLoopCanBeParallelFor(Stmt *AStmt, Sema &SemaRef) {
+  TeamsLoopChecker Checker(SemaRef);
+  Checker.Visit(AStmt);
+  return Checker.teamsLoopCanBeParallelFor();
+}
+
 bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
                             ArrayRef<OMPClause *> Clauses,
                             OpenMPBindClauseKind &BindKind,
@@ -10895,7 +10970,8 @@ StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective(
 
   setFunctionHasBranchProtectedScope();
   return OMPTargetTeamsGenericLoopDirective::Create(
-      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      teamsLoopCanBeParallelFor(AStmt, *this));
 }
 
 StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective(
@@ -15645,6 +15721,12 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
       if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
         CaptureRegion = OMPD_target;
       break;
+    case OMPD_teams_loop:
+    case OMPD_target_teams_loop:
+      // For [target] teams loop, assume capture region is 'teams' so it's
+      // available for codegen later to use if/when necessary.
+      CaptureRegion = OMPD_teams;
+      break;
     case OMPD_target_teams_distribute_parallel_for_simd:
       if (OpenMPVersion >= 50 &&
          (NameModifier == OMPD_unknown || NameModifier == OMPD_simd)) {
        CaptureRegion = OMPD_parallel;
        break;
      }
      [[fallthrough]];
-    case OMPD_target_teams_loop:
     case OMPD_target_teams_distribute_parallel_for:
       // If this clause applies to the nested 'parallel' region, capture within
       // the 'teams' region, otherwise do not capture.
@@ -15775,7 +15856,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_declare_target:
     case OMPD_end_declare_target:
     case OMPD_loop:
-    case OMPD_teams_loop:
     case OMPD_teams:
     case OMPD_tile:
     case OMPD_unroll:
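Concretely, the checker separates cases like these (assumed example, not taken from the test suite):

    #include <omp.h>

    void f(int n, double *v) {
      // A nested 'loop bind(parallel)' asks for a parallel region inside each
      // outer iteration, so the outer directive cannot itself become a
      // 'parallel for': canBeParallelFor == false.
    #pragma omp target teams loop
      for (int i = 0; i < n; ++i) {
    #pragma omp loop bind(parallel)
        for (int j = 0; j < n; ++j)
          v[i * n + j] += 1.0;
      }

      // Callees whose names start with "omp_" are treated as OpenMP API
      // calls and do not block the translation: canBeParallelFor == true.
    #pragma omp target teams loop
      for (int i = 0; i < n; ++i)
        v[i] += omp_get_team_num();
    }

For arbitrary calls, -fopenmp-assume-no-nested-parallelism (the OpenMPNoNestedParallelism language option tested above) restores the parallel-for lowering.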
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 674ed47581dfd..8db1ec7be91cc 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2776,6 +2776,7 @@ void ASTStmtReader::VisitOMPTeamsGenericLoopDirective(
 void ASTStmtReader::VisitOMPTargetTeamsGenericLoopDirective(
     OMPTargetTeamsGenericLoopDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setCanBeParallelFor(Record.readBool());
 }
 
 void ASTStmtReader::VisitOMPParallelGenericLoopDirective(
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 7ce48fede383e..e850329db5192 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2823,6 +2823,7 @@ void ASTStmtWriter::VisitOMPTeamsGenericLoopDirective(
 void ASTStmtWriter::VisitOMPTargetTeamsGenericLoopDirective(
     OMPTargetTeamsGenericLoopDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.writeBool(D->canBeParallelFor());
   Code = serialization::STMT_OMP_TARGET_TEAMS_GENERIC_LOOP_DIRECTIVE;
 }
 
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
index 5226b7498e4ce..c89c6eb65706a 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
@@ -320,23 +320,31 @@ int bar(int n){
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 // CHECK1: user_code.entry:
 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
 // CHECK1-NEXT: call void @__kmpc_target_deinit()
 // CHECK1-NEXT: ret void
 // CHECK1: worker.exit:
@@ -344,11 +352,12 @@ int bar(int n){
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -360,6 +369,7 @@ int bar(int n){
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1566,23 +1576,31 @@ int bar(int n){
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK2-NEXT: entry:
 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 // CHECK2: user_code.entry:
 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
 // CHECK2-NEXT: call void @__kmpc_target_deinit()
 // CHECK2-NEXT: ret void
 // CHECK2: worker.exit:
@@ -1590,11 +1608,12 @@ int bar(int n){
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT: entry:
 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -1606,6 +1625,7 @@ int bar(int n){
 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
@@ -2801,23 +2821,31 @@ int bar(int n){
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
-// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK3-NEXT: entry:
 // CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 // CHECK3: user_code.entry:
 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
 // CHECK3-NEXT: call void @__kmpc_target_deinit()
 // CHECK3-NEXT: ret void
 // CHECK3: worker.exit:
@@ -2825,11 +2853,12 @@ int bar(int n){
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT: entry:
 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -2841,6 +2870,7 @@ int bar(int n){
 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
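The second test pins down the distribute-only lowering on the generic-mode path. Judging from the CHECK lines, its target region is shaped roughly like this (reconstructed sketch; the real source lives in the unchanged part of the test file):

    int foo(int *p);

    int main(int argc, char **argv) {
      int a;
    #pragma omp target teams loop
      for (int i = 0; i < argc; ++i)
        a = foo(&i) + foo(&a) + foo(&argc); // opaque calls: distribute lowering
      return 0;
    }

Accordingly, the updated checks drop the __kmpc_parallel_51 outlining and the inner __kmpc_for_static_init_4 worksharing loop, and __kmpc_distribute_static_init_4 switches from schedule 91 with a hardware-thread-count chunk to schedule 92 with chunk 1 (chunked vs. unchunked static distribute scheduling in the runtime's enumeration).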
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
index ca2670f0cd643..f0effa760dcdb 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
@@ -30,17 +30,20 @@ int main(int argc, char **argv) {
 #endif
 
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
@@ -50,9 +53,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT: call void @__kmpc_target_deinit()
 // CHECK1-NEXT: ret void
 // CHECK1: worker.exit:
@@ -60,56 +68,55 @@ int main(int argc, char **argv) {
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK1: omp.precond.then:
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT: br label [[COND_END:%.*]]
 // CHECK1: cond.false:
 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -122,177 +129,54 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
-// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
-// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
-// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: br label [[COND_END12:%.*]]
-// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END12]]
-// CHECK1: cond.end12:
-// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
-// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK1: omp.precond.end:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
 // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]]
-// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]]
 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]]
 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1
 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]])
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
 // CHECK1: omp.precond.end:
 // CHECK1-NEXT: ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
-// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+-// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK2-NEXT: entry:
 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
@@ -302,9 +186,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
 // CHECK2-NEXT: call void @__kmpc_target_deinit()
 // CHECK2-NEXT: ret void
 // CHECK2: worker.exit:
@@ -312,56 +201,55 @@ int main(int argc, char **argv) {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK2-NEXT: entry:
 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK2: omp.precond.then:
 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
-// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT: br label [[COND_END:%.*]]
 // CHECK2: cond.false:
 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
@@ -374,155 +262,34 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK2: omp.inner.for.cond:
 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
-// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
-// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
-// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
-// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
-// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: br label [[COND_END12:%.*]]
-// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: br label [[COND_END12]]
-// CHECK2: cond.end12:
-// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK2: omp.inner.for.end:
-// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
-// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK2: omp.precond.end:
-// CHECK2-NEXT: ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
-// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
-// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
-// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] -// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK2-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]]) +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) // CHECK2-NEXT: br label 
[[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp index 22cf534bf0ba2..3f752ac663f41 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -28,1118 +28,6 @@ int foo() { return 0; } #endif -// IR-PCH-HOST-LABEL: define {{[^@]+}}@_Z3foov -// IR-PCH-HOST-SAME: () #[[ATTR0:[0-9]+]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] -// IR-PCH-HOST-NEXT: ret i32 0 -// IR-PCH-HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 -// IR-PCH-HOST-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP0]]) -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined. 
-// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J4:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR-PCH-HOST: omp.arrayinit.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR-PCH-HOST: omp.arrayinit.done: -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// 
IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH-HOST: cond.true: -// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] -// IR-PCH-HOST: cond.false: -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: br label [[COND_END]] -// IR-PCH-HOST: cond.end: -// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH-HOST: omp.inner.for.cond: -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH-HOST: omp.inner.for.body: -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 -// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 -// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-PCH-HOST: omp.inner.for.inc: -// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]] -// IR-PCH-HOST: omp.inner.for.end: -// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-PCH-HOST: omp.loop.exit: -// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) -// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// IR-PCH-HOST-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR-PCH-HOST: .omp.lastprivate.then: -// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 -// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR-PCH-HOST: .omp.lastprivate.done: -// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 -// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-PCH-HOST-NEXT: 
[[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-PCH-HOST-NEXT: ] -// IR-PCH-HOST: .omp.reduction.case1: -// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH-HOST: omp.arraycpy.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// IR-PCH-HOST-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH-HOST: omp.arraycpy.done10: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.case2: -// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// IR-PCH-HOST: omp.arraycpy.body12: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 -// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label 
[[OMP_ARRAYCPY_BODY12]] -// IR-PCH-HOST: omp.arraycpy.done18: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.default: -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined..1 -// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 -// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[J5:%.*]] = alloca i32, align 4 -// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// IR-PCH-HOST-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR-PCH-HOST: 
omp.arrayinit.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR-PCH-HOST: omp.arrayinit.done: -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 -// IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR-PCH-HOST: cond.true: -// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] -// IR-PCH-HOST: cond.false: -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: br label [[COND_END]] -// IR-PCH-HOST: cond.end: -// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] -// IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR-PCH-HOST: omp.inner.for.cond: -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// IR-PCH-HOST-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR-PCH-HOST: omp.inner.for.body: -// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 -// IR-PCH-HOST-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 -// IR-PCH-HOST-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// IR-PCH-HOST-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] -// IR-PCH-HOST-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// IR-PCH-HOST-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// IR-PCH-HOST-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] 
to i64 -// IR-PCH-HOST-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] -// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 -// IR-PCH-HOST-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] -// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] -// IR-PCH-HOST-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR-PCH-HOST: omp.body.continue: -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR-PCH-HOST: omp.inner.for.inc: -// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 -// IR-PCH-HOST-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] -// IR-PCH-HOST: omp.inner.for.end: -// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR-PCH-HOST: omp.loop.exit: -// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) -// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 -// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-PCH-HOST-NEXT: ] -// IR-PCH-HOST: .omp.reduction.case1: -// IR-PCH-HOST-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH-HOST: omp.arraycpy.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// IR-PCH-HOST-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH-HOST: omp.arraycpy.done19: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.case2: -// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// IR-PCH-HOST: omp.arraycpy.body21: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 -// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] -// IR-PCH-HOST: omp.arraycpy.done27: -// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR-PCH-HOST: .omp.reduction.default: -// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// IR-PCH-HOST-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR-PCH-HOST: .omp.lastprivate.then: -// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-HOST-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 -// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR-PCH-HOST: .omp.lastprivate.done: -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP3]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH-HOST: omp.arraycpy.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH-HOST: omp.arraycpy.done2: -// IR-PCH-HOST-NEXT: ret void -// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2 -// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { -// IR-PCH-HOST-NEXT: entry: -// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR-PCH-HOST: omp.arraycpy.body: -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// 
IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] -// IR-PCH-HOST: omp.arraycpy.done2: -// IR-PCH-HOST-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l23 -// CHECK-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr -// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr -// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr -// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr -// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr -// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) -// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK: user_code.entry: -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 -// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) -// CHECK-NEXT: ret void -// CHECK: worker.exit: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: 
[[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr -// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr -// CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr -// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr -// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr -// CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr -// CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr -// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr -// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr -// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr -// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr -// CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr -// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] -// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label 
[[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// CHECK: omp.arrayinit.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// CHECK: omp.arrayinit.done: -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK: cond.true: -// CHECK-NEXT: br label [[COND_END:%.*]] -// CHECK: cond.false: -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: br label [[COND_END]] -// CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 -// CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr -// CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 
-// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 -// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 -// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK: cond.true9: -// CHECK-NEXT: br label [[COND_END11:%.*]] -// CHECK: cond.false10: -// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: br label [[COND_END11]] -// CHECK: cond.end11: -// CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] -// CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) -// CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK: .omp.lastprivate.then: -// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 -// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK: .omp.lastprivate.done: -// CHECK-NEXT: 
[[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 -// CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.3, ptr @_omp_reduction_inter_warp_copy_func.4, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 -// CHECK-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK: .omp.reduction.then: -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK: omp.arraycpy.done17: -// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK: .omp.reduction.done: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__.1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: 
[[TMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr -// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr -// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr -// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr -// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr -// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr -// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr -// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr -// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr -// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr -// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr -// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr -// CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr -// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr -// CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// CHECK: omp.arrayinit.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// CHECK: omp.arrayinit.done: -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] -// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 -// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] -// CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// CHECK-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[IDXPROM:%.*]] 
= sext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] -// CHECK-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK: omp.body.continue: -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) -// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 -// CHECK-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK: .omp.reduction.then: -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr 
i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] -// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// CHECK: omp.arraycpy.done19: -// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK: .omp.reduction.done: -// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK: .omp.lastprivate.then: -// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 -// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK: .omp.lastprivate.done: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 -// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 -// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 -// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 -// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] -// CHECK: .shuffle.pre_cond: -// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ 
[[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] -// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 -// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 -// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] -// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 -// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] -// CHECK: .shuffle.then: -// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 -// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) -// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 -// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 -// CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 -// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] -// CHECK: .shuffle.exit: -// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] -// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 -// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 -// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] -// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] -// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] -// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK: then: -// CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] -// CHECK-NEXT: br label [[IFCONT:%.*]] -// CHECK: else: -// CHECK-NEXT: br label [[IFCONT]] -// CHECK: ifcont: -// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK: then4: -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) -// CHECK-NEXT: br label [[IFCONT6:%.*]] -// CHECK: else5: -// CHECK-NEXT: br label [[IFCONT6]] -// CHECK: ifcont6: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: 
[[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[PRECOND:%.*]] -// CHECK: precond: -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 -// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK: body: -// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) -// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK: then: -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 -// CHECK-NEXT: br label [[IFCONT:%.*]] -// CHECK: else: -// CHECK-NEXT: br label [[IFCONT]] -// CHECK: ifcont: -// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] -// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] -// CHECK: then2: -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] -// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 -// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -// CHECK-NEXT: br label [[IFCONT4:%.*]] -// CHECK: else3: -// CHECK-NEXT: br label [[IFCONT4]] -// CHECK: ifcont4: -// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[PRECOND]] -// CHECK: exit: -// CHECK-NEXT: ret 
void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.3 -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 -// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 -// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 -// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 -// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] -// CHECK: .shuffle.pre_cond: -// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] -// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 -// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 -// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] -// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 -// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] -// CHECK: .shuffle.then: -// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 -// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) -// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 -// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 -// 
CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 -// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] -// CHECK: .shuffle.exit: -// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] -// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 -// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 -// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] -// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] -// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] -// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] -// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK: then: -// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] -// CHECK-NEXT: br label [[IFCONT:%.*]] -// CHECK: else: -// CHECK-NEXT: br label [[IFCONT]] -// CHECK: ifcont: -// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK: then4: -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) -// CHECK-NEXT: br label [[IFCONT6:%.*]] -// CHECK: else5: -// CHECK-NEXT: br label [[IFCONT6]] -// CHECK: ifcont6: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.4 -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 0, ptr 
[[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[PRECOND:%.*]] -// CHECK: precond: -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 -// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK: body: -// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) -// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK: then: -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 -// CHECK-NEXT: br label [[IFCONT:%.*]] -// CHECK: else: -// CHECK-NEXT: br label [[IFCONT]] -// CHECK: ifcont: -// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] -// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] -// CHECK: then2: -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] -// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 -// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 -// CHECK-NEXT: br label [[IFCONT4:%.*]] -// CHECK: else3: -// CHECK-NEXT: br label [[IFCONT4]] -// CHECK: ifcont4: -// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 -// CHECK-NEXT: br label [[PRECOND]] -// CHECK: exit: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] -// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr -// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] -// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] -// CHECK-NEXT: ret void // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { // IR-GPU-NEXT: entry: @@ -2015,7 +903,7 @@ int foo() { // IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 // IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: switch i32 [[TMP25]], 
label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2036,7 +924,7 @@ int foo() { // IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] // IR: omp.arraycpy.done10: -// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.case2: // IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2052,7 +940,6 @@ int foo() { // IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] // IR: omp.arraycpy.done18: -// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.default: // IR-NEXT: ret void @@ -2171,7 +1058,7 @@ int foo() { // IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 // IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2192,7 +1079,7 @@ int foo() { // IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] // IR: omp.arraycpy.done19: -// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.case2: // IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2208,7 +1095,6 @@ int foo() { // IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] // IR: omp.arraycpy.done27: -// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.default: // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 @@ -2412,7 +1298,7 @@ int foo() { // IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 // IR-PCH-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2433,7 +1319,7 @@ int foo() { // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] // IR-PCH: omp.arraycpy.done10: -// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR-PCH: .omp.reduction.case2: // IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2449,7 +1335,6 @@ int foo() { // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] // IR-PCH: omp.arraycpy.done18: -// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR-PCH: .omp.reduction.default: // IR-PCH-NEXT: ret void @@ -2568,7 +1453,7 @@ int foo() { // IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 // IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2589,7 +1474,7 @@ int foo() { // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] // IR-PCH: omp.arraycpy.done19: -// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: call void 
@__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.case2:
// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100
@@ -2605,7 +1490,6 @@ int foo() {
// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]]
// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
// IR-PCH: omp.arraycpy.done27:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// IR-PCH: .omp.reduction.default:
// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
new file mode 100644
index 0000000000000..f3bbbc6229abd
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp
@@ -0,0 +1,587 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
+extern int foo(int i);
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+int N = 100000;
+int main()
+{
+ int i;
+ int a[N];
+ int b[N];
+
+ // Presence of a call: cannot use 'parallel for'; must use 'distribute' when
+ // -fopenmp-assume-no-nested-parallelism isn't specified.
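+ // As an illustrative contrast (hypothetical snippet, not exercised by this
+ // test): a loop whose body makes no opaque calls, e.g.
+ //   #pragma omp target teams loop
+ //   for (i=0; i < N; i++)
+ //     a[i] = b[i] * N;
+ // can instead be lowered as a 'parallel for', since no callee can
+ // introduce nested parallelism.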
+ #pragma omp target teams loop + for (i=0; i < N; i++) { + for (int j=0; j < N; j++) { + a[i] = b[i] * N + foo(j); + } + } + return 0; +} +#endif +// IR-GPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr 
[[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND:%.*]] +// IR-GPU: for.cond: +// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-GPU-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-GPU: for.body: +// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) #[[ATTR4:[0-9]+]] +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-GPU-NEXT: br label [[FOR_INC:%.*]] +// IR-GPU: for.inc: +// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// IR-GPU: for.end: +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// IR-GPU-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP28]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: 
omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-LABEL: define dso_local noundef i32 @main +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-NEXT: ret i32 [[TMP8]] +// +// +// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 +// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define dso_local noundef i32 @main +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-PCH-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: ret i32 [[TMP8]] +// +// +// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH: omp.precond.then: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, 
ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NEXT: br label [[FOR_COND:%.*]] +// IR-PCH: for.cond: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// IR-PCH-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR-PCH: for.body: +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP23]]) +// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[CALL]] +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 +// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM11]] +// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// IR-PCH-NEXT: br label [[FOR_INC:%.*]] +// IR-PCH: for.inc: +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-PCH-NEXT: br label 
[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// IR-PCH:       for.end:
+// IR-PCH-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH:       omp.body.continue:
+// IR-PCH-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH:       omp.inner.for.inc:
+// IR-PCH-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1
+// IR-PCH-NEXT:    store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// IR-PCH:       omp.inner.for.end:
+// IR-PCH-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH:       omp.loop.exit:
+// IR-PCH-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-PCH-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-PCH-NEXT:    br label [[OMP_PRECOND_END]]
+// IR-PCH:       omp.precond.end:
+// IR-PCH-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp
new file mode 100644
index 0000000000000..7c7cdc53fa2d2
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp
@@ -0,0 +1,3998 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
+
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-assume-no-nested-parallelism -DNESTED -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU-NESTED
+
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-NESTED
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-assume-no-nested-parallelism -DNESTED -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -fopenmp-assume-no-nested-parallelism -DNESTED -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH-NESTED
+
+// expected-no-diagnostics
+
+#ifndef NESTED
+extern int omp_get_num_teams(void);
+#endif
+
+#ifndef HEADER
+#define HEADER
+extern int foo(int i);
+
+int N = 100000;
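+
+// Editorial illustration, not part of the autogenerated checks: each
+// 'target teams loop' in main() below is expected to be emitted as if it
+// had been written as the combined worksharing directive. For example,
+// the first loop,
+//
+//   #pragma omp target teams loop
+//   for (int j = 0; j != N; j++)
+//     a[j] = b[j];
+//
+// is lowered as if it read
+//
+//   #pragma omp target teams distribute parallel for
+//   for (int j = 0; j != N; j++)
+//     a[j] = b[j];
+//
+// which is why the IR-GPU checks further down expect a parallel region
+// (the __kmpc_parallel_51 call) nested inside the distribute loop rather
+// than a distribute-only lowering.
+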
+int main()
+{
+  int a[N];
+  int b[N];
+
+#ifndef NESTED
+  // Should be transformed into 'target teams distribute parallel for'
+  #pragma omp target teams loop
+  for (int j = 0; j != N; j++)
+    a[j]=b[j];
+
+  // Should be transformed into 'target teams distribute parallel for'
+  #pragma omp target teams loop collapse(2)
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      a[i] = b[i] * N + j;
+    }
+  }
+
+  int nt = 0;
+  // Should be transformed into 'target teams distribute parallel for'
+  #pragma omp target teams loop num_teams(32)
+  for (int i=0; i < N; i++) {
+    if (!nt) nt = omp_get_num_teams();
+    for (int j=0; j < N; j++)
+      a[j] = b[j] * N + nt;
+  }
+#else
+  // Should be transformed into 'target teams distribute parallel for'
+  // even with a function call, because -fopenmp-assume-no-nested-parallelism
+  // is in effect.
+  #pragma omp target teams loop collapse(2)
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      a[i] = b[i] * N + foo(j);
+    }
+  }
+#endif
+  return 0;
+}
+#endif
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// IR-GPU-NEXT:  entry:
+// IR-GPU-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT:    [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT:    [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT:    [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT:    [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT:    [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT:    [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT:    store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT:    [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr
[[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment to ptr), ptr [[DYN_PTR]]) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit() +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: 
[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr +// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NEXT: [[TMP8:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr +// IR-GPU-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr +// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr +// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP0]] to ptr +// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4 +// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8 +// IR-GPU-NEXT: [[TMP32:%.*]] = 
getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5 +// IR-GPU-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// IR-GPU-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6 +// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8 +// IR-GPU-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7) +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// IR-GPU: cond.true12: +// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14:%.*]] +// IR-GPU: cond.false13: +// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END14]] +// IR-GPU: cond.end14: +// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE12]] ], [ [[TMP46]], [[COND_FALSE13]] ] +// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP49]]) +// IR-GPU-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU: omp.precond.end: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_omp_outlined_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 
noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J6:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr +// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J6_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[J6]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU: omp.precond.then: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 +// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]] +// 
IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[J6_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
+// IR-GPU-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX10]], align 4
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP22]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: land.lhs.true:
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
+// IR-GPU-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
+// IR-GPU-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
+// IR-GPU-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
+// IR-GPU-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
+// IR-GPU-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
+// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
+// IR-GPU-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
+// IR-GPU-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
+// IR-GPU-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
+// IR-GPU-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
+// IR-GPU: cond.true20:
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END22:%.*]]
+// IR-GPU: cond.false21:
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[COND_END22]]
+// IR-GPU: cond.end22:
+// IR-GPU-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ]
+// IR-GPU-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
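+// The checks that follow cover the worker side of the l46 'target teams loop'
+// region: the distribute chunk bounds received in .previous.lb. and
+// .previous.ub. are installed as this function's omp.lb/omp.ub before the call
+// to __kmpc_for_static_init_8, and the collapsed (i, j) induction variables
+// are then recovered from the linearized 64-bit IV.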
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: land.lhs.true:
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]]
+// IR-GPU-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-GPU-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// IR-GPU-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]]
+// IR-GPU-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64
+// IR-GPU-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]]
+// IR-GPU-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]]
+// IR-GPU-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32
+// IR-GPU-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0
+// IR-GPU-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1
+// IR-GPU-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]]
+// IR-GPU-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64
+// IR-GPU-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]]
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-GPU-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
+// IR-GPU-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]]
+// IR-GPU-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64
+// IR-GPU-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]]
+// IR-GPU-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]]
+// IR-GPU-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1
+// IR-GPU-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]]
+// IR-GPU-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32
+// IR-GPU-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]]
+// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[TMP27]]
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]]
+// IR-GPU-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]]
+// IR-GPU-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
+// IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
+// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU: user_code.entry:
+// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP8]], ptr [[NT_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2]]
+// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: ret void
+// IR-GPU: worker.exit:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x ptr], align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// IR-GPU-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NEXT: [[NT_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_CASTED]] to ptr
+// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-GPU-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU: cond.true:
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END:%.*]]
+// IR-GPU: cond.false:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END]]
+// IR-GPU: cond.end:
+// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
+// IR-GPU-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
+// IR-GPU-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP18]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
+// IR-GPU-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
+// IR-GPU-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
+// IR-GPU-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8
+// IR-GPU-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NEXT: store ptr [[TMP1]], ptr [[TMP35]], align 8
+// IR-GPU-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NEXT: store ptr [[TMP37]], ptr [[TMP36]], align 8
+// IR-GPU-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 7
+// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP38]], align 8
+// IR-GPU-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
+// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP40]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 8)
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
+// IR-GPU-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP43]], [[TMP44]]
+// IR-GPU-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
+// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP47]], [[TMP48]]
+// IR-GPU-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// IR-GPU: cond.true12:
+// IR-GPU-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14:%.*]]
+// IR-GPU: cond.false13:
+// IR-GPU-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[COND_END14]]
+// IR-GPU: cond.end14:
+// IR-GPU-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP49]], [[COND_TRUE12]] ], [ [[TMP50]], [[COND_FALSE13]] ]
+// IR-GPU-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP51]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4
+// IR-GPU-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP53]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55_omp_outlined_omp_outlined
+// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+// IR-GPU-NEXT: entry:
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// IR-GPU-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NEXT: [[NT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NT_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr
+// IR-GPU-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[NT]], ptr [[NT_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-GPU-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU: omp.precond.then:
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-GPU-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU: omp.inner.for.cond:
+// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64
+// IR-GPU-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP14]]
+// IR-GPU-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU: omp.inner.for.body:
+// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0
+// IR-GPU-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+// IR-GPU: if.then:
+// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() #[[ATTR6:[0-9]+]]
+// IR-GPU-NEXT: store i32 [[CALL]], ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[IF_END]]
+// IR-GPU: if.end:
+// IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND:%.*]]
+// IR-GPU: for.cond:
+// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// IR-GPU-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-GPU: for.body:
+// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-GPU-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[NT_ADDR_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP22]]
+// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP23]] to i64
+// IR-GPU-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
+// IR-GPU-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
+// IR-GPU-NEXT: br label [[FOR_INC:%.*]]
+// IR-GPU: for.inc:
+// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1
+// IR-GPU-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
+// IR-GPU: for.end:
+// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-GPU: omp.body.continue:
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU: omp.inner.for.inc:
+// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
+// IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-GPU: omp.inner.for.end:
+// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-GPU: omp.loop.exit:
+// IR-GPU-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP28]])
+// IR-GPU-NEXT: br label [[OMP_PRECOND_END]]
+// IR-GPU: omp.precond.end:
+// IR-GPU-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@main
+// IR-SAME: () #[[ATTR0:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// IR-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// IR-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8
+// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// IR-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0()
+// IR-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8
+// IR-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16
+// IR-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// IR-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16
+// IR-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]]
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-NEXT: store i32 0, ptr [[NT]], align 4
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4
+// IR-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4
+// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4
+// IR-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4
+// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]]
+// IR-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// IR-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]])
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4
+// IR-NEXT: ret i32 [[TMP14]]
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
+// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-NEXT: ret void
+//
+//
+// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
+// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-NEXT: entry:
+// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J5:%.*]] = alloca i32, align 4
+// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 0, ptr [[J]], align 4
+// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR: omp.precond.then:
+// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR: cond.true:
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT: br label [[COND_END:%.*]]
+// IR: cond.false:
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: br label [[COND_END]]
+// IR: cond.end:
+// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J6:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[J6]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4 +// IR-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64 +// IR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +// IR-NEXT: store i32 [[TMP21]], ptr 
[[ARRAYIDX10]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1 +// IR-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: land.lhs.true: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR: omp.precond.then: +// IR-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// 
IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: land.lhs.true: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR: omp.precond.then: +// IR-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// IR-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// IR-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]] +// IR-NEXT: br i1 [[CMP14]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]] +// IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// IR-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// IR-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0 +// IR-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// IR-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// IR-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// IR-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0 +// IR-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// IR-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// IR-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// IR-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// IR-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]] +// IR-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// IR-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// IR-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// IR-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]] +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]] +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64 +// IR-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]] +// IR-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1 +// IR-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55 +// IR-SAME: (i64 noundef [[N:%.*]], i64 noundef 
[[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0) +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// IR-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4 +// IR-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I6:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// IR-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// IR-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// IR-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// IR: if.then: +// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv() +// IR-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4 +// 
IR-NEXT: br label [[IF_END]] +// IR: if.end: +// IR-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]] +// IR-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]] +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4 +// IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// IR-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]] +// IR-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4 +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 +// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@main +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[N_CASTED2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-PCH-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-PCH-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-PCH-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-PCH-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-PCH-NEXT: store i64 
[[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[N_CASTED2]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[N_CASTED2]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46(i64 [[TMP8]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-PCH-NEXT: store i32 0, ptr [[NT]], align 4 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[NT]], align 4 +// IR-PCH-NEXT: store i32 [[TMP11]], ptr [[NT_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[NT_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3]] +// IR-PCH-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-PCH-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP13]]) +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-PCH-NEXT: ret i32 [[TMP14]] +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 +// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J6:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[J6]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J6]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J6]], align 4
+// IR-PCH-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], 1
+// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: land.lhs.true:
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]]
+// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]]
+// IR-PCH-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-PCH-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-PCH-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: land.lhs.true:
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-PCH-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// IR-PCH-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]]
+// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0
+// IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]]
+// IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-PCH-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
+// IR-PCH-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0
+// IR-PCH-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
+// IR-PCH-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
+// IR-PCH-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
+// IR-PCH-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]]
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// IR-PCH-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0
+// IR-PCH-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// IR-PCH-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
+// IR-PCH-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
+// IR-PCH-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
+// IR-PCH-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]]
+// IR-PCH-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
+// IR-PCH-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
+// IR-PCH-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
+// IR-PCH-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]]
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
+// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[TMP31]]
+// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4
+// IR-PCH-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]]
+// IR-PCH-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1
+// IR-PCH-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55
+// IR-PCH-SAME: (i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 32, i32 0)
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[NT_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined, i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I5:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_CASTED:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// IR-PCH-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP23]], ptr [[NT_CASTED]], align 4
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i64, ptr [[NT_CASTED]], align 8
+// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 8, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]])
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l55.omp_outlined.omp_outlined
+// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[NT:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// IR-PCH-NEXT: entry:
+// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[NT_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[I6:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[NT]], ptr [[NT_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
+// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// IR-PCH-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[I]], align 4
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-PCH: omp.precond.then:
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// IR-PCH-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32
+// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[CONV5]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-PCH-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// IR-PCH-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-PCH: cond.true:
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// IR-PCH-NEXT: br label [[COND_END:%.*]]
+// IR-PCH: cond.false:
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: br label [[COND_END]]
+// IR-PCH: cond.end:
+// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
+// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-PCH-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-PCH-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// IR-PCH-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
+// IR-PCH-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+// IR-PCH: if.then:
+// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z17omp_get_num_teamsv()
+// IR-PCH-NEXT: store i32 [[CALL]], ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: br label [[IF_END]]
+// IR-PCH: if.end:
+// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
+// IR-PCH: for.cond:
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP21]], [[TMP22]]
+// IR-PCH-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR-PCH: for.body:
+// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// IR-PCH-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP24]], [[TMP25]]
+// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[NT_ADDR]], align 4
+// IR-PCH-NEXT: [[ADD11:%.*]] = add nsw i32 [[MUL10]], [[TMP26]]
+// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64
+// IR-PCH-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM12]]
+// IR-PCH-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX13]], align 4
+// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
+// IR-PCH: for.inc:
+// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[J]], align 4
+// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1
+// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
+// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR-PCH: for.end:
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1
+// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// IR-PCH: omp.loop.exit:
+// IR-PCH-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP31]])
+// IR-PCH-NEXT: br label [[OMP_PRECOND_END]]
+// IR-PCH: omp.precond.end:
+// IR-PCH-NEXT: ret void
+//
+//
+// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
+// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// IR-GPU-NESTED-NEXT: entry:
+// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment to ptr), ptr [[DYN_PTR]])
+// IR-GPU-NESTED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
+// IR-GPU-NESTED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// IR-GPU-NESTED: user_code.entry:
+// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]]
+// IR-GPU-NESTED-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NESTED-NEXT: ret void
+// IR-GPU-NESTED: worker.exit:
+// IR-GPU-NESTED-NEXT: ret void
+//
+//
+// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
+// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// IR-GPU-NESTED-NEXT: entry:
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr
+// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// IR-GPU-NESTED-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// IR-GPU-NESTED-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0
+// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
+// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
+// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
+// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR-GPU-NESTED: land.lhs.true:
+// IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
+// IR-GPU-NESTED: omp.precond.then:
+// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// IR-GPU-NESTED-NEXT: [[CONV13:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
+// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV13]])
+// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR-GPU-NESTED: cond.true:
+// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END:%.*]]
+// IR-GPU-NESTED: cond.false:
+// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[COND_END]]
+// IR-GPU-NESTED: cond.end:
+// IR-GPU-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-GPU-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-GPU-NESTED: omp.inner.for.cond:
+// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP19]], 1
+// IR-GPU-NESTED-NEXT: [[CMP15:%.*]] = icmp slt i64 [[TMP18]], [[ADD]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-GPU-NESTED: omp.inner.for.body:
+// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED_ASCAST]], align 4
+// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP20]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP21]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP23]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP0]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// IR-GPU-NESTED-NEXT: store ptr [[TMP1]], ptr [[TMP32]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
+// IR-GPU-NESTED-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// IR-GPU-NESTED-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
+// IR-GPU-NESTED-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+// IR-GPU-NESTED-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
+// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-GPU-NESTED: omp.inner.for.inc:
+// IR-GPU-NESTED-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP40]], [[TMP41]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD17]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP42]], [[TMP43]]
+// IR-GPU-NESTED-NEXT: store i64 [[ADD18]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+// IR-GPU-NESTED-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP44]], [[TMP45]]
+// IR-GPU-NESTED-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]]
+// IR-GPU-NESTED: cond.true20:
+// IR-GPU-NESTED-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8
+//
IR-GPU-NESTED-NEXT: br label [[COND_END22:%.*]] +// IR-GPU-NESTED: cond.false21: +// IR-GPU-NESTED-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[COND_END22]] +// IR-GPU-NESTED: cond.end22: +// IR-GPU-NESTED-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP46]], [[COND_TRUE20]] ], [ [[TMP47]], [[COND_FALSE21]] ] +// IR-GPU-NESTED-NEXT: store i64 [[COND23]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU-NESTED: omp.inner.for.end: +// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU-NESTED: omp.loop.exit: +// IR-GPU-NESTED-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP50]]) +// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.end: +// IR-GPU-NESTED-NEXT: ret void +// +// +// IR-GPU-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined_omp_outlined +// IR-GPU-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +// IR-GPU-NESTED-NEXT: entry: +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[J12:%.*]] = 
alloca i32, align 4, addrspace(5) +// IR-GPU-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr +// IR-GPU-NESTED-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NESTED-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_4]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTCAPTURE_EXPR_5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_5]] to ptr +// IR-GPU-NESTED-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NESTED-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NESTED-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NESTED-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr +// IR-GPU-NESTED-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr +// IR-GPU-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// 
IR-GPU-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-GPU-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-GPU-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-GPU-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-GPU-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-GPU-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-GPU-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-GPU-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-GPU-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-GPU-NESTED: land.lhs.true: +// IR-GPU-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.then: +// IR-GPU-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1) +// IR-GPU-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU-NESTED: omp.inner.for.cond: +// IR-GPU-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// IR-GPU-NESTED-NEXT: br i1 [[CMP13]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU-NESTED: omp.inner.for.body: +// IR-GPU-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP19]], 0 +// IR-GPU-NESTED-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// IR-GPU-NESTED-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] +// IR-GPU-NESTED-NEXT: [[CONV17:%.*]] = sext i32 [[MUL16]] to i64 +// IR-GPU-NESTED-NEXT: [[DIV18:%.*]] = sdiv i64 [[TMP18]], [[CONV17]] +// IR-GPU-NESTED-NEXT: [[MUL19:%.*]] = mul nsw i64 [[DIV18]], 1 +// IR-GPU-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL19]] +// IR-GPU-NESTED-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD]] to i32 +// IR-GPU-NESTED-NEXT: store i32 [[CONV20]], ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP22]], 0 +// IR-GPU-NESTED-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// IR-GPU-NESTED-NEXT: [[MUL23:%.*]] = mul nsw i32 1, [[DIV22]] +// IR-GPU-NESTED-NEXT: [[CONV24:%.*]] = sext i32 [[MUL23]] to i64 +// IR-GPU-NESTED-NEXT: [[DIV25:%.*]] = sdiv i64 [[TMP21]], [[CONV24]] +// IR-GPU-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-GPU-NESTED-NEXT: [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1 +// IR-GPU-NESTED-NEXT: [[MUL28:%.*]] = mul nsw i32 1, [[DIV27]] +// IR-GPU-NESTED-NEXT: [[CONV29:%.*]] = sext i32 [[MUL28]] to i64 +// IR-GPU-NESTED-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV25]], [[CONV29]] +// IR-GPU-NESTED-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP20]], [[MUL30]] +// IR-GPU-NESTED-NEXT: [[MUL32:%.*]] = mul nsw i64 [[SUB31]], 1 +// IR-GPU-NESTED-NEXT: [[ADD33:%.*]] = add nsw i64 0, [[MUL32]] +// IR-GPU-NESTED-NEXT: [[CONV34:%.*]] = trunc i64 [[ADD33]] to i32 +// IR-GPU-NESTED-NEXT: store i32 [[CONV34]], ptr [[J12_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP24:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// IR-GPU-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-GPU-NESTED-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-GPU-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[MUL35:%.*]] = mul nsw i32 [[TMP25]], [[TMP26]] +// IR-GPU-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP27]]) #[[ATTR5:[0-9]+]] +// IR-GPU-NESTED-NEXT: [[ADD36:%.*]] = add nsw i32 [[MUL35]], [[CALL]] +// IR-GPU-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11_ASCAST]], align 4 +// IR-GPU-NESTED-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP28]] to i64 +// IR-GPU-NESTED-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM37]] +// IR-GPU-NESTED-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX38]], align 4 +// IR-GPU-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU-NESTED: omp.body.continue: +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU-NESTED: omp.inner.for.inc: +// IR-GPU-NESTED-NEXT: [[TMP29:%.*]] = load i64, ptr 
[[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP29]], [[TMP30]] +// IR-GPU-NESTED-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU-NESTED: omp.inner.for.end: +// IR-GPU-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU-NESTED: omp.loop.exit: +// IR-GPU-NESTED-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// IR-GPU-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP32]]) +// IR-GPU-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-GPU-NESTED: omp.precond.end: +// IR-GPU-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@main +// IR-NESTED-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// IR-NESTED-NEXT: ret i32 [[TMP8]] +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// IR-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 +// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined +// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP4]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-NESTED: land.lhs.true: +// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.then: +// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NESTED-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-NESTED: cond.true: +// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-NESTED: cond.false: +// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: br label [[COND_END]] +// IR-NESTED: cond.end: +// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-NESTED: omp.inner.for.cond: +// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-NESTED-NEXT: br i1 [[CMP14]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-NESTED: omp.inner.for.body: +// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-NESTED: omp.inner.for.inc: +// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-NESTED: omp.inner.for.end: +// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-NESTED: omp.loop.exit: +// IR-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.end: +// IR-NESTED-NEXT: ret void +// +// +// IR-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined +// IR-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-NESTED-NEXT: entry: +// IR-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-NESTED-NEXT: [[J12:%.*]] = alloca i32, 
align 4 +// IR-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[I]], align 4 +// IR-NESTED-NEXT: store i32 0, ptr [[J]], align 4 +// IR-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-NESTED: land.lhs.true: +// IR-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.then: +// IR-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// IR-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// IR-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-NESTED: cond.true: +// IR-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-NESTED: cond.false: +// IR-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: br label [[COND_END]] +// IR-NESTED: cond.end: +// IR-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// IR-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// IR-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-NESTED: omp.inner.for.cond: +// IR-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]] +// IR-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-NESTED: omp.inner.for.body: +// IR-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]] +// IR-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// IR-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// IR-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// IR-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0 +// IR-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// IR-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// IR-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// IR-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]] +// IR-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0 +// IR-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// IR-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// IR-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// IR-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// IR-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]] +// IR-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// IR-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// IR-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// IR-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// IR-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NESTED-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// IR-NESTED-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]] +// IR-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// IR-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]]) +// IR-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]] +// IR-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4 +// IR-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64 +// IR-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]] +// IR-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4 +// IR-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-NESTED: omp.body.continue: +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-NESTED: omp.inner.for.inc: +// IR-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1 +// IR-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// IR-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-NESTED: omp.inner.for.end: +// IR-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-NESTED: omp.loop.exit: +// IR-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// IR-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]]) +// IR-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-NESTED: omp.precond.end: +// IR-NESTED-NEXT: ret void +// +// +// IR-PCH-NESTED-LABEL: define {{[^@]+}}@main +// IR-PCH-NESTED-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NESTED-NEXT: entry: +// IR-PCH-NESTED-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0() +// IR-PCH-NESTED-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// IR-PCH-NESTED-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 +// IR-PCH-NESTED-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// IR-PCH-NESTED-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP4]], align 16 +// IR-PCH-NESTED-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR1]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr @N, align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NESTED-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP6]], i64 [[TMP1]], ptr [[VLA]], i64 [[TMP4]], ptr [[VLA1]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// IR-PCH-NESTED-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP7]]) +// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], 
align 4 +// IR-PCH-NESTED-NEXT: ret i32 [[TMP8]] +// +// +// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// IR-PCH-NESTED-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NESTED-NEXT: entry: +// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined, i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NESTED-NEXT: ret void +// +// +// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined +// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NESTED-NEXT: entry: +// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// 
IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH-NESTED: land.lhs.true: +// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-PCH-NESTED: omp.precond.then: +// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-PCH-NESTED-NEXT: 
[[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP13]], [[TMP14]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH-NESTED: cond.true: +// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-PCH-NESTED: cond.false: +// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: br label [[COND_END]] +// IR-PCH-NESTED: cond.end: +// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH-NESTED: omp.inner.for.cond: +// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP18]], [[TMP19]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH-NESTED: omp.inner.for.body: +// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP22]], ptr [[N_CASTED]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// IR-PCH-NESTED-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined, i64 [[TMP20]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH-NESTED: omp.inner.for.inc: +// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP24]], [[TMP25]] +// IR-PCH-NESTED-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH-NESTED: omp.inner.for.end: +// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH-NESTED: omp.loop.exit: +// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH-NESTED: omp.precond.end: +// IR-PCH-NESTED-NEXT: ret void +// +// +// IR-PCH-NESTED-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.omp_outlined.omp_outlined +// IR-PCH-NESTED-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +// IR-PCH-NESTED-NEXT: entry: +// IR-PCH-NESTED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// IR-PCH-NESTED-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[I11:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: [[J12:%.*]] = alloca i32, align 4 +// IR-PCH-NESTED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// IR-PCH-NESTED-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// IR-PCH-NESTED-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// IR-PCH-NESTED-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP7]], 0 +// IR-PCH-NESTED-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// IR-PCH-NESTED-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// IR-PCH-NESTED-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// IR-PCH-NESTED-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// IR-PCH-NESTED-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[I]], align 4 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[J]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-PCH-NESTED-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR-PCH-NESTED: land.lhs.true: +// IR-PCH-NESTED-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP9]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// IR-PCH-NESTED: omp.precond.then: +// IR-PCH-NESTED-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// IR-PCH-NESTED-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// IR-PCH-NESTED-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP16:%.*]] = 
load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH-NESTED: cond.true: +// IR-PCH-NESTED-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// IR-PCH-NESTED-NEXT: br label [[COND_END:%.*]] +// IR-PCH-NESTED: cond.false: +// IR-PCH-NESTED-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: br label [[COND_END]] +// IR-PCH-NESTED: cond.end: +// IR-PCH-NESTED-NEXT: [[COND:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// IR-PCH-NESTED-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// IR-PCH-NESTED-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH-NESTED: omp.inner.for.cond: +// IR-PCH-NESTED-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// IR-PCH-NESTED-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP20]], [[TMP21]] +// IR-PCH-NESTED-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH-NESTED: omp.inner.for.body: +// IR-PCH-NESTED-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP23]], 0 +// IR-PCH-NESTED-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// IR-PCH-NESTED-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// IR-PCH-NESTED-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// IR-PCH-NESTED-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP22]], [[CONV18]] +// IR-PCH-NESTED-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// IR-PCH-NESTED-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// IR-PCH-NESTED-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// IR-PCH-NESTED-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP26]], 0 +// IR-PCH-NESTED-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// IR-PCH-NESTED-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// IR-PCH-NESTED-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// IR-PCH-NESTED-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP25]], [[CONV25]] +// IR-PCH-NESTED-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// IR-PCH-NESTED-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP27]], 0 +// IR-PCH-NESTED-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// IR-PCH-NESTED-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// IR-PCH-NESTED-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// IR-PCH-NESTED-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// IR-PCH-NESTED-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP24]], [[MUL31]] +// IR-PCH-NESTED-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// IR-PCH-NESTED-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// IR-PCH-NESTED-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// IR-PCH-NESTED-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP28:%.*]] = load i32, ptr [[I11]], align 4 +// IR-PCH-NESTED-NEXT: [[IDXPROM:%.*]] = sext 
i32 [[TMP28]] to i64 +// IR-PCH-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// IR-PCH-NESTED-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// IR-PCH-NESTED-NEXT: [[TMP30:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// IR-PCH-NESTED-NEXT: [[MUL36:%.*]] = mul nsw i32 [[TMP29]], [[TMP30]] +// IR-PCH-NESTED-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// IR-PCH-NESTED-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooi(i32 noundef [[TMP31]]) +// IR-PCH-NESTED-NEXT: [[ADD37:%.*]] = add nsw i32 [[MUL36]], [[CALL]] +// IR-PCH-NESTED-NEXT: [[TMP32:%.*]] = load i32, ptr [[I11]], align 4 +// IR-PCH-NESTED-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP32]] to i64 +// IR-PCH-NESTED-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM38]] +// IR-PCH-NESTED-NEXT: store i32 [[ADD37]], ptr [[ARRAYIDX39]], align 4 +// IR-PCH-NESTED-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH-NESTED: omp.body.continue: +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH-NESTED: omp.inner.for.inc: +// IR-PCH-NESTED-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP33]], 1 +// IR-PCH-NESTED-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// IR-PCH-NESTED-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH-NESTED: omp.inner.for.end: +// IR-PCH-NESTED-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH-NESTED: omp.loop.exit: +// IR-PCH-NESTED-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NESTED-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// IR-PCH-NESTED-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP35]]) +// IR-PCH-NESTED-NEXT: br label [[OMP_PRECOND_END]] +// IR-PCH-NESTED: omp.precond.end: +// IR-PCH-NESTED-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp index 1edcbfe2d7779..e1a6aad65b796 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp @@ -332,78 +332,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -411,14 +341,14 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -586,78 +516,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -665,14 +525,14 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -695,7 +555,6 @@ int main() { // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 @@ -725,82 +584,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -808,14 +593,14 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -847,7 +632,6 @@ int main() { // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -878,30 +662,18 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_IF_END:%.*]] -// CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_IF_END]] -// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -910,85 +682,19 @@ int main() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_ +// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc 
i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_ -// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: 
[[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 // CHECK1-NEXT: store i32 3, ptr [[TMP0]], align 4 @@ -1026,44 +732,61 @@ int main() { // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0 -// CHECK1-NEXT: store i32 3, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1 -// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 // CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS2]]) -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]] -// CHECK1: omp_offload.failed3: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]] -// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT4]] -// CHECK1: omp_offload.cont4: +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK1-NEXT: store i32 3, ptr [[TMP24]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK1: omp_offload.failed6: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK1: omp_offload.cont7: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68() #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret i32 0 @@ -1117,78 +840,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1196,14 +849,14 @@ int main() { // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -1226,7 +879,6 @@ int main() { // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 @@ -1256,82 +908,8 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z3fn2v()
@@ -1339,29 +917,38 @@ int main() {
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68
-// CHECK1-SAME: () #[[ATTR1]] {
+// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
@@ -1371,6 +958,7 @@ int main() {
 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
@@ -1398,78 +986,8 @@ int main() {
 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z3fn3v()
@@ -1477,13 +995,13 @@ int main() {
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
 // CHECK1-NEXT: ret void
 //
diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
index 99416f76e409c..9b3d77f5b0adc 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp
@@ -326,7 +326,7 @@ int main() {
 // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
 // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1: omp_offload.failed:
@@ -340,7 +340,7 @@ int main() {
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124
 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
 // CHECK1-NEXT: ret void
 //
 //
@@ -403,149 +403,48 @@ int main() {
 // CHECK1: omp.inner.for.cond.cleanup:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done3:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false)
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false)
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4
 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2
 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done8:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done7:
 // CHECK1-NEXT: ret void
 //
 //
@@ -596,7 +495,7 @@ int main() {
 // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
 // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1: omp_offload.failed:
@@ -645,7 +544,7 @@ int main() {
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80
 // CHECK1-SAME: () #[[ATTR4]] {
 // CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
 // CHECK1-NEXT: ret void
 //
 //
@@ -711,149 +610,45 @@ int main() {
 // CHECK1: omp.inner.for.cond.cleanup:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2
-// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
-// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done5:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK1: arrayctor.loop:
-// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1
-// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK1: arrayctor.cont:
-// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK1: omp.inner.for.cond.cleanup:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false)
+// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false)
 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]]
-// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK1: arraydestroy.done9:
+// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK1: arraydestroy.done8:
 // CHECK1-NEXT: ret void
 //
 //
@@ -1059,7 +854,7 @@ int main() {
 // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]])
 // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3: omp_offload.failed:
@@ -1073,7 +868,7 @@ int main() {
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124
 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined)
 // CHECK3-NEXT: ret void
 //
 //
@@ -1136,138 +931,41 @@ int main() {
 // CHECK3: omp.inner.for.cond.cleanup:
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done3:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]]
 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false)
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4
 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3: omp.body.continue:
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK3: omp.inner.for.end:
 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
 // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]]
@@ -1323,7 +1021,7 @@ int main() {
 // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]])
 // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3: omp_offload.failed:
@@ -1372,7 +1070,7 @@ int main() {
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80
 // CHECK3-SAME: () #[[ATTR4]] {
 // CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined)
 // CHECK3-NEXT: ret void
 //
 //
@@ -1438,138 +1136,38 @@ int main() {
 // CHECK3: omp.inner.for.cond.cleanup:
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done5:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4
 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false)
+// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false)
 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3: omp.body.continue:
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1
 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK3: omp.inner.for.end:
 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
 // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
 // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -1760,7 +1358,7 @@ int main() {
 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104
 // CHECK5-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK5-NEXT: entry:
-// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
+// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
 // CHECK5-NEXT: ret void
 //
 //
@@ -1781,6 +1379,7 @@ int main() {
 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -1812,17 +1411,29 @@ int main() {
 // CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
 // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK5-NEXT: store i32 1, ptr [[G]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK5-NEXT: store volatile i32 1, ptr [[TMP8]], align 4
+// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK5-NEXT: store ptr [[G]], ptr [[TMP9]], align 8
+// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK5-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
+// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8
+// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]])
+// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK5: omp.body.continue:
 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK5: omp.inner.for.end:
 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1831,98 +1442,8 @@ int main() {
 // CHECK5-NEXT: ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK5-NEXT: entry:
-// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
-// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK5-NEXT: store ptr
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef 
nonnull align 8 dereferenceable(24) [[REF_TMP]]) -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) -// CHECK5-NEXT: ret void -// -// -// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp -// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp +// CHECK5-SAME: () #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: call void @__cxx_global_var_init() // CHECK5-NEXT: call void @__cxx_global_var_init.1() @@ -1935,7 +1456,7 @@ int main() { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK13-NEXT: ret void // // @@ -1998,35 +1519,48 @@ int main() { // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK13-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] -// CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done3: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -2040,120 +1574,6 @@ int main() { // CHECK13-NEXT: ret void // // -// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK13-NEXT: entry: -// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// 
CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK13: cond.true: -// CHECK13-NEXT: br label [[COND_END:%.*]] -// CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END]] -// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK13: omp.inner.for.cond.cleanup: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 -// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK13: omp.body.continue: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK13: omp.inner.for.end: -// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: -// CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat { // CHECK13-NEXT: entry: @@ -2169,7 +1589,7 @@ int main() { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK13-NEXT: ret void // // @@ -2235,17 +1655,27 @@ int main() { // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2254,16 +1684,16 @@ int main() { // CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done5: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done8: // CHECK13-NEXT: ret void // // @@ -2277,120 +1707,6 @@ int main() { // CHECK13-NEXT: ret void // // -// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK13-NEXT: entry: -// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK13: cond.true: -// CHECK13-NEXT: br label [[COND_END:%.*]] -// CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END]] -// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK13: omp.inner.for.cond.cleanup: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) -// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK13: omp.body.continue: -// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK13: omp.inner.for.end: -// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done9: -// CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat { // CHECK13-NEXT: entry: @@ -2449,7 +1765,7 @@ int main() { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK15-NEXT: ret void // // @@ -2512,148 +1828,41 @@ int main() { // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] -// CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 -// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done3: -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev -// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// 
CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK15: omp.inner.for.cond.cleanup: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// 
CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -2662,6 +1871,16 @@ int main() { // CHECK15-NEXT: ret void // // +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: ret void +// +// // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: @@ -2677,7 +1896,7 @@ int main() { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK15-NEXT: ret void // // @@ -2743,148 +1962,38 @@ int main() { // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 -// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done5: -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] -// CHECK15-NEXT: ret void -// -// -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// 
CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK15: omp.inner.for.cond.cleanup: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -2893,6 +2002,16 @@ int main() {
// CHECK15-NEXT: ret void
//
//
+// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev
+// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK15-NEXT: entry:
+// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK15-NEXT: ret void
+//
+//
// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev
// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
// CHECK15-NEXT: entry:
@@ -2951,7 +2070,7 @@ int main() {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined)
// CHECK17-NEXT: ret void
//
//
@@ -2972,6 +2091,7 @@ int main() {
// CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -3003,111 +2123,33 @@ int main() {
// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK17: omp.inner.for.body:
-// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK17: omp.inner.for.end: -// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: ret void -// -// -// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { -// CHECK17-NEXT: entry: -// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK17: cond.true: -// CHECK17-NEXT: br label [[COND_END:%.*]] -// CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END]] -// CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] 
= add nsw i32 [[TMP13]], 1
+// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK17: omp.inner.for.end:
// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK17: omp.loop.exit:
-// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK17-NEXT: ret void
//
diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
index d4a98f07fe24d..23c5c9db9c700 100644
--- a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp
@@ -278,7 +278,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4
// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP41]], align 4
-// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
@@ -338,7 +338,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP69]], align 4
// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP70]], align 4
-// CHECK1-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]])
+// CHECK1-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]])
// CHECK1-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0
// CHECK1-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
// CHECK1: omp_offload.failed16:
@@ -356,7 +356,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8
// CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
@@ -364,8 +364,8 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]])
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -434,126 +434,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp 
sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -567,7 +469,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -636,126 +538,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -865,7 +669,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 [[TMP21]], i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -925,7 +729,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP69]], align 4 // CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP70]], align 4 -// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) +// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) // CHECK3-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 // CHECK3-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]] // CHECK3: omp_offload.failed16: @@ -943,7 +747,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -951,8 +755,8 @@ int main (int argc, 
char **argv) { // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) // CHECK3-NEXT: ret void // // @@ -1021,121 +825,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca 
i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1149,7 +859,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1218,121 +928,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -1424,7 +1040,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4
// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK9-NEXT: store i32 0, ptr [[TMP35]], align 4
-// CHECK9-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]])
// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
// CHECK9-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK9: omp_offload.failed:
@@ -1449,7 +1065,7 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]])
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]])
// CHECK9-NEXT: ret void
//
//
@@ -1521,129 +1137,28 @@ int main (int argc, char **argv) {
// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: br label [[OMP_PRECOND_END]] -// CHECK9: omp.precond.end: -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK9-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK9: omp.precond.then: -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] // CHECK9-NEXT: 
store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1735,7 +1250,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK11-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) // CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK11-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: @@ -1760,7 +1275,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK11-NEXT: ret void // // @@ -1832,124 +1347,27 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP4]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2009,7 +1427,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK17-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK17-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) // CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK17-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK17: omp_offload.failed: @@ -2028,7 +1446,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) // CHECK17-NEXT: ret void // // @@ -2041,135 +1459,62 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 -// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK17: cond.true: -// CHECK17-NEXT: br label [[COND_END:%.*]] -// CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END]] -// CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK17: omp.inner.for.end: -// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: ret void -// -// -// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK17-NEXT: entry: -// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// 
CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: ret void // // @@ -2227,7 +1572,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK19-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK19-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) // CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK19-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK19: omp_offload.failed: @@ -2246,7 +1591,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) // CHECK19-NEXT: ret void // // @@ -2294,95 +1639,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK19: omp.inner.for.end: -// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: ret void -// -// -// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { -// CHECK19-NEXT: entry: -// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 -// CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK19: cond.true: -// CHECK19-NEXT: br label 
[[COND_END:%.*]] -// CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: br label [[COND_END]] -// CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK19: omp.inner.for.cond: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: ret void // // @@ -2478,7 +1754,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK25-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK25-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK25-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) // CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK25-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK25: omp_offload.failed: @@ -2505,7 +1781,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = 
load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) // CHECK25-NEXT: ret void // // @@ -2577,129 +1853,28 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) -// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK25: omp.inner.for.end: -// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK25-NEXT: br label [[OMP_PRECOND_END]] -// CHECK25: omp.precond.end: -// CHECK25-NEXT: ret void -// -// -// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK25-NEXT: entry: -// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 -// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK25: omp.precond.then: -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK25: cond.true: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: br label [[COND_END:%.*]] -// CHECK25: cond.false: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: br label [[COND_END]] -// CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: 
store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK25: omp.inner.for.cond: // CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2778,7 +1953,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 // CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK25-NEXT: store i32 0, ptr [[TMP31]], align 4 -// CHECK25-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) // CHECK25-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 // CHECK25-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK25: omp_offload.failed: @@ -2794,15 +1969,15 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 
// CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) // CHECK25-NEXT: ret void // // @@ -2850,99 +2025,26 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) -// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK25: omp.inner.for.end: -// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK25-NEXT: ret void -// -// -// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { -// CHECK25-NEXT: entry: -// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 -// CHECK25-NEXT: 
br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK25: cond.true: -// CHECK25-NEXT: br label [[COND_END:%.*]] -// CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: br label [[COND_END]] -// CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK25: omp.inner.for.cond: // CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: ret void // // @@ -3038,7 +2140,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 // CHECK27-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK27-NEXT: store i32 0, ptr [[TMP35]], align 4 -// CHECK27-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) // CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 // CHECK27-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK27: omp_offload.failed: @@ -3065,7 +2167,7 @@ int main (int argc, 
char **argv) { // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK27-NEXT: ret void // // @@ -3137,124 +2239,27 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK27: omp.inner.for.end: -// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK27-NEXT: br label [[OMP_PRECOND_END]] -// CHECK27: omp.precond.end: -// CHECK27-NEXT: ret void -// -// -// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { -// CHECK27-NEXT: entry: -// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// 
CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK27: omp.precond.then: -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK27: cond.true: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: br label [[COND_END:%.*]] -// CHECK27: cond.false: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: br label [[COND_END]] -// CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK27: 
omp.inner.for.cond: // CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK27-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -3333,7 +2338,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4 // CHECK27-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK27-NEXT: store i32 0, ptr [[TMP31]], align 4 -// CHECK27-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 [[TMP15]], i32 [[TMP16]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) // CHECK27-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 // CHECK27-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK27: omp_offload.failed: @@ -3349,15 +2354,15 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB2]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 -// CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) // CHECK27-NEXT: ret void // // @@ -3405,93 +2410,24 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK27: omp.inner.for.end: -// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK27-NEXT: ret void -// -// -// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { -// CHECK27-NEXT: entry: -// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 -// CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK27: cond.true: -// CHECK27-NEXT: br label [[COND_END:%.*]] -// CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: br label [[COND_END]] -// CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK27: omp.inner.for.cond: // CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // 
CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK27-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_generic_loop_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_codegen.cpp index 2499fbb6811c9..85dcae26970bc 100644 --- a/clang/test/OpenMP/teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_codegen.cpp @@ -29,7 +29,7 @@ int foo() { // IR-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) // IR-NEXT: ret i32 0 // // @@ -96,277 +96,100 @@ int foo() { // IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // IR: omp.inner.for.body: -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]] +// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// IR-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]] +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 +// IR-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]] +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 +// IR-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]] +// IR-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // IR: omp.inner.for.inc: -// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// IR-NEXT: store i32 [[ADD]], ptr 
[[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 +// IR-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // IR-NEXT: br label [[OMP_INNER_FOR_COND]] // IR: omp.inner.for.end: // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // IR: omp.loop.exit: -// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // IR: .omp.lastprivate.then: // IR-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 -// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // IR: .omp.lastprivate.done: -// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 -// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8 +// IR-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-NEXT: ] // IR: .omp.reduction.case1: -// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // IR: 
omp.arraycpy.body: // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// IR-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done10: -// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR: .omp.reduction.case2: -// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// IR: omp.arraycpy.body12: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 -// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] // IR: omp.arraycpy.done18: -// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// IR: .omp.reduction.default: -// IR-NEXT: ret void -// -// -// IR-LABEL: define 
{{[^@]+}}@_Z3foov.omp_outlined.omp_outlined -// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { -// IR-NEXT: entry: -// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 -// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-NEXT: [[I:%.*]] = alloca i32, align 4 -// IR-NEXT: [[J5:%.*]] = alloca i32, align 4 -// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 -// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 -// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 -// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 -// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// IR-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 -// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 -// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] -// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] -// IR: omp.arrayinit.body: -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] -// IR: omp.arrayinit.done: -// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 -// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// IR: cond.true: -// IR-NEXT: br label [[COND_END:%.*]] -// IR: cond.false: -// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: br label [[COND_END]] -// IR: cond.end: -// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// IR: omp.inner.for.cond: -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// IR: omp.inner.for.body: -// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10 -// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10 -// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 -// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]] -// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 -// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] -// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] -// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 -// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] -// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]] -// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// IR: omp.body.continue: -// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// IR: omp.inner.for.inc: -// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1 -// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP3]] -// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] -// IR: omp.inner.for.end: -// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// IR: omp.loop.exit: -// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) -// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8 -// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// IR-NEXT: ] -// IR: .omp.reduction.case1: -// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR: omp.arraycpy.body: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done19: -// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.case2: -// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// IR: omp.arraycpy.body21: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// IR-NEXT: 
[[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 -// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] -// IR: omp.arraycpy.done27: +// IR-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// IR: omp.arraycpy.body20: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4 +// IR-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] +// IR: omp.arraycpy.done26: // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // IR: .omp.reduction.default: -// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// IR: .omp.lastprivate.then: -// IR-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4 -// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4 -// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// IR: .omp.lastprivate.done: -// IR-NEXT: ret void -// -// -// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func -// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { -// IR-NEXT: entry: -// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 -// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] -// IR-NEXT: br i1 
[[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] -// IR: omp.arraycpy.body: -// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] -// IR: omp.arraycpy.done2: // IR-NEXT: ret void // // // IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func -// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // IR-NEXT: entry: // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 @@ -402,7 +225,7 @@ int foo() { // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 // IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 -// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) // IR-PCH-NEXT: ret i32 0 // // @@ -469,277 +292,100 @@ int foo() { // IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // IR-PCH: omp.inner.for.body: -// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV6:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-PCH-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 10 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL7]] +// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM10]] +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 +// IR-PCH-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], [[TMP13]] +// IR-PCH-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // IR-PCH: omp.inner.for.inc: -// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 +// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] // IR-PCH: omp.inner.for.end: // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // IR-PCH: omp.loop.exit: -// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// IR-PCH-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // IR-PCH: .omp.lastprivate.then: // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 -// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 -// IR-PCH-NEXT: store i32 
[[TMP20]], ptr [[TMP0]], align 4 +// IR-PCH-NEXT: [[TMP22:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // IR-PCH: .omp.lastprivate.done: -// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 -// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP23]], align 8 +// IR-PCH-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// IR-PCH-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-PCH-NEXT: ] // IR-PCH: .omp.reduction.case1: -// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] -// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP27]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // IR-PCH: omp.arraycpy.body: // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 -// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// IR-PCH-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], 
align 4
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done10:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH: .omp.reduction.case2:
-// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]]
-// IR-PCH: omp.arraycpy.body12:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ]
-// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4
-// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]]
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP27]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]]
 // IR-PCH: omp.arraycpy.done18:
-// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// IR-PCH: .omp.reduction.default:
-// IR-PCH-NEXT: ret void
-//
-//
-// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined
-// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] {
-// IR-PCH-NEXT: entry:
-// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16
-// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8
-// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8
-// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8
-// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8
-// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32
-// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
-// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0
-// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
-// IR-PCH: omp.arrayinit.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
-// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
-// IR-PCH: omp.arrayinit.done:
-// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99
-// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// IR-PCH: cond.true:
-// IR-PCH-NEXT: br label [[COND_END:%.*]]
-// IR-PCH: cond.false:
-// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: br label [[COND_END]]
-// IR-PCH: cond.end:
-// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ]
-// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// IR-PCH: omp.inner.for.cond:
-// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
-// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
-// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// IR-PCH: omp.inner.for.body:
-// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10
-// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10
-// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
-// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]]
-// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
-// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64
-// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]]
-// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64
-// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]]
-// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
-// IR-PCH: omp.body.continue:
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// IR-PCH: omp.inner.for.inc:
-// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1
-// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
-// IR-PCH: omp.inner.for.end:
-// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// IR-PCH: omp.loop.exit:
-// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]])
-// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8
-// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
-// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// IR-PCH-NEXT: ]
-// IR-PCH: .omp.reduction.case1:
-// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH: omp.arraycpy.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done19:
-// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
+// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.case2:
-// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]]
-// IR-PCH: omp.arraycpy.body21:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ]
-// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4
-// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]]
-// IR-PCH: omp.arraycpy.done27:
+// IR-PCH-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP1]], [[TMP30]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]]
+// IR-PCH: omp.arraycpy.body20:
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ]
+// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 4
+// IR-PCH-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 [[TMP31]] monotonic, align 4
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1
+// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]]
+// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]]
+// IR-PCH: omp.arraycpy.done26:
 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // IR-PCH: .omp.reduction.default:
-// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
-// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
-// IR-PCH: .omp.lastprivate.then:
-// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4
-// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4
-// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4
-// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
-// IR-PCH: .omp.lastprivate.done:
-// IR-PCH-NEXT: ret void
-//
-//
-// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// IR-PCH-NEXT: entry:
-// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
-// IR-PCH: omp.arraycpy.body:
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
-// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
-// IR-PCH: omp.arraycpy.done2:
 // IR-PCH-NEXT: ret void
 //
 //
 // IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func
-// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
 // IR-PCH-NEXT: entry:
 // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
index c0c04986f147e..901b6552a22b6 100644
--- a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp
@@ -157,7 +157,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
 // CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1: omp_offload.failed:
@@ -177,7 +177,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -227,114 +227,39 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456
-// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
-// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
+// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
+// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4
 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -393,7 +318,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]])
 // CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3: omp_offload.failed:
@@ -413,7 +338,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]])
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]])
 // CHECK3-NEXT: ret void
 //
 //
@@ -463,108 +388,37 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456
+// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456
 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456
 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456
-// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]]
+// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
 // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]]
 // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4
 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3: omp.body.continue:
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1
 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK3: omp.inner.for.end:
 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK3-NEXT: ret void
 //
 //
@@ -693,7 +547,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
 // CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK9-NEXT: store i32 0, ptr [[TMP48]], align 4
-// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]])
 // CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
 // CHECK9-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9: omp_offload.failed:
@@ -725,7 +579,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]])
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]])
 // CHECK9-NEXT: ret void
 //
 //
@@ -820,178 +674,58 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
 // CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]]
-// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]])
-// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK9: omp.precond.end:
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8
-// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
-// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0
-// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0
-// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
-// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK9: land.lhs.true:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]]
-// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
-// CHECK9: omp.precond.then:
-// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
-// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]]
-// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0
+// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0
 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
 // CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
 // CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
-// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]]
+// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]]
 // CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
 // CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
 // CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
-// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0
+// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0
 // CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
 // CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
 // CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
-// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]]
-// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]]
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0
 // CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
 // CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
 // CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64
 // CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]]
-// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]]
+// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]]
 // CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
 // CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
 // CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
 // CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
-// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
-// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]]
-// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4
-// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
+// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]]
+// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]]
+// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64
 // CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]]
 // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4
 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK9: omp.body.continue:
 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1
+// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1
 // CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK9: omp.inner.for.end:
 // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9: omp.loop.exit:
-// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]])
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]])
 // CHECK9-NEXT: br label [[OMP_PRECOND_END]]
 // CHECK9: omp.precond.end:
 // CHECK9-NEXT: ret void
@@ -1043,7 +777,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK9-NEXT: store i32 0, ptr [[TMP17]], align 4
-// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]])
+// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]])
 // CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
 // CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9: omp_offload.failed:
@@ -1059,7 +793,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]])
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]])
 // CHECK9-NEXT: ret void
 //
 //
@@ -1109,113 +843,38 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
 // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19
-// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2
+// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2
 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2
-// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2
-// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
-// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
+// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2
+// CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]]
+// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4
+// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4
-// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]]
-// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4
+// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
+// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64
+// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]]
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4
 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK9: omp.body.continue:
 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK9: omp.inner.for.end:
 // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK9-NEXT: ret void
 //
 //
@@ -1343,7 +1002,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4
 // CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
 // CHECK11-NEXT: store i32 0, ptr [[TMP47]], align 4
-// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]])
+// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]])
 // CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
 // CHECK11-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11: omp_offload.failed:
@@ -1375,7 +1034,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]])
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]])
 // CHECK11-NEXT: ret void
 //
 //
@@ -1470,180 +1129,56 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]]
 // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK11: omp.inner.for.body:
-// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
-// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]])
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK11: omp.inner.for.inc:
-// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8
-// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK11: omp.inner.for.end:
-// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]])
-// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
-// CHECK11: omp.precond.end:
-// CHECK11-NEXT: ret void
-//
-//
-// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
-// CHECK11-NEXT: entry:
-// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4
-// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
-// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4
-// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
-// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0
-// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0
-// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
-// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
-// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]]
-// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
-// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
-// CHECK11: land.lhs.true:
-// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]]
-// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
-// CHECK11: omp.precond.then:
-// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8
-// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64
-// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8
-// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8
-// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]],
align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1695,7 +1230,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK11-NEXT: store i32 0, ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) // CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 // CHECK11-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: @@ -1711,7 +1246,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) // CHECK11-NEXT: ret void // // @@ -1761,106 +1296,35 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: ret void -// -// -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 -// CHECK11-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: // CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], 
i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp index 303bce8c648c2..e955db129d1da 100644 --- a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp @@ -288,7 +288,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -302,7 +302,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) // CHECK1-NEXT: ret void // // @@ -365,149 +365,48 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done3: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] 
= phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -558,7 +457,7 @@ int main() { // CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: @@ -607,7 +506,7 @@ int main() { // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) // CHECK1-NEXT: ret void // // @@ -673,149 +572,45 @@ int main() { // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done5: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] -// CHECK1: omp.inner.for.cond.cleanup: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: // CHECK1-NEXT: ret void // // @@ -1021,7 +816,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1035,7 +830,7 @@ int main() { // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) // CHECK3-NEXT: ret void // // @@ -1098,138 +893,41 @@ int main() { // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 -// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] -// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done3: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]]
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false)
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
+// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]]
@@ -1285,7 +983,7 @@ int main() {
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
@@ -1334,7 +1032,7 @@ int main() {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56
// CHECK3-SAME: () #[[ATTR4]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined)
// CHECK3-NEXT: ret void
//
//
@@ -1400,138 +1098,38 @@ int main() {
// CHECK3: omp.inner.for.cond.cleanup:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]])
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2
-// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
-// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
-// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]]
-// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]]
-// CHECK3: arraydestroy.done5:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
-// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
-// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
-// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2
-// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]]
-// CHECK3: arrayctor.loop:
-// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
-// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1
-// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
-// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
-// CHECK3: arrayctor.cont:
-// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]])
-// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
-// CHECK3: omp.inner.for.cond.cleanup:
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]]
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]]
-// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]]
-// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false)
+// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]]
+// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false)
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2
// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK3: arraydestroy.body:
-// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]]
@@ -1726,7 +1324,7 @@ int main() {
// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8
// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined)
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined)
// CHECK9-NEXT: ret void
//
//
@@ -1747,6 +1345,7 @@ int main() {
// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8
@@ -1778,112 +1377,34 @@ int main() {
// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK9: omp.inner.for.end:
-// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK9-NEXT: ret void
-//
-//
-// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] {
-// CHECK9-NEXT: entry:
-// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
-// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
-// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK9: cond.true:
-// CHECK9-NEXT: br label [[COND_END:%.*]]
-// CHECK9: cond.false:
-// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: br label [[COND_END]]
-// CHECK9: cond.end:
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK9: omp.inner.for.cond:
// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK9: omp.inner.for.body:
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK9-NEXT: store i32 1, ptr [[G]], align 4
-// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4
+// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4
// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4
-// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8
-// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
-// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8
+// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
+// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8
// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]])
// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK9: omp.body.continue:
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK9: omp.inner.for.inc:
-// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK9-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
index d80c003c01099..f0067cb93d9b0 100644
--- a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp
@@ -141,7 +141,7 @@ int main() {
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
@@ -157,7 +157,7 @@ int main() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]])
// CHECK1-NEXT: ret void
//
//
@@ -208,165 +208,50 @@ int main() {
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK1-NEXT: ]
-// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK1-NEXT: ret void
-//
-//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
@@ -435,7 +320,7 @@ int main() {
// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]])
// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1: omp_offload.failed:
@@ -450,7 +335,7 @@ int main() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]])
// CHECK1-NEXT: ret void
//
//
@@ -501,163 +386,48 @@ int main() {
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK1-NEXT: ]
-// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
-// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4
-// CHECK1-NEXT: ret void
-//
-//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
@@ -726,7 +496,7 @@ int main() {
// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]])
// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3: omp_offload.failed:
@@ -742,7 +512,7 @@ int main() {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]])
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]])
// CHECK3-NEXT: ret void
//
//
@@ -793,161 +563,50 @@ int main() {
// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]])
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
-// CHECK3: omp.inner.for.end:
-// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
-// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
-// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
-// CHECK3-NEXT: ]
-// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
-// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
-// CHECK3: .omp.reduction.default:
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
-// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK3: cond.true:
-// CHECK3-NEXT: br label [[COND_END:%.*]]
-// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: br label [[COND_END]]
-// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
//
//
-// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 @@ -1016,7 +675,7 @@ int main() { // CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 // CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) // CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: @@ -1031,7 +690,7 @@ int main() { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) // CHECK3-NEXT: ret void // // @@ -1082,159 +741,48 @@ int main() { // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK3-NEXT: ] -// CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK3: .omp.reduction.default: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, 
ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func // CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: @@ -1270,7 +818,7 @@ int main() { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined, ptr [[SIVAR_ADDR]]) // CHECK9-NEXT: ret void // // @@ -1288,6 +836,7 @@ int main() { // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 @@ -1321,169 +870,53 @@ int main() { // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) -// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK9: omp.inner.for.end: -// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] -// CHECK9-NEXT: ] -// CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK9: .omp.reduction.default: -// CHECK9-NEXT: ret void -// -// -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// 
CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 -// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK9: cond.true: -// CHECK9-NEXT: br label [[COND_END:%.*]] -// CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END]] -// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] +// 
CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// 
CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // -// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func -// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { -// CHECK9-NEXT: entry: -// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 -// CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func -// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8