From d6057c46719b39943a402f074a59465405ca6120 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 21 May 2024 15:22:21 +0200 Subject: [PATCH 01/12] [Clang][OpenMP] Fix tile/unroll on iterator- and foreach-loops --- clang/include/clang/Sema/SemaOpenMP.h | 4 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 29 +- clang/lib/Sema/SemaOpenMP.cpp | 190 ++-- clang/test/OpenMP/tile_codegen.cpp | 887 +++++++++++++----- .../OpenMP/tile_codegen_for_dependent.cpp | 130 +-- clang/test/OpenMP/tile_codegen_tile_for.cpp | 218 ++--- openmp/runtime/test/lit.cfg | 4 + .../runtime/test/transform/tile/foreach.cpp | 228 +++++ .../runtime/test/transform/tile/iterfor.cpp | 233 +++++ .../tile/parallel-wsloop-collapse-foreach.cpp | 366 ++++++++ .../test/transform/unroll/factor_foreach.cpp | 162 ++++ .../test/transform/unroll/factor_intfor.c | 25 + .../test/transform/unroll/factor_iterfor.cpp | 169 ++++ ...actor_parallel-wsloop-collapse-foreach.cpp | 199 ++++ ...factor_parallel-wsloop-collapse-intfor.cpp | 32 + .../test/transform/unroll/full_intfor.c | 25 + .../test/transform/unroll/heuristic_intfor.c | 25 + .../test/transform/unroll/partial_intfor.c | 25 + 18 files changed, 2506 insertions(+), 445 deletions(-) create mode 100644 openmp/runtime/test/transform/tile/foreach.cpp create mode 100644 openmp/runtime/test/transform/tile/iterfor.cpp create mode 100644 openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/unroll/factor_foreach.cpp create mode 100644 openmp/runtime/test/transform/unroll/factor_intfor.c create mode 100644 openmp/runtime/test/transform/unroll/factor_iterfor.cpp create mode 100644 openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-intfor.cpp create mode 100644 openmp/runtime/test/transform/unroll/full_intfor.c create mode 100644 openmp/runtime/test/transform/unroll/heuristic_intfor.c create mode 
100644 openmp/runtime/test/transform/unroll/partial_intfor.c diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 9927459bbc594..51981e1c9a8b9 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1390,9 +1390,7 @@ class SemaOpenMP : public SemaBase { bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, - SmallVectorImpl, 0>> - &OriginalInits); + Stmt *&Body, SmallVectorImpl> &OriginalInits); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index eac5ef3262937..6410f9e102c90 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -142,7 +142,7 @@ class OMPTeamsScope final : public OMPLexicalScope { /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { - const DeclStmt *PreInits; + const Stmt *PreInits; CodeGenFunction::OMPMapVars PreCondVars; if (auto *LD = dyn_cast(&S)) { llvm::DenseSet EmittedAsPrivate; @@ -182,17 +182,34 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } return false; }); - PreInits = cast_or_null(LD->getPreInits()); + PreInits = LD->getPreInits(); } else if (const auto *Tile = dyn_cast(&S)) { - PreInits = cast_or_null(Tile->getPreInits()); + PreInits = Tile->getPreInits(); } else if (const auto *Unroll = dyn_cast(&S)) { - PreInits = cast_or_null(Unroll->getPreInits()); + PreInits = Unroll->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } if (PreInits) { - for (const auto *I : PreInits->decls()) - CGF.EmitVarDecl(cast(*I)); + // CompoundStmts and DeclStmts are used as lists of PreInit statements and + // declarations. 
Since declarations must be visible in the statements + // following their initialization, unpack the CompoundStmt they are nested in. + SmallVector PreInitStmts; + if (auto *PreInitCompound = dyn_cast(PreInits)) + llvm::append_range(PreInitStmts, PreInitCompound->body()); + else + PreInitStmts.push_back(PreInits); + + for (const Stmt *S : PreInitStmts) { + // EmitStmt skips any OMPCapturedExprDecls, but they need to be emitted + // here. + if (auto *PreInitDecl = dyn_cast(S)) { + for (Decl *I : PreInitDecl->decls()) + CGF.EmitVarDecl(cast(*I)); + continue; + } + CGF.EmitStmt(S); + } } PreCondVars.restore(CGF); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6110e5229b076..663dbb927250e 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9828,6 +9828,23 @@ buildPreInits(ASTContext &Context, return nullptr; } +/// Build a flat CompoundStmt from the given pre-init statements, or nullptr. +static Stmt *buildPreInits(ASTContext &Context, ArrayRef PreInits) { + if (!PreInits.empty()) { + SmallVector Stmts; + for (Stmt *S : PreInits) { + // Do not nest CompoundStmts; splice their children in instead. + if (auto *CS = dyn_cast(S)) { + llvm::append_range(Stmts, CS->body()); + continue; + } + Stmts.push_back(S); + } + return CompoundStmt::Create(Context, Stmts, FPOptionsOverride(), {}, {}); + } + return nullptr; +} + /// Build postupdate expression for the given list of postupdates expressions. static Expr *buildPostUpdate(Sema &S, ArrayRef PostUpdates) { Expr *PostUpdate = nullptr; @@ -9924,11 +9941,24 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, Stmt *DependentPreInits = Transform->getPreInits(); if (!DependentPreInits) return; - for (Decl *C : cast(DependentPreInits)->getDeclGroup()) { - auto *D = cast(C); - DeclRefExpr *Ref = buildDeclRefExpr(SemaRef, D, D->getType(), - Transform->getBeginLoc()); - Captures[Ref] = Ref; + + // Search for pre-init declared variables that need to be captured + // to be referenceable inside the directive.
+ SmallVector Constituents; + if (auto *CS = dyn_cast(DependentPreInits)) + llvm::append_range(Constituents, CS->body()); + else + Constituents.push_back(DependentPreInits); + for (Stmt *S : Constituents) { + if (DeclStmt *DC = dyn_cast(S)) { + for (Decl *C : DC->decls()) { + auto *D = cast(C); + DeclRefExpr *Ref = buildDeclRefExpr( + SemaRef, D, D->getType().getNonReferenceType(), + Transform->getBeginLoc()); + Captures[Ref] = Ref; + } + } } })) return 0; @@ -15059,9 +15089,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, - SmallVectorImpl, 0>> - &OriginalInits) { + Stmt *&Body, SmallVectorImpl> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -15097,14 +15125,75 @@ bool SemaOpenMP::checkTransformableLoopNest( llvm_unreachable("Unhandled loop transformation"); if (!DependentPreInits) return; - llvm::append_range(OriginalInits.back(), - cast(DependentPreInits)->getDeclGroup()); + // CompoundStmts are used as lists of other statements, add their + // contents, not the lists themselves to avoid nesting. This is + // necessary because DeclStmts need to be visible after the pre-init. + else if (auto *CS = dyn_cast(DependentPreInits)) + llvm::append_range(OriginalInits.back(), CS->body()); + else + OriginalInits.back().push_back(DependentPreInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } +/// Add preinit statements that need to be propagated from the selected loop.
+static void addLoopPreInits(ASTContext &Context, + OMPLoopBasedDirective::HelperExprs &LoopHelper, + Stmt *LoopStmt, ArrayRef OriginalInit, + SmallVectorImpl &PreInits) { + + // For range-based for-statements, ensure that their syntactic sugar is + // executed by adding them as pre-init statements. + if (auto *CXXRangeFor = dyn_cast(LoopStmt)) { + Stmt *RangeInit = CXXRangeFor->getInit(); + if (RangeInit) + PreInits.push_back(RangeInit); + + DeclStmt *RangeStmt = CXXRangeFor->getRangeStmt(); + PreInits.push_back(new (Context) DeclStmt(RangeStmt->getDeclGroup(), + RangeStmt->getBeginLoc(), + RangeStmt->getEndLoc())); + + DeclStmt *RangeEnd = CXXRangeFor->getEndStmt(); + PreInits.push_back(new (Context) DeclStmt(RangeEnd->getDeclGroup(), + RangeEnd->getBeginLoc(), + RangeEnd->getEndLoc())); + } + + llvm::append_range(PreInits, OriginalInit); + + // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations + if (auto *PI = cast_or_null(LoopHelper.PreInits)) { + PreInits.push_back(new (Context) DeclStmt( + PI->getDeclGroup(), PI->getBeginLoc(), PI->getEndLoc())); + } + + // Gather declarations for the data members used as counters. + for (Expr *CounterRef : LoopHelper.Counters) { + auto *CounterDecl = cast(CounterRef)->getDecl(); + if (isa(CounterDecl)) + PreInits.push_back(new (Context) DeclStmt( + DeclGroupRef(CounterDecl), SourceLocation(), SourceLocation())); + } +} + +/// Collect the loop statements (ForStmt or CXXRangeForStmt) of the affected +/// loop of a construct. 
+static void collectLoopStmts(Stmt *AStmt, MutableArrayRef LoopStmts) { + size_t NumLoops = LoopStmts.size(); + OMPLoopBasedDirective::doForAllLoops( + AStmt, /*TryImperfectlyNestedLoops=*/false, NumLoops, + [LoopStmts](unsigned Cnt, Stmt *CurStmt) { + assert(!LoopStmts[Cnt] && "Loop statement must not yet be assigned"); + LoopStmts[Cnt] = CurStmt; + return false; + }); + assert(llvm::all_of(LoopStmts, [](Stmt *LoopStmt) { return LoopStmt; }) && + "Expecting a loop statement for each affected loop"); +} + StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, @@ -15126,8 +15215,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 0>, 4> - OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15144,7 +15232,11 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, "Expecting loop iteration space dimensionality to match number of " "affected loops"); - SmallVector PreInits; + // Collect all affected loop statements. + SmallVector LoopStmts(NumLoops, nullptr); + collectLoopStmts(AStmt, LoopStmts); + + SmallVector PreInits; CaptureVars CopyTransformer(SemaRef); // Create iteration variables for the generated loops. @@ -15184,20 +15276,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, &SemaRef.PP.getIdentifierTable().get(TileCntName)); TileIndVars[I] = TileCntDecl; } - for (auto &P : OriginalInits[I]) { - if (auto *D = P.dyn_cast()) - PreInits.push_back(D); - else if (auto *PI = dyn_cast_or_null(P.dyn_cast())) - PreInits.append(PI->decl_begin(), PI->decl_end()); - } - if (auto *PI = cast_or_null(LoopHelper.PreInits)) - PreInits.append(PI->decl_begin(), PI->decl_end()); - // Gather declarations for the data members used as counters. 
- for (Expr *CounterRef : LoopHelper.Counters) { - auto *CounterDecl = cast(CounterRef)->getDecl(); - if (isa(CounterDecl)) - PreInits.push_back(CounterDecl); - } + + addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I], + PreInits); } // Once the original iteration values are set, append the innermost body. @@ -15246,19 +15327,20 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; Expr *NumIterations = LoopHelper.NumIterations; auto *OrigCntVar = cast(LoopHelper.Counters[0]); - QualType CntTy = OrigCntVar->getType(); + QualType IVTy = NumIterations->getType(); + Stmt *LoopStmt = LoopStmts[I]; // Commonly used variables. One of the constraints of an AST is that every // node object must appear at most once, hence we define lamdas that create // a new AST node at every use. - auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, CntTy, + auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy, OrigCntVar]() { - return buildDeclRefExpr(SemaRef, TileIndVars[I], CntTy, + return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy, OrigCntVar->getExprLoc()); }; - auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, CntTy, + auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, OrigCntVar]() { - return buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, + return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, OrigCntVar->getExprLoc()); }; @@ -15320,6 +15402,8 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // further into the inner loop. 
SmallVector BodyParts; BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); + if (auto *SourceCXXFor = dyn_cast(LoopStmt)) + BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); BodyParts.push_back(Inner); Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(), Inner->getBeginLoc(), Inner->getEndLoc()); @@ -15334,12 +15418,14 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, auto &LoopHelper = LoopHelpers[I]; Expr *NumIterations = LoopHelper.NumIterations; DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); - QualType CntTy = OrigCntVar->getType(); + QualType IVTy = NumIterations->getType(); - // Commonly used variables. - auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, CntTy, + // Commonly used variables. One of the constraints of an AST is that every + // node object must appear at most once, hence we define lamdas that create + // a new AST node at every use. + auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, OrigCntVar]() { - return buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy, + return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, OrigCntVar->getExprLoc()); }; @@ -15405,8 +15491,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, 0>, NumLoops + 1> - OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15418,6 +15503,10 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, NumGeneratedLoops, nullptr, nullptr); + assert(LoopHelpers.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + assert(OriginalInits.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); OMPLoopBasedDirective::HelperExprs 
&LoopHelper = LoopHelpers.front(); if (FullClause) { @@ -15481,24 +15570,13 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, // of a canonical loop nest where these PreInits are emitted before the // outermost directive. + // Find the loop statement. + Stmt *LoopStmt = nullptr; + collectLoopStmts(AStmt, {LoopStmt}); + // Determine the PreInit declarations. - SmallVector PreInits; - assert(OriginalInits.size() == 1 && - "Expecting a single-dimensional loop iteration space"); - for (auto &P : OriginalInits[0]) { - if (auto *D = P.dyn_cast()) - PreInits.push_back(D); - else if (auto *PI = dyn_cast_or_null(P.dyn_cast())) - PreInits.append(PI->decl_begin(), PI->decl_end()); - } - if (auto *PI = cast_or_null(LoopHelper.PreInits)) - PreInits.append(PI->decl_begin(), PI->decl_end()); - // Gather declarations for the data members used as counters. - for (Expr *CounterRef : LoopHelper.Counters) { - auto *CounterDecl = cast(CounterRef)->getDecl(); - if (isa(CounterDecl)) - PreInits.push_back(CounterDecl); - } + SmallVector PreInits; + addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); auto *IterationVarRef = cast(LoopHelper.IterationVarRef); QualType IVTy = IterationVarRef->getType(); @@ -15604,6 +15682,8 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, // Inner For statement. 
SmallVector InnerBodyStmts; InnerBodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); + if (auto *CXXRangeFor = dyn_cast(LoopStmt)) + InnerBodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); InnerBodyStmts.push_back(Body); CompoundStmt *InnerBody = CompoundStmt::Create(getASTContext(), InnerBodyStmts, FPOptionsOverride(), diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp index 93a3a14133ab5..5fd5609b844cc 100644 --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -1,10 +1,10 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4 // Check code generation -// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 // Check same results after serialization round-trip -// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -fopenmp -emit-pch -o %t %s -// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest -std=c++20 -fopenmp -include-pch %t 
-emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 // expected-no-diagnostics #ifndef HEADER @@ -91,22 +91,38 @@ extern "C" void foo8(int a) { } +typedef struct { double array[12]; } data_t; +extern "C" void foo9(data_t data) { +#pragma omp tile sizes(5) + for (double v : data.array) + body(v); +} + + +extern "C" void foo10(data_t data) { +#pragma omp tile sizes(5) + for (double c = 42.0; double v : data.array) + body(c, v); +} + + #endif /* HEADER */ -// CHECK1-LABEL: define {{[^@]+}}@body -// CHECK1-SAME: (...) #[[ATTR0:[0-9]+]] { + +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK1-SAME: () #[[ATTR1:[0-9]+]] section ".text.startup" { +// CHECK1-LABEL: define internal void @__cxx_global_var_init( +// CHECK1-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 @@ -115,50 +131,52 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ev -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[I2]], ptr [[I]], align 8 +// CHECK1-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I3]], ptr [[I2]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 -// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END11:%.*]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END12:%.*]] // CHECK1: for.body: // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND3:%.*]] -// CHECK1: for.cond3: +// CHECK1-NEXT: br label [[FOR_COND4:%.*]] +// CHECK1: for.cond4: // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 5 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp slt i32 4, [[ADD]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD]] +// CHECK1-NEXT: br i1 
[[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 5 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD5]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP2]], [[COND]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END:%.*]] -// CHECK1: for.body7: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP2]], [[COND]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[FOR_BODY8:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body8: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 3 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 7, [[MUL]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I]], align 8 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I2]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 // CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP8]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] @@ -166,20 +184,20 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK1: for.end: -// CHECK1-NEXT: br label [[FOR_INC9:%.*]] -// CHECK1: for.inc9: +// CHECK1-NEXT: br label [[FOR_INC10:%.*]] +// CHECK1: for.inc10: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP10]], 5 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] -// CHECK1: for.end11: +// CHECK1: for.end12: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo1 -// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -195,81 +213,83 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 // CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]] // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 
[[TMP7]], [[ADD5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] // CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]] // CHECK1: for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND6:%.*]] // CHECK1: for.cond6: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP13]], 5 // CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP14]], 1 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// 
CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP15]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP11]], [[COND]] // CHECK1-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body13: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[ADD14:%.*]] = add i32 [[TMP16]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP19]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC15:%.*]] // CHECK1: for.inc15: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[TMP21]], 5 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: for.end17: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo2 -// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo2( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -381,8 +401,8 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo3 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -523,8 +543,8 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo4 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-LABEL: 
define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -676,8 +696,8 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo5 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -885,15 +905,15 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo6 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo6( +// CHECK1-SAME: ) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo6.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-LABEL: define internal void @foo6.omp_outlined( +// CHECK1-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -988,15 +1008,15 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@tfoo7 -// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @tfoo7( +// CHECK1-SAME: ) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_ -// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) 
#[[ATTR0]] comdat { +// CHECK1-LABEL: define linkonce_odr void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR0]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -1039,7 +1059,7 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP9]], 1 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], 5 // CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: @@ -1048,7 +1068,7 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP12]], 5 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -1065,22 +1085,22 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC15:%.*]] // CHECK1: for.inc15: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK1-NEXT: [[ADD16:%.*]] 
= add i32 [[TMP17]], 5 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: for.end17: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@foo8 -// CHECK1-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK1-LABEL: define dso_local void @foo8( +// CHECK1-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -1168,22 +1188,219 @@ extern "C" void foo8(int a) { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_tile_codegen.cpp -// CHECK1-SAME: () #[[ATTR1]] section ".text.startup" { +// CHECK1-LABEL: define dso_local void @foo9( +// CHECK1-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTFLOOR_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTTILE_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[STRUCT_DATA_T]], ptr [[DATA]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARRAY]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 12 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr 
[[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP11]], 5 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp slt i64 [[ADD8]], [[ADD9]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i64 [[TMP13]], 5 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP9]], [[COND]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body14: +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[MUL]] +// CHECK1-NEXT: store ptr [[ADD_PTR15]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP16]], align 8 +// CHECK1-NEXT: store double [[TMP17]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[V]], align 8 +// CHECK1-NEXT: call void (...) 
@body(double noundef [[TMP18]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP19]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC16:%.*]] +// CHECK1: for.inc16: +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP20]], 5 +// CHECK1-NEXT: store i64 [[ADD17]], ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1: for.end18: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo10( +// CHECK1-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTFLOOR_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTTILE_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK1-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK1-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[STRUCT_DATA_T]], ptr [[DATA]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARRAY]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: 
[[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 12 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], 
label [[FOR_END18:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP11]], 5 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp slt i64 [[ADD8]], [[ADD9]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i64 [[TMP13]], 5 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP9]], [[COND]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body14: +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[MUL]] +// CHECK1-NEXT: store ptr [[ADD_PTR15]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP16]], align 8 +// 
CHECK1-NEXT: store double [[TMP17]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[V]], align 8 +// CHECK1-NEXT: call void (...) @body(double noundef [[TMP18]], double noundef [[TMP19]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC16:%.*]] +// CHECK1: for.inc16: +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP21]], 5 +// CHECK1-NEXT: store i64 [[ADD17]], ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1: for.end18: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @_GLOBAL__sub_I_tile_codegen.cpp( +// CHECK1-SAME: ) #[[ATTR1]] section ".text.startup" { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @__cxx_global_var_init() // CHECK1-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" { +// CHECK2-LABEL: define internal void @__cxx_global_var_init( +// CHECK2-SAME: ) #[[ATTR0:[0-9]+]] section ".text.startup" { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ev -// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr 
#[[ATTR1:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 @@ -1192,50 +1409,52 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC2Ev -// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[I2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[I2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[I2]], ptr [[I]], align 8 +// CHECK2-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I3]], ptr [[I2]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END11:%.*]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END12:%.*]] // CHECK2: for.body: // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // 
CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND3:%.*]] -// CHECK2: for.cond3: +// CHECK2-NEXT: br label [[FOR_COND4:%.*]] +// CHECK2: for.cond4: // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 5 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp slt i32 4, [[ADD]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 5 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD5]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP2]], [[COND]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[FOR_BODY7:%.*]], label [[FOR_END:%.*]] -// CHECK2: for.body7: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP2]], [[COND]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[FOR_BODY8:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body8: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 3 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 7, [[MUL]] -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I]], align 8 -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I2]], 
align 8 +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I2]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 // CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP8]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] @@ -1243,26 +1462,26 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC9:%.*]] -// CHECK2: for.inc9: +// CHECK2-NEXT: br label [[FOR_INC10:%.*]] +// CHECK2: for.inc10: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP10]], 5 -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] -// CHECK2: for.end11: +// CHECK2: for.end12: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@body -// CHECK2-SAME: (...) #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) 
#[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo1 -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -1278,81 +1497,183 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 // CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] // 
CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]] // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]] +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD5]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]] // CHECK2: for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND6:%.*]] // CHECK2: for.cond6: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], 
align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP13]], 5 // CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP14]], 1 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP15]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]] +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP11]], [[COND]] // CHECK2-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body13: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[ADD14:%.*]] = add i32 [[TMP16]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP19]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP20]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC15:%.*]] // CHECK2: for.inc15: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add i32 [[TMP21]], 5 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: for.end17: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo2 -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo10( +// CHECK2-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: 
[[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTFLOOR_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTTILE_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK2-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK2-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[STRUCT_DATA_T]], ptr [[DATA]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARRAY]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 12 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: 
[[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: store i64 [[TMP8]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP11]], 5 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp slt i64 [[ADD8]], [[ADD9]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// 
CHECK2-NEXT: [[ADD12:%.*]] = add nsw i64 [[TMP13]], 5 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP9]], [[COND]] +// CHECK2-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body14: +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP15]], 1 +// CHECK2-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[MUL]] +// CHECK2-NEXT: store ptr [[ADD_PTR15]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP16]], align 8 +// CHECK2-NEXT: store double [[TMP17]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load double, ptr [[C]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load double, ptr [[V]], align 8 +// CHECK2-NEXT: call void (...) 
@body(double noundef [[TMP18]], double noundef [[TMP19]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC16:%.*]] +// CHECK2: for.inc16: +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP21]], 5 +// CHECK2-NEXT: store i64 [[ADD17]], ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end18: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo2( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -1438,34 +1759,34 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_1_IV_J]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_1_IV_J]], align 4 -// CHECK2-NEXT: br label [[FOR_COND10]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND10]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC22:%.*]] // CHECK2: for.inc22: // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK2-NEXT: store i32 [[INC23]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: for.end24: // CHECK2-NEXT: br label 
[[FOR_INC25:%.*]] // CHECK2: for.inc25: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 // CHECK2-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK2-NEXT: store i32 [[ADD26]], ptr [[DOTFLOOR_1_IV_J]], align 4 -// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: for.end27: // CHECK2-NEXT: br label [[FOR_INC28:%.*]] // CHECK2: for.inc28: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP17]], 5 // CHECK2-NEXT: store i32 [[ADD29]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: for.end30: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo3 -// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1574,21 +1895,21 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTTILE_1_IV_J]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_1_IV_J]], align 4 -// CHECK2-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC27:%.*]] // CHECK2: for.inc27: // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC28:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK2-NEXT: store i32 [[INC28]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: for.end29: // 
CHECK2-NEXT: br label [[FOR_INC30:%.*]] // CHECK2: for.inc30: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 // CHECK2-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP22]], 5 // CHECK2-NEXT: store i32 [[ADD31]], ptr [[DOTFLOOR_1_IV_J]], align 4 -// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: for.end32: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -1606,8 +1927,8 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo4 -// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1727,21 +2048,21 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTTILE_1_IV_J]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_1_IV_J]], align 4 -// CHECK2-NEXT: br label [[FOR_COND20]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND20]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC32:%.*]] // CHECK2: for.inc32: // CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC33:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK2-NEXT: store i32 [[INC33]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND8]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND8]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: for.end34: // CHECK2-NEXT: br label [[FOR_INC35:%.*]] // CHECK2: for.inc35: // CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 // CHECK2-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP24]], 5 // CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTFLOOR_1_IV_J]], align 4 -// CHECK2-NEXT: 
br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: for.end37: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -1759,8 +2080,8 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo5 -// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1968,15 +2289,15 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo6 -// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo6( +// CHECK2-SAME: ) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined) // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo6.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-LABEL: define internal void @foo6.omp_outlined( +// CHECK2-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -2054,7 +2375,7 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ 
-2071,8 +2392,8 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@foo8 -// CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo8( +// CHECK2-SAME: i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -2138,7 +2459,7 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC17:%.*]] // CHECK2: for.inc17: @@ -2155,20 +2476,117 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 // CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP14]], [[COND22]] // CHECK2-NEXT: store i32 [[ADD23]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: for.end24: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@tfoo7 -// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-LABEL: define dso_local void @foo9( +// CHECK2-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTFLOOR_0_IV___BEGIN2:%.*]] = alloca i64, align 8 
+// CHECK2-NEXT: [[DOTTILE_0_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[ARRAY:%.*]] = getelementptr inbounds [[STRUCT_DATA_T]], ptr [[DATA]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARRAY]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 12 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 
8 +// CHECK2-NEXT: store i64 0, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: store i64 [[TMP8]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP11]], 5 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp slt i64 [[ADD8]], [[ADD9]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i64 [[TMP13]], 5 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP9]], [[COND]] +// CHECK2-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body14: +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP15]], 1 +// CHECK2-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[MUL]] +// CHECK2-NEXT: store ptr [[ADD_PTR15]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP16]], align 8 +// CHECK2-NEXT: store double [[TMP17]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load double, ptr [[V]], align 8 +// CHECK2-NEXT: call void (...) @body(double noundef [[TMP18]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP19]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTTILE_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC16:%.*]] +// CHECK2: for.inc16: +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP20]], 5 +// CHECK2-NEXT: store i64 [[ADD17]], ptr [[DOTFLOOR_0_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2: for.end18: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo7( +// CHECK2-SAME: ) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42) // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_ -// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { +// CHECK2-LABEL: define linkonce_odr void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { // 
CHECK2-NEXT: entry: // CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 @@ -2211,7 +2629,7 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP9]], 1 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], 5 // CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: @@ -2220,7 +2638,7 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP12]], 5 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ] @@ -2237,23 +2655,74 @@ extern "C" void foo8(int a) { // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: for.end: // CHECK2-NEXT: br label [[FOR_INC15:%.*]] // CHECK2: for.inc15: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK2-NEXT: [[ADD16:%.*]] = add i32 [[TMP17]], 5 // CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: br label 
[[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: for.end17: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_tile_codegen.cpp -// CHECK2-SAME: () #[[ATTR0]] section ".text.startup" { +// CHECK2-LABEL: define internal void @_GLOBAL__sub_I_tile_codegen.cpp( +// CHECK2-SAME: ) #[[ATTR0]] section ".text.startup" { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void @__cxx_global_var_init() // CHECK2-NEXT: ret void // +//. +// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +// CHECK1: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]]} +// CHECK1: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]]} +// CHECK1: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]]} +// CHECK1: [[LOOP14]] = distinct !{[[LOOP14]], [[META4]]} +// CHECK1: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]]} +// CHECK1: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]]} +// CHECK1: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]]} +// CHECK1: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]]} +// CHECK1: [[LOOP21]] = distinct !{[[LOOP21]], [[META4]]} +// CHECK1: [[LOOP22]] = distinct !{[[LOOP22]], [[META4]]} +// CHECK1: [[LOOP23]] = distinct !{[[LOOP23]], [[META4]]} +// CHECK1: [[LOOP24]] = distinct !{[[LOOP24]], [[META4]]} +// CHECK1: [[LOOP25]] = distinct !{[[LOOP25]], [[META4]]} +// CHECK1: [[LOOP26]] = distinct !{[[LOOP26]], [[META4]]} +// CHECK1: [[LOOP27]] = distinct !{[[LOOP27]], [[META4]]} +// CHECK1: [[LOOP28]] = distinct !{[[LOOP28]], [[META4]]} +//. 
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +// CHECK2: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]]} +// CHECK2: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]]} +// CHECK2: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]]} +// CHECK2: [[LOOP14]] = distinct !{[[LOOP14]], [[META4]]} +// CHECK2: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]]} +// CHECK2: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]]} +// CHECK2: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]]} +// CHECK2: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]]} +// CHECK2: [[LOOP19]] = distinct !{[[LOOP19]], [[META4]]} +// CHECK2: [[LOOP20]] = distinct !{[[LOOP20]], [[META4]]} +// CHECK2: [[LOOP23]] = distinct !{[[LOOP23]], [[META4]]} +// CHECK2: [[LOOP24]] = distinct !{[[LOOP24]], [[META4]]} +// CHECK2: [[LOOP25]] = distinct !{[[LOOP25]], [[META4]]} +// CHECK2: [[LOOP26]] = distinct !{[[LOOP26]], [[META4]]} +// CHECK2: [[LOOP27]] = distinct !{[[LOOP27]], [[META4]]} +// CHECK2: [[LOOP28]] = distinct !{[[LOOP28]], [[META4]]} +//. diff --git a/clang/test/OpenMP/tile_codegen_for_dependent.cpp b/clang/test/OpenMP/tile_codegen_for_dependent.cpp index 93c51c9165a47..820d33d15287b 100644 --- a/clang/test/OpenMP/tile_codegen_for_dependent.cpp +++ b/clang/test/OpenMP/tile_codegen_for_dependent.cpp @@ -17,7 +17,7 @@ extern "C" void body(...) {} -// IR-LABEL: @func( +// IR-LABEL: define {{.*}}@func( // IR-NEXT: [[ENTRY:.*]]: // IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 // IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 @@ -27,18 +27,18 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[I:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTNEW_STEP:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_5:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_7:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTFLOOR_0_IV_I11:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2) +// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:.+]]) // IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 // IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 // IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 @@ -49,44 +49,44 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4 // IR-NEXT: store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4 // IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTNEW_STEP]], align 4 // IR-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4 // IR-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 // IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]] -// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[SUB3:.+]] = sub i32 %[[SUB]], 1 +// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 +// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB3]], %[[TMP7]] +// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 // IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1 -// IR-NEXT: store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4 +// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[DIV]], 1 +// IR-NEXT: store i32 %[[SUB4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD6:.+]] = add i32 %[[TMP9]], 1 +// IR-NEXT: store i32 %[[ADD6]], ptr %[[DOTCAPTURE_EXPR_5]], 
align 4 +// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: %[[SUB8:.+]] = sub i32 %[[TMP10]], -3 +// IR-NEXT: %[[DIV9:.+]] = udiv i32 %[[SUB8]], 4 +// IR-NEXT: %[[SUB10:.+]] = sub i32 %[[DIV9]], 1 +// IR-NEXT: store i32 %[[SUB10]], ptr %[[DOTCAPTURE_EXPR_7]], align 4 // IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 +// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_5]], align 4 // IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]] // IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_PRECOND_THEN]]: // IR-NEXT: store i32 0, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 +// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 // IR-NEXT: store i32 %[[TMP12]], ptr %[[DOTOMP_UB]], align 4 // IR-NEXT: store i32 1, ptr %[[DOTOMP_STRIDE]], align 4 // IR-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:.+]], i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) // IR-NEXT: %[[TMP13:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]] -// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: %[[CMP12:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]] +// IR-NEXT: br i1 %[[CMP12]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // IR-EMPTY: // IR-NEXT: 
[[COND_TRUE]]: -// IR-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 +// IR-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 // IR-NEXT: br label %[[COND_END:.+]] // IR-EMPTY: // IR-NEXT: [[COND_FALSE]]: @@ -103,50 +103,50 @@ extern "C" void body(...) {} // IR-NEXT: [[OMP_INNER_FOR_COND]]: // IR-NEXT: %[[TMP18:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP19]], 1 -// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]] -// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] +// IR-NEXT: %[[ADD13:.+]] = add i32 %[[TMP19]], 1 +// IR-NEXT: %[[CMP14:.+]] = icmp ult i32 %[[TMP18]], %[[ADD13]] +// IR-NEXT: br i1 %[[CMP14]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_BODY]]: // IR-NEXT: %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP20]], 4 -// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD16]], ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4 +// IR-NEXT: %[[ADD15:.+]] = add i32 0, %[[MUL]] +// IR-NEXT: store i32 %[[ADD15]], ptr %[[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I11]], align 4 // IR-NEXT: store i32 %[[TMP21]], ptr %[[DOTTILE_0_IV_I]], align 4 // IR-NEXT: br label %[[FOR_COND:.+]] // IR-EMPTY: // IR-NEXT: [[FOR_COND]]: // IR-NEXT: %[[TMP22:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP23]], 1 -// IR-NEXT: %[[TMP24:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4 -// IR-NEXT: %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]] -// IR-NEXT: br i1 
%[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE20]]: -// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD21:.+]] = add i32 %[[TMP25]], 1 -// IR-NEXT: br label %[[COND_END24:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE22]]: -// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4 -// IR-NEXT: br label %[[COND_END24]] -// IR-EMPTY: -// IR-NEXT: [[COND_END24]]: -// IR-NEXT: %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ] -// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]] -// IR-NEXT: br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] +// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD16:.+]] = add i32 %[[TMP23]], 1 +// IR-NEXT: %[[TMP24:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP24]], 4 +// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[ADD16]], %[[ADD17]] +// IR-NEXT: br i1 %[[CMP18]], label %[[COND_TRUE19:.+]], label %[[COND_FALSE21:.+]] +// IR-EMPTY: +// IR-NEXT: [[COND_TRUE19]]: +// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD20:.+]] = add i32 %[[TMP25]], 1 +// IR-NEXT: br label %[[COND_END23:.+]] +// IR-EMPTY: +// IR-NEXT: [[COND_FALSE21]]: +// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I11]], align 4 +// IR-NEXT: %[[ADD22:.+]] = add i32 %[[TMP26]], 4 +// IR-NEXT: br label %[[COND_END23]] +// IR-EMPTY: +// IR-NEXT: [[COND_END23]]: +// IR-NEXT: %[[COND24:.+]] = phi i32 [ %[[ADD20]], %[[COND_TRUE19]] ], [ %[[ADD22]], %[[COND_FALSE21]] ] +// IR-NEXT: %[[CMP25:.+]] = icmp ult i32 %[[TMP22]], %[[COND24]] +// IR-NEXT: br i1 %[[CMP25]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] // IR-EMPTY: // IR-NEXT: [[FOR_BODY]]: // IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], 
align 4 // IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP29:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]] -// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]] -// IR-NEXT: store i32 %[[ADD28]], ptr %[[I]], align 4 +// IR-NEXT: %[[TMP29:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 +// IR-NEXT: %[[MUL26:.+]] = mul i32 %[[TMP28]], %[[TMP29]] +// IR-NEXT: %[[ADD27:.+]] = add i32 %[[TMP27]], %[[MUL26]] +// IR-NEXT: store i32 %[[ADD27]], ptr %[[I]], align 4 // IR-NEXT: %[[TMP30:.+]] = load i32, ptr %[[START_ADDR]], align 4 // IR-NEXT: %[[TMP31:.+]] = load i32, ptr %[[END_ADDR]], align 4 // IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 @@ -156,9 +156,9 @@ extern "C" void body(...) {} // IR-EMPTY: // IR-NEXT: [[FOR_INC]]: // IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP34]], 1 +// IR-NEXT: %[[INC:.+]] = add i32 %[[TMP34]], 1 // IR-NEXT: store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]] +// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] // IR-EMPTY: // IR-NEXT: [[FOR_END]]: // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] @@ -168,19 +168,19 @@ extern "C" void body(...) 
{} // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_INC]]: // IR-NEXT: %[[TMP35:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP35]], 1 -// IR-NEXT: store i32 %[[ADD29]], ptr %[[DOTOMP_IV]], align 4 +// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP35]], 1 +// IR-NEXT: store i32 %[[ADD28]], ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: br label %[[OMP_INNER_FOR_COND]] // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_END]]: // IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]]) +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 %[[TMP0]]) // IR-NEXT: br label %[[OMP_PRECOND_END]] // IR-EMPTY: // IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:.+]], i32 %[[TMP0]]) // IR-NEXT: ret void // IR-NEXT: } extern "C" void func(int start, int end, int step) { diff --git a/clang/test/OpenMP/tile_codegen_tile_for.cpp b/clang/test/OpenMP/tile_codegen_tile_for.cpp index d0fb89398c241..91536c406368b 100644 --- a/clang/test/OpenMP/tile_codegen_tile_for.cpp +++ b/clang/test/OpenMP/tile_codegen_tile_for.cpp @@ -16,7 +16,7 @@ extern "C" void body(...) {} -// IR-LABEL: @func( +// IR-LABEL: define {{.*}}@func( // IR-NEXT: [[ENTRY:.*]]: // IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 // IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 @@ -26,22 +26,22 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[I:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTNEW_STEP:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_5:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_7:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_11:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTCAPTURE_EXPR_13:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTFLOOR_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18:.+]] = alloca i32, align 4 +// IR-NEXT: %[[DOTFLOOR_0_IV__FLOOR_0_IV_I17:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTTILE_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4 // IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2) +// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:.+]]) // IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 // IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 // IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 @@ -52,53 +52,53 @@ extern "C" void body(...) 
{} // IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4 // IR-NEXT: store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4 // IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTNEW_STEP]], align 4 // IR-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4 // IR-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 // IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]] -// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[SUB3:.+]] = sub i32 %[[SUB]], 1 +// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 +// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB3]], %[[TMP7]] +// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 // IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4 +// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[DIV]], 1 +// IR-NEXT: store i32 %[[SUB4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 // IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1 -// IR-NEXT: store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD13:.+]] = add i32 %[[TMP11]], 1 -// 
IR-NEXT: store i32 %[[ADD13]], ptr %[[DOTCAPTURE_EXPR_12]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_12]], align 4 -// IR-NEXT: %[[SUB15:.+]] = sub i32 %[[TMP12]], -2 -// IR-NEXT: %[[DIV16:.+]] = udiv i32 %[[SUB15]], 3 -// IR-NEXT: %[[SUB17:.+]] = sub i32 %[[DIV16]], 1 -// IR-NEXT: store i32 %[[SUB17]], ptr %[[DOTCAPTURE_EXPR_14]], align 4 +// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD6:.+]] = add i32 %[[TMP9]], 1 +// IR-NEXT: store i32 %[[ADD6]], ptr %[[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: %[[SUB8:.+]] = sub i32 %[[TMP10]], -3 +// IR-NEXT: %[[DIV9:.+]] = udiv i32 %[[SUB8]], 4 +// IR-NEXT: %[[SUB10:.+]] = sub i32 %[[DIV9]], 1 +// IR-NEXT: store i32 %[[SUB10]], ptr %[[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: %[[ADD12:.+]] = add i32 %[[TMP11]], 1 +// IR-NEXT: store i32 %[[ADD12]], ptr %[[DOTCAPTURE_EXPR_11]], align 4 +// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_11]], align 4 +// IR-NEXT: %[[SUB14:.+]] = sub i32 %[[TMP12]], -2 +// IR-NEXT: %[[DIV15:.+]] = udiv i32 %[[SUB14]], 3 +// IR-NEXT: %[[SUB16:.+]] = sub i32 %[[DIV15]], 1 +// IR-NEXT: store i32 %[[SUB16]], ptr %[[DOTCAPTURE_EXPR_13]], align 4 // IR-NEXT: store i32 0, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP13:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_12]], align 4 +// IR-NEXT: %[[TMP13:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_11]], align 4 // IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]] // IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_PRECOND_THEN]]: // IR-NEXT: store i32 0, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 +// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_13]], align 4 // IR-NEXT: store 
i32 %[[TMP14]], ptr %[[DOTOMP_UB]], align 4 // IR-NEXT: store i32 1, ptr %[[DOTOMP_STRIDE]], align 4 // IR-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:.+]], i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) // IR-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 -// IR-NEXT: %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]] -// IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] +// IR-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_13]], align 4 +// IR-NEXT: %[[CMP18:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]] +// IR-NEXT: br i1 %[[CMP18]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] // IR-EMPTY: // IR-NEXT: [[COND_TRUE]]: -// IR-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4 +// IR-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_13]], align 4 // IR-NEXT: br label %[[COND_END:.+]] // IR-EMPTY: // IR-NEXT: [[COND_FALSE]]: @@ -115,83 +115,83 @@ extern "C" void body(...) 
{} // IR-NEXT: [[OMP_INNER_FOR_COND]]: // IR-NEXT: %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD20:.+]] = add i32 %[[TMP21]], 1 -// IR-NEXT: %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]] -// IR-NEXT: br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] +// IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP21]], 1 +// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP20]], %[[ADD19]] +// IR-NEXT: br i1 %[[CMP20]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_BODY]]: // IR-NEXT: %[[TMP22:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP22]], 3 -// IR-NEXT: %[[ADD22:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD22]], ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 +// IR-NEXT: %[[ADD21:.+]] = add i32 0, %[[MUL]] +// IR-NEXT: store i32 %[[ADD21]], ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 // IR-NEXT: store i32 %[[TMP23]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 // IR-NEXT: br label %[[FOR_COND:.+]] // IR-EMPTY: // IR-NEXT: [[FOR_COND]]: // IR-NEXT: %[[TMP24:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 1 -// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[ADD24:.+]] = add i32 %[[TMP26]], 3 -// IR-NEXT: %[[CMP25:.+]] = icmp ult i32 %[[ADD23]], %[[ADD24]] -// IR-NEXT: br i1 %[[CMP25]], label %[[COND_TRUE26:.+]], label %[[COND_FALSE28:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE26]]: -// IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[ADD27:.+]] = 
add i32 %[[TMP27]], 1 -// IR-NEXT: br label %[[COND_END30:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE28]]: -// IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4 -// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP28]], 3 -// IR-NEXT: br label %[[COND_END30]] -// IR-EMPTY: -// IR-NEXT: [[COND_END30]]: -// IR-NEXT: %[[COND31:.+]] = phi i32 [ %[[ADD27]], %[[COND_TRUE26]] ], [ %[[ADD29]], %[[COND_FALSE28]] ] -// IR-NEXT: %[[CMP32:.+]] = icmp ult i32 %[[TMP24]], %[[COND31]] -// IR-NEXT: br i1 %[[CMP32]], label %[[FOR_BODY:.+]], label %[[FOR_END51:.+]] +// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: %[[ADD22:.+]] = add i32 %[[TMP25]], 1 +// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP26]], 3 +// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[ADD22]], %[[ADD23]] +// IR-NEXT: br i1 %[[CMP24]], label %[[COND_TRUE25:.+]], label %[[COND_FALSE27:.+]] +// IR-EMPTY: +// IR-NEXT: [[COND_TRUE25]]: +// IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: %[[ADD26:.+]] = add i32 %[[TMP27]], 1 +// IR-NEXT: br label %[[COND_END29:.+]] +// IR-EMPTY: +// IR-NEXT: [[COND_FALSE27]]: +// IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4 +// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP28]], 3 +// IR-NEXT: br label %[[COND_END29]] +// IR-EMPTY: +// IR-NEXT: [[COND_END29]]: +// IR-NEXT: %[[COND30:.+]] = phi i32 [ %[[ADD26]], %[[COND_TRUE25]] ], [ %[[ADD28]], %[[COND_FALSE27]] ] +// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP24]], %[[COND30]] +// IR-NEXT: br i1 %[[CMP31]], label %[[FOR_BODY:.+]], label %[[FOR_END50:.+]] // IR-EMPTY: // IR-NEXT: [[FOR_BODY]]: // IR-NEXT: %[[TMP29:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[MUL33:.+]] = mul i32 %[[TMP29]], 4 -// IR-NEXT: %[[ADD34:.+]] = add i32 0, %[[MUL33]] -// IR-NEXT: store i32 %[[ADD34]], ptr 
%[[DOTFLOOR_0_IV_I]], align 4 +// IR-NEXT: %[[MUL32:.+]] = mul i32 %[[TMP29]], 4 +// IR-NEXT: %[[ADD33:.+]] = add i32 0, %[[MUL32]] +// IR-NEXT: store i32 %[[ADD33]], ptr %[[DOTFLOOR_0_IV_I]], align 4 // IR-NEXT: %[[TMP30:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 // IR-NEXT: store i32 %[[TMP30]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND35:.+]] +// IR-NEXT: br label %[[FOR_COND34:.+]] // IR-EMPTY: -// IR-NEXT: [[FOR_COND35]]: +// IR-NEXT: [[FOR_COND34]]: // IR-NEXT: %[[TMP31:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP32]], 1 +// IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD35:.+]] = add i32 %[[TMP32]], 1 // IR-NEXT: %[[TMP33:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[ADD37:.+]] = add nsw i32 %[[TMP33]], 4 -// IR-NEXT: %[[CMP38:.+]] = icmp ult i32 %[[ADD36]], %[[ADD37]] -// IR-NEXT: br i1 %[[CMP38]], label %[[COND_TRUE39:.+]], label %[[COND_FALSE41:.+]] +// IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP33]], 4 +// IR-NEXT: %[[CMP37:.+]] = icmp ult i32 %[[ADD35]], %[[ADD36]] +// IR-NEXT: br i1 %[[CMP37]], label %[[COND_TRUE38:.+]], label %[[COND_FALSE40:.+]] // IR-EMPTY: -// IR-NEXT: [[COND_TRUE39]]: -// IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD40:.+]] = add i32 %[[TMP34]], 1 -// IR-NEXT: br label %[[COND_END43:.+]] +// IR-NEXT: [[COND_TRUE38]]: +// IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: %[[ADD39:.+]] = add i32 %[[TMP34]], 1 +// IR-NEXT: br label %[[COND_END42:.+]] // IR-EMPTY: -// IR-NEXT: [[COND_FALSE41]]: +// IR-NEXT: [[COND_FALSE40]]: // IR-NEXT: %[[TMP35:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[ADD42:.+]] = add nsw i32 %[[TMP35]], 4 -// IR-NEXT: br label %[[COND_END43]] +// IR-NEXT: %[[ADD41:.+]] = add i32 %[[TMP35]], 4 +// 
IR-NEXT: br label %[[COND_END42]] // IR-EMPTY: -// IR-NEXT: [[COND_END43]]: -// IR-NEXT: %[[COND44:.+]] = phi i32 [ %[[ADD40]], %[[COND_TRUE39]] ], [ %[[ADD42]], %[[COND_FALSE41]] ] -// IR-NEXT: %[[CMP45:.+]] = icmp ult i32 %[[TMP31]], %[[COND44]] -// IR-NEXT: br i1 %[[CMP45]], label %[[FOR_BODY46:.+]], label %[[FOR_END:.+]] +// IR-NEXT: [[COND_END42]]: +// IR-NEXT: %[[COND43:.+]] = phi i32 [ %[[ADD39]], %[[COND_TRUE38]] ], [ %[[ADD41]], %[[COND_FALSE40]] ] +// IR-NEXT: %[[CMP44:.+]] = icmp ult i32 %[[TMP31]], %[[COND43]] +// IR-NEXT: br i1 %[[CMP44]], label %[[FOR_BODY45:.+]], label %[[FOR_END:.+]] // IR-EMPTY: -// IR-NEXT: [[FOR_BODY46]]: +// IR-NEXT: [[FOR_BODY45]]: // IR-NEXT: %[[TMP36:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 // IR-NEXT: %[[TMP37:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[TMP38:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL47:.+]] = mul i32 %[[TMP37]], %[[TMP38]] -// IR-NEXT: %[[ADD48:.+]] = add i32 %[[TMP36]], %[[MUL47]] -// IR-NEXT: store i32 %[[ADD48]], ptr %[[I]], align 4 +// IR-NEXT: %[[TMP38:.+]] = load i32, ptr %[[DOTNEW_STEP]], align 4 +// IR-NEXT: %[[MUL46:.+]] = mul i32 %[[TMP37]], %[[TMP38]] +// IR-NEXT: %[[ADD47:.+]] = add i32 %[[TMP36]], %[[MUL46]] +// IR-NEXT: store i32 %[[ADD47]], ptr %[[I]], align 4 // IR-NEXT: %[[TMP39:.+]] = load i32, ptr %[[START_ADDR]], align 4 // IR-NEXT: %[[TMP40:.+]] = load i32, ptr %[[END_ADDR]], align 4 // IR-NEXT: %[[TMP41:.+]] = load i32, ptr %[[STEP_ADDR]], align 4 @@ -201,20 +201,20 @@ extern "C" void body(...) 
{} // IR-EMPTY: // IR-NEXT: [[FOR_INC]]: // IR-NEXT: %[[TMP43:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP43]], 1 +// IR-NEXT: %[[INC:.+]] = add i32 %[[TMP43]], 1 // IR-NEXT: store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND35]], !llvm.loop ![[LOOP2:[0-9]+]] +// IR-NEXT: br label %[[FOR_COND34]], !llvm.loop ![[LOOP3:[0-9]+]] // IR-EMPTY: // IR-NEXT: [[FOR_END]]: -// IR-NEXT: br label %[[FOR_INC49:.+]] +// IR-NEXT: br label %[[FOR_INC48:.+]] // IR-EMPTY: -// IR-NEXT: [[FOR_INC49]]: +// IR-NEXT: [[FOR_INC48]]: // IR-NEXT: %[[TMP44:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: %[[INC50:.+]] = add i32 %[[TMP44]], 1 -// IR-NEXT: store i32 %[[INC50]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP4:[0-9]+]] +// IR-NEXT: %[[INC49:.+]] = add i32 %[[TMP44]], 1 +// IR-NEXT: store i32 %[[INC49]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4 +// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP5:[0-9]+]] // IR-EMPTY: -// IR-NEXT: [[FOR_END51]]: +// IR-NEXT: [[FOR_END50]]: // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_BODY_CONTINUE]]: @@ -222,21 +222,23 @@ extern "C" void body(...) 
{} // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_INC]]: // IR-NEXT: %[[TMP45:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD52:.+]] = add i32 %[[TMP45]], 1 -// IR-NEXT: store i32 %[[ADD52]], ptr %[[DOTOMP_IV]], align 4 +// IR-NEXT: %[[ADD51:.+]] = add i32 %[[TMP45]], 1 +// IR-NEXT: store i32 %[[ADD51]], ptr %[[DOTOMP_IV]], align 4 // IR-NEXT: br label %[[OMP_INNER_FOR_COND]] // IR-EMPTY: // IR-NEXT: [[OMP_INNER_FOR_END]]: // IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // IR-EMPTY: // IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]]) +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 %[[TMP0]]) // IR-NEXT: br label %[[OMP_PRECOND_END]] // IR-EMPTY: // IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]]) +// IR-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:.+]], i32 %[[TMP0]]) // IR-NEXT: ret void // IR-NEXT: } + + extern "C" void func(int start, int end, int step) { #pragma omp for #pragma omp tile sizes(3) @@ -246,8 +248,10 @@ extern "C" void func(int start, int end, int step) { } #endif /* HEADER */ + // IR: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// IR: ![[META1:[0-9]+]] = !{!"{{[^"]*}}"} -// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]]} -// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"} -// IR: ![[LOOP4]] = distinct !{![[LOOP4]], ![[LOOPPROP3]]} +// IR: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} +// IR: ![[META2:[0-9]+]] = +// IR: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} +// IR: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} +// IR: ![[LOOP5]] = distinct !{![[LOOP5]], ![[LOOPPROP4]]} diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index e8f7f3470580e..14c7468982137 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -171,10 +171,14 @@ config.substitutions.append(("%libomp-c99-compile-and-run", \ "%libomp-c99-compile && %libomp-run")) 
config.substitutions.append(("%libomp-cxx-compile-and-run", \ "%libomp-cxx-compile && %libomp-run")) +config.substitutions.append(("%libomp-cxx20-compile-and-run", \ + "%libomp-cxx20-compile && %libomp-run")) config.substitutions.append(("%libomp-cxx-compile-c", \ "%clangXX %openmp_flags %flags -std=c++17 -x c++ %s -o %t" + libs)) config.substitutions.append(("%libomp-cxx-compile", \ "%clangXX %openmp_flags %flags -std=c++17 %s -o %t" + libs)) +config.substitutions.append(("%libomp-cxx20-compile", \ + "%clangXX %openmp_flags %flags -std=c++20 %s -o %t" + libs)) config.substitutions.append(("%libomp-compile", \ "%clang %openmp_flags %flags %s -o %t" + libs)) config.substitutions.append(("%libomp-irbuilder-compile", \ diff --git a/openmp/runtime/test/transform/tile/foreach.cpp b/openmp/runtime/test/transform/tile/foreach.cpp new file mode 100644 index 0000000000000..4fb3595760974 --- /dev/null +++ b/openmp/runtime/test/transform/tile/foreach.cpp @@ -0,0 +1,228 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER /* NOTE(review): the header names of the four #include directives that follow are missing in this copy of the patch - TODO restore them before applying */ + +#include +#include +#include +#include + +struct Reporter { /* Test helper: a named range-like object whose constructors, assignments, destructor, begin()/end() and iterator operations each print a "[name] event" line, so the RUN line's FileCheck can verify the exact call sequence. */ + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { /* Iterator handed out by Reporter: begin() starts at pos 2, end() is pos -1, operator++ decrements pos; the index that is logged and dereferenced is 2 - pos. All logging goes through the owning Reporter. */ + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), 
pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { /* logs both logical indices (2 - pos), then compares the raw pos values */ + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { /* moving forward means lowering pos by one */ + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { /* copies the current state via the logging copy constructor before stepping */ + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { /* the element value is the logical index 2 - pos */ + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { /* difference of the two logical indices; the int result is converted to size_t on return */ + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { /* advancing by steps lowers pos by steps; returns a new Iterator and leaves this one unchanged */ + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } /* forwards msg to the owner's print, which uses it as the format string */ + }; + + Iterator begin() const { /* logs the call and returns the iterator at pos 2 (logical index 0) */ + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { /* logs the call and returns the iterator at pos -1 (logical index 3) */ + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { /* printf-style logger: writes "[name] ", then msg formatted with the variadic arguments, then a newline */ + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { /* Applies "omp tile sizes(2, 2)" to two nested range-based for loops with init-statements over Reporter temporaries; the CHECK lines after the HEADER guard pin the expected output. */ + printf("do\n"); +#pragma omp tile sizes(2, 2) + for (Reporter c{"C"}; auto &&v : Reporter("A")) + for (Reporter d{"D"}; auto &&w : Reporter("B")) + printf("v=%d w=%d\n", v, w); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: v=0 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: v=0 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: v=1 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: v=1 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] 
iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: v=0 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: v=1 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: v=2 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: v=2 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: v=2 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/iterfor.cpp b/openmp/runtime/test/transform/tile/iterfor.cpp new file mode 100644 index 0000000000000..12613544f6e5b --- /dev/null +++ b/openmp/runtime/test/transform/tile/iterfor.cpp @@ -0,0 +1,233 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s 
--match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+struct Reporter { // Traces every lifetime/iterator event as "[name] <event>".
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator { // pos runs 2 -> -1; the logged index is 2 - pos.
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos; // inequality; used to return ==
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1; // advancing moves pos downwards
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos); // difference of indices
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2); // index 0
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1); // index 3, one past the last element
+  }
+
+  void print(const char *msg, ...) const { // printf-style; adds "[name] "
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+  {
+    Reporter A("A"), B("B");
+#pragma omp tile sizes(2, 2)
+    for (auto it = A.begin(); it != A.end(); ++it)
+      for (auto jt = B.begin(); jt != B.end(); ++jt)
+        printf("i=%d j=%d\n", *it, *jt);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: i=0 j=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: i=0 j=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT:
[B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: [B] iterator 
dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..b1f4d98a52ddc
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,366 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+struct Reporter { // Traces every lifetime/iterator event as "[name] <event>".
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator { // pos runs 2 -> -1; the logged index is 2 - pos.
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1; // advancing moves pos downwards
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos); // difference of indices
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2); // index 0
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1); // index 3, one past the last element
+  }
+
+  void print(const char *msg, ...) const { // printf-style; adds "[name] "
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(3) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp tile sizes(2, 2)
+    for (Reporter c{"C"}; auto &&v : Reporter("A"))
+      for (Reporter d{"D"}; auto &&w : Reporter("B"))
+        printf("i=%d v=%d w=%d\n", i, v, w);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [C] ctor
+// CHECK-NEXT: [A] ctor
+// CHECK-NEXT: [A] end()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] begin()
+// CHECK-NEXT: [A] iterator distance: 3
+// CHECK-NEXT: [D] ctor
+// CHECK-NEXT: [B] ctor
+// CHECK-NEXT: [B] end()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] begin()
+// CHECK-NEXT: [B] iterator distance: 3
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: i=0 v=0 w=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: i=0 v=0 w=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 1
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 1
+// CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: i=0 v=1 w=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: i=0 v=1 w=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 0
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=0 v=0 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=0 v=1 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=0 v=2 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=0 v=2 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=1 v=0 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] 
iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=1 v=1 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=1 v=0 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=1 v=1 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=1 v=2 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=1 v=2 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] 
iterator deref: 2 +// CHECK-NEXT: i=1 v=2 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=2 v=0 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=2 v=0 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=2 v=1 w=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 w=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=2 v=0 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=2 v=1 w=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// 
CHECK-NEXT: [B] iterator advance: 0 += 0
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 0
+// CHECK-NEXT: i=2 v=2 w=0
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator advance: 0 += 1
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 1
+// CHECK-NEXT: i=2 v=2 w=1
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator advance: 0 += 2
+// CHECK-NEXT: [A] iterator move assign
+// CHECK-NEXT: [A] iterator deref: 2
+// CHECK-NEXT: [B] iterator advance: 0 += 2
+// CHECK-NEXT: [B] iterator move assign
+// CHECK-NEXT: [B] iterator deref: 2
+// CHECK-NEXT: i=2 v=2 w=2
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] iterator dtor
+// CHECK-NEXT: [B] dtor
+// CHECK-NEXT: [D] dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] iterator dtor
+// CHECK-NEXT: [A] dtor
+// CHECK-NEXT: [C] dtor
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/unroll/factor_foreach.cpp b/openmp/runtime/test/transform/unroll/factor_foreach.cpp
new file mode 100644
index 0000000000000..29fef7c187362
--- /dev/null
+++ b/openmp/runtime/test/transform/unroll/factor_foreach.cpp
@@ -0,0 +1,162 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+struct Reporter { // Traces every lifetime/iterator event as "[name] <event>".
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator { // pos runs 2 -> -1; the logged index is 2 - pos.
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1; // advancing moves pos downwards
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos); // difference of indices
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print("%s", msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2); // index 0
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1); // index 3, one past the last element
+  }
+
+  void print(const char *msg, ...) const { // printf-style; adds "[name] "
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp unroll partial(2)
+  for (Reporter c{"init-stmt"}; auto &&v : Reporter("range"))
+    printf("v=%d\n", v);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] iterator 0 != 3
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator 1 != 3
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator 2 != 3
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator 3 != 3
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: [init-stmt] dtor
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/unroll/factor_intfor.c b/openmp/runtime/test/transform/unroll/factor_intfor.c
new file mode 100644
index 0000000000000..42ebeb48e41c8
--- /dev/null
+++ b/openmp/runtime/test/transform/unroll/factor_intfor.c
@@ -0,0 +1,25 @@
+// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  printf("do\n");
+#pragma omp unroll partial(2)
+  for (int i = 7; i < 19; i += 3)
+    printf("i=%d\n", i);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: i=7
+// CHECK-NEXT: i=10
+// CHECK-NEXT: i=13
+// CHECK-NEXT: i=16
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/unroll/factor_iterfor.cpp
b/openmp/runtime/test/transform/unroll/factor_iterfor.cpp
new file mode 100644
index 0000000000000..0298477110b25
--- /dev/null
+++ b/openmp/runtime/test/transform/unroll/factor_iterfor.cpp
@@ -0,0 +1,169 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+struct Reporter { // Traces every lifetime/iterator event as "[name] <event>".
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator { // pos runs 2 -> -1; the logged index is 2 - pos.
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      print("iterator move ctor");
+    }
+
+    ~Iterator() { print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      print("iterator copy assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      print("iterator move assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos;
+    }
+
+    Iterator &operator++() {
+      print("iterator prefix ++");
+      pos -= 1; // advancing moves pos downwards
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos); // difference of indices
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print("%s", msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2); // index 0
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1); // index 3, one past the last element
+  }
+
+  void print(const char *msg, ...) const { // printf-style; adds "[name] "
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+  {
+    Reporter range("range");
+#pragma omp unroll partial(2)
+    for (auto it = range.begin(); it != range.end(); ++it)
+      printf("v=%d\n", *it);
+  }
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] iterator 0 != 3
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: v=0
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] iterator 1 != 3
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: v=1
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] iterator 2 != 3
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: v=2
+// CHECK-NEXT: [range] iterator prefix ++
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT:
[range] iterator 3 != 3
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] dtor
+// CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-foreach.cpp
new file mode 100644
index 0000000000000..71567faf79646
--- /dev/null
+++ b/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-foreach.cpp
@@ -0,0 +1,199 @@
+// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+struct Reporter { // Traces every lifetime/iterator event as "[name] <event>".
+  const char *name;
+
+  Reporter(const char *name) : name(name) { print("ctor"); }
+
+  Reporter() : name("") { print("ctor"); }
+
+  Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); }
+
+  Reporter(Reporter &&that) : name(that.name) { print("move ctor"); }
+
+  ~Reporter() { print("dtor"); }
+
+  const Reporter &operator=(const Reporter &that) {
+    print("copy assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  const Reporter &operator=(Reporter &&that) {
+    print("move assign"); // logged under the old name, before it changes
+    this->name = that.name;
+    return *this;
+  }
+
+  struct Iterator { // pos runs 2 -> -1; the logged index is 2 - pos.
+    const Reporter *owner;
+    int pos;
+
+    Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {}
+
+    Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator copy ctor");
+    }
+
+    Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) {
+      owner->print("iterator move ctor");
+    }
+
+    ~Iterator() { owner->print("iterator dtor"); }
+
+    const Iterator &operator=(const Iterator &that) {
+      owner->print("iterator copy assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    const Iterator &operator=(Iterator &&that) {
+      owner->print("iterator move assign"); // uses the previous owner
+      this->owner = that.owner;
+      this->pos = that.pos;
+      return *this;
+    }
+
+    bool operator==(const Iterator &that) const {
+      owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos);
+      return this->pos == that.pos;
+    }
+
+    bool operator!=(const Iterator &that) const {
+      owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos);
+      return this->pos != that.pos;
+    }
+
+    Iterator &operator++() {
+      owner->print("iterator prefix ++");
+      pos -= 1; // advancing moves pos downwards
+      return *this;
+    }
+
+    Iterator operator++(int) {
+      owner->print("iterator postfix ++");
+      auto result = *this;
+      pos -= 1;
+      return result;
+    }
+
+    int operator*() const {
+      int result = 2 - pos;
+      owner->print("iterator deref: %i", result);
+      return result;
+    }
+
+    size_t operator-(const Iterator &that) const {
+      int result = (2 - this->pos) - (2 - that.pos); // difference of indices
+      owner->print("iterator distance: %d", result);
+      return result;
+    }
+
+    Iterator operator+(int steps) const {
+      owner->print("iterator advance: %i += %i", 2 - this->pos, steps);
+      return Iterator(owner, pos - steps);
+    }
+
+    void print(const char *msg) const { owner->print("%s", msg); }
+  };
+
+  Iterator begin() const {
+    print("begin()");
+    return Iterator(this, 2); // index 0
+  }
+
+  Iterator end() const {
+    print("end()");
+    return Iterator(this, -1); // index 3, one past the last element
+  }
+
+  void print(const char *msg, ...) const { // printf-style; adds "[name] "
+    va_list args;
+    va_start(args, msg);
+    printf("[%s] ", name);
+    vprintf(msg, args);
+    printf("\n");
+    va_end(args);
+  }
+};
+
+int main() {
+  printf("do\n");
+#pragma omp parallel for collapse(2) num_threads(1)
+  for (int i = 0; i < 3; ++i)
+#pragma omp unroll partial(2)
+    for (Reporter c{"init-stmt"}; auto &&v : Reporter("range"))
+      printf("i=%d v=%d\n", i, v);
+  printf("done\n");
+  return EXIT_SUCCESS;
+}
+
+#endif /* HEADER */
+
+// CHECK: do
+// CHECK-NEXT: [init-stmt] ctor
+// CHECK-NEXT: [range] ctor
+// CHECK-NEXT: [range] end()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] begin()
+// CHECK-NEXT: [range] iterator distance: 3
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=0 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=0 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=0 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+// CHECK-NEXT: i=1 v=0
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 1
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 1
+// CHECK-NEXT: i=1 v=1
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 2
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 2
+// CHECK-NEXT: i=1 v=2
+// CHECK-NEXT: [range] iterator dtor
+// CHECK-NEXT: [range] iterator advance: 0 += 0
+// CHECK-NEXT: [range] iterator move assign
+// CHECK-NEXT: [range] iterator deref: 0
+//
CHECK-NEXT: i=2 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-intfor.cpp b/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-intfor.cpp new file mode 100644 index 0000000000000..0a31f8db07016 --- /dev/null +++ b/openmp/runtime/test/transform/unroll/factor_parallel-wsloop-collapse-intfor.cpp @@ -0,0 +1,32 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp unroll partial(2) + for (int j = 0; j < 3; ++j) + printf("i=%d j=%d\n", i, j); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=0 j=0 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/unroll/full_intfor.c b/openmp/runtime/test/transform/unroll/full_intfor.c new file mode 100644 index 0000000000000..0814511091766 --- /dev/null +++ b/openmp/runtime/test/transform/unroll/full_intfor.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef 
HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp unroll full + for (int i = 7; i < 19; i += 3) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=16 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/unroll/heuristic_intfor.c b/openmp/runtime/test/transform/unroll/heuristic_intfor.c new file mode 100644 index 0000000000000..b07bec7d82f0b --- /dev/null +++ b/openmp/runtime/test/transform/unroll/heuristic_intfor.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp unroll + for (int i = 7; i < 19; i += 3) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=16 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/unroll/partial_intfor.c b/openmp/runtime/test/transform/unroll/partial_intfor.c new file mode 100644 index 0000000000000..2ede94e70e12d --- /dev/null +++ b/openmp/runtime/test/transform/unroll/partial_intfor.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp unroll partial + for (int i = 7; i < 19; i += 3) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=7 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=16 +// CHECK-NEXT: done From b15caff37dfd8a30551d5a250969975e720ffa33 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 21 May 2024 15:33:47 +0200 Subject: [PATCH 02/12] [Clang][OpenMP] Add reverse directive --- clang/include/clang-c/Index.h | 4 + 
clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 72 +- clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 5 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 19 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 4 + clang/lib/Basic/OpenMPKinds.cpp | 3 +- clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Parse/ParseOpenMP.cpp | 2 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 190 ++ clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 12 + clang/lib/Serialization/ASTWriterStmt.cpp | 5 + clang/test/OpenMP/reverse_ast_print.cpp | 159 ++ clang/test/OpenMP/reverse_codegen.cpp | 1554 +++++++++++++++++ clang/test/OpenMP/reverse_messages.cpp | 40 + clang/tools/libclang/CIndex.cpp | 7 + clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 3 + .../test/transform/reverse/foreach.cpp | 162 ++ .../runtime/test/transform/reverse/intfor.c | 25 + .../test/transform/reverse/iterfor.cpp | 164 ++ .../parallel-wsloop-collapse-foreach.cpp | 285 +++ .../parallel-wsloop-collapse-intfor.cpp | 51 + 30 files changed, 2800 insertions(+), 3 deletions(-) create mode 100644 clang/test/OpenMP/reverse_ast_print.cpp create mode 100644 clang/test/OpenMP/reverse_codegen.cpp create mode 100644 clang/test/OpenMP/reverse_messages.cpp create mode 100644 openmp/runtime/test/transform/reverse/foreach.cpp create mode 100644 openmp/runtime/test/transform/reverse/intfor.c create mode 100644 openmp/runtime/test/transform/reverse/iterfor.cpp create mode 100644 openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 
365b607c74117..c7d63818ece23 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2146,6 +2146,10 @@ enum CXCursorKind { */ CXCursor_OMPScopeDirective = 306, + /** OpenMP reverse directive. + */ + CXCursor_OMPReverseDirective = 307, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index f5cefedb07e0e..06b29d59785f6 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3021,6 +3021,9 @@ DEF_TRAVERSE_STMT(OMPTileDirective, DEF_TRAVERSE_STMT(OMPUnrollDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPReverseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index f735fa5643aec..4be2e2d3a4605 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1007,8 +1007,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt *getPreInits() const; static bool classof(const Stmt *T) { - return T->getStmtClass() == OMPTileDirectiveClass || - T->getStmtClass() == OMPUnrollDirectiveClass; + Stmt::StmtClass C = T->getStmtClass(); + return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || + C == OMPReverseDirectiveClass; } }; @@ -5711,6 +5712,73 @@ class OMPUnrollDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp reverse' loop transformation directive. +/// +/// \code +/// #pragma omp reverse +/// for (int i = 0; i < n; ++i) +/// ... +/// \endcode +class OMPReverseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Offsets of child members. 
+ enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPLoopTransformationDirective(OMPReverseDirectiveClass, + llvm::omp::OMPD_reverse, StartLoc, + EndLoc, 1) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp reverse'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param Clauses The directive's clauses. + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after reversal, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPReverseDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp reverse' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + static OMPReverseDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses); + + /// Gets/sets the associated loops after the transformation, i.e. after + /// de-sugaring. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPReverseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. 
/// /// \code diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 305f19daa4a92..b2e2be5c998bb 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -229,6 +229,7 @@ def OMPSimdDirective : StmtNode; def OMPLoopTransformationDirective : StmtNode; def OMPTileDirective : StmtNode; def OMPUnrollDirective : StmtNode; +def OMPReverseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 51981e1c9a8b9..e36a90ba4e1b9 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -422,6 +422,11 @@ class SemaOpenMP : public SemaBase { StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '#pragma omp reverse' after parsing of its clauses + /// and the associated statement. + StmtResult ActOnOpenMPReverseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. 
StmtResult diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index fe1bd47348be1..dee0d073557cc 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1856,6 +1856,7 @@ enum StmtCode { STMT_OMP_SIMD_DIRECTIVE, STMT_OMP_TILE_DIRECTIVE, STMT_OMP_UNROLL_DIRECTIVE, + STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index d8519b2071e6d..0be0d9d2cfa94 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -449,6 +449,25 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C, SourceLocation(), SourceLocation()); } +OMPReverseDirective * +OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPReverseDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation()); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index be2d5a2eb6b46..64b481f680311 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -763,6 +763,11 @@ void StmtPrinter::VisitOMPUnrollDirective(OMPUnrollDirective *Node) { PrintOMPExecutableDirective(Node); } +void 
StmtPrinter::VisitOMPReverseDirective(OMPReverseDirective *Node) { + Indent() << "#pragma omp reverse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index caab4ab0ef160..f0e1c9548de72 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -985,6 +985,10 @@ void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) { VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPReverseDirective(const OMPReverseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index b3e9affbb3e58..803808c38e2fe 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -684,7 +684,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { } bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { - return DKind == OMPD_tile || DKind == OMPD_unroll; + return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { @@ -808,6 +808,7 @@ void clang::getOpenMPCaptureRegions( break; case OMPD_tile: case OMPD_unroll: + case OMPD_reverse: // loop transformations do not introduce captures. 
break; case OMPD_threadprivate: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 99daaa14cf3fe..93c2f8900dd12 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -222,6 +222,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPUnrollDirectiveClass: EmitOMPUnrollDirective(cast(*S)); break; + case Stmt::OMPReverseDirectiveClass: + EmitOMPReverseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 6410f9e102c90..ad6c044aa483b 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -187,6 +187,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { PreInits = Tile->getPreInits(); } else if (const auto *Unroll = dyn_cast(&S)) { PreInits = Unroll->getPreInits(); + } else if (const auto *Reverse = dyn_cast(&S)) { + PreInits = Reverse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2762,6 +2764,12 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { + // Emit the de-sugared statement. 
+ OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 5f3ee7eb943f9..ac738e1e82886 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3807,6 +3807,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPTileDirective(const OMPTileDirective &S); void EmitOMPUnrollDirective(const OMPUnrollDirective &S); + void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index e959dd6378f46..57fcf6ce520ac 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2384,6 +2384,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_simd: case OMPD_tile: case OMPD_unroll: + case OMPD_reverse: case OMPD_task: case OMPD_taskyield: case OMPD_barrier: @@ -2802,6 +2803,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_simd: case OMPD_tile: case OMPD_unroll: + case OMPD_reverse: case OMPD_for: case OMPD_for_simd: case OMPD_sections: diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 41bf273d12f2f..4de7183cde281 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1486,6 +1486,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPReverseDirectiveClass: case 
Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 663dbb927250e..7b9898704eb1c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4334,6 +4334,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_masked: case OMPD_tile: case OMPD_unroll: + case OMPD_reverse: break; case OMPD_loop: // TODO: 'loop' may require additional parameters depending on the binding. @@ -6546,6 +6547,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPUnrollDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_reverse: + Res = ActOnOpenMPReverseDirective(ClausesWithImplicit, AStmt, StartLoc, + EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -15121,6 +15126,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); if (!DependentPreInits) @@ -15746,6 +15753,189 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, buildPreInits(Context, PreInits)); } +StmtResult +SemaOpenMP::ActOnOpenMPReverseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); + assert(Clauses.empty() && "reverse directive does not accept any clauses; " + "must have beed checked before"); + + // Empty statement should only be possible if there already was an error. 
+ if (!AStmt) + return StmtError(); + + constexpr unsigned NumLoops = 1; + Stmt *Body = nullptr; + SmallVector LoopHelpers( + NumLoops); + SmallVector, NumLoops + 1> OriginalInits; + if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, + Body, OriginalInits)) + return StmtError(); + + // Delay applying the transformation to when template is completely + // instantiated. + if (SemaRef.CurContext->isDependentContext()) + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, Clauses, + AStmt, nullptr, nullptr); + + assert(LoopHelpers.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + assert(OriginalInits.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front(); + + // Find the loop statement. + Stmt *LoopStmt = nullptr; + collectLoopStmts(AStmt, {LoopStmt}); + + // Determine the PreInit declarations. + SmallVector PreInits; + addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); + + auto *IterationVarRef = cast(LoopHelper.IterationVarRef); + QualType IVTy = IterationVarRef->getType(); + uint64_t IVWidth = Context.getTypeSize(IVTy); + auto *OrigVar = cast(LoopHelper.Counters.front()); + + // Iteration variable SourceLocations. + SourceLocation OrigVarLoc = OrigVar->getExprLoc(); + SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc(); + SourceLocation OrigVarLocEnd = OrigVar->getEndLoc(); + + // Locations pointing to the transformation. + SourceLocation TransformLoc = StartLoc; + SourceLocation TransformLocBegin = StartLoc; + SourceLocation TransformLocEnd = EndLoc; + + // Internal variable names. 
+ std::string OrigVarName = OrigVar->getNameInfo().getAsString(); + std::string TripCountName = (Twine(".tripcount.") + OrigVarName).str(); + std::string ForwardIVName = (Twine(".forward.iv.") + OrigVarName).str(); + std::string ReversedIVName = (Twine(".reversed.iv.") + OrigVarName).str(); + + // LoopHelper.Updates will read the logical iteration number from + // LoopHelper.IterationVarRef, compute the value of the user loop counter of + // that logical iteration from it, then assign it to the user loop counter + // variable. We cannot directly use LoopHelper.IterationVarRef as the + // induction variable of the generated loop because it may cause an underflow: + // \code + // for (unsigned i = 0; i < n; ++i) + // body(i); + // \endcode + // + // Naive reversal: + // \code + // for (unsigned i = n-1; i >= 0; --i) + // body(i); + // \endcode + // + // Instead, we introduce a new iteration variable representing the logical + // iteration counter of the original loop, convert it to the logical iteration + // number of the reversed loop, then let LoopHelper.Updates compute the user's + // loop iteration variable from it. + // \code + // for (auto .forward.iv = 0; .forward.iv < n; ++.forward.iv) { + // auto .reversed.iv = n - .forward.iv - 1; + // i = (.reversed.iv + 0) * 1; // LoopHelper.Updates + // body(i); // Body + // } + // \endcode + + // Subexpressions with more than one use. One of the constraints of an AST is + // that every node object must appear at most once, hence we define a lambda + // that creates a new AST node at every use. + CaptureVars CopyTransformer(SemaRef); + auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * { + return AssertSuccess( + CopyTransformer.TransformExpr(LoopHelper.NumIterations)); + }; + + // Create the iteration variable for the forward loop (from 0 to n-1). 
+ VarDecl *ForwardIVDecl = + buildVarDecl(SemaRef, {}, IVTy, ForwardIVName, nullptr, OrigVar); + auto MakeForwardRef = [&SemaRef = this->SemaRef, ForwardIVDecl, IVTy, + OrigVarLoc]() { + return buildDeclRefExpr(SemaRef, ForwardIVDecl, IVTy, OrigVarLoc); + }; + + // Iteration variable for the reversed induction variable (from n-1 downto 0): + // Reuse the iteration variable created by checkOpenMPLoop. + auto *ReversedIVDecl = cast(IterationVarRef->getDecl()); + ReversedIVDecl->setDeclName( + &SemaRef.PP.getIdentifierTable().get(ReversedIVName)); + + // For init-statement: + // \code + // auto .forward.iv = 0 + // \endcode + IntegerLiteral *Zero = + IntegerLiteral::Create(Context, llvm::APInt::getZero(IVWidth), + ForwardIVDecl->getType(), OrigVarLoc); + SemaRef.AddInitializerToDecl(ForwardIVDecl, Zero, /*DirectInit=*/false); + StmtResult Init = new (Context) + DeclStmt(DeclGroupRef(ForwardIVDecl), OrigVarLocBegin, OrigVarLocEnd); + if (!Init.isUsable()) + return StmtError(); + + // Forward iv cond-expression: + // \code + // .forward.iv < NumIterations + // \endcode + ExprResult Cond = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeForwardRef(), MakeNumIterations()); + if (!Cond.isUsable()) + return StmtError(); + + // Forward incr-statement: ++.forward.iv + ExprResult Incr = SemaRef.BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(), + UO_PreInc, MakeForwardRef()); + if (!Incr.isUsable()) + return StmtError(); + + // Reverse the forward-iv: auto .reversed.iv = MakeNumIterations() - 1 - + // .forward.iv + IntegerLiteral *One = IntegerLiteral::Create(Context, llvm::APInt(IVWidth, 1), + IVTy, TransformLoc); + ExprResult Minus = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Sub, + MakeNumIterations(), One); + if (!Minus.isUsable()) + return StmtError(); + Minus = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Sub, Minus.get(), + MakeForwardRef()); + if (!Minus.isUsable()) + return StmtError(); + StmtResult InitReversed = new (Context) 
DeclStmt( + DeclGroupRef(ReversedIVDecl), TransformLocBegin, TransformLocEnd); + if (!InitReversed.isUsable()) + return StmtError(); + SemaRef.AddInitializerToDecl(ReversedIVDecl, Minus.get(), + /*DirectInit=*/false); + + // The new loop body. + SmallVector BodyStmts; + BodyStmts.push_back(InitReversed.get()); + llvm::append_range(BodyStmts, LoopHelper.Updates); + if (auto *CXXRangeFor = dyn_cast(LoopStmt)) + BodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); + BodyStmts.push_back(Body); + auto *ReversedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + Body->getBeginLoc(), Body->getEndLoc()); + + // Finally create the reversed For-statement. + auto *ReversedFor = new (Context) + ForStmt(Context, Init.get(), Cond.get(), nullptr, Incr.get(), + ReversedBody, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + ReversedFor, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 06ed0843ef504..f8713976fcc5c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9172,6 +9172,17 @@ TreeTransform::TransformOMPUnrollDirective(OMPUnrollDirective *D) { return Res; } +template +StmtResult +TreeTransform::TransformOMPReverseDirective(OMPReverseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp 
b/clang/lib/Serialization/ASTReaderStmt.cpp index eac4faff28549..ff72679f993e2 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2428,6 +2428,10 @@ void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3436,6 +3440,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_REVERSE_DIRECTIVE: { + assert(Record[ASTStmtReader::NumStmtFields] == 1 && + "Reverse directive accepts only a single loop"); + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPReverseDirective::CreateEmpty(Context, NumClauses); + break; + } + case STMT_OMP_FOR_DIRECTIVE: { unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a44852af97bea..ffb09ece981e6 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2412,6 +2412,11 @@ void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) { Code = serialization::STMT_OMP_UNROLL_DIRECTIVE; } +void ASTStmtWriter::VisitOMPReverseDirective(OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_REVERSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/test/OpenMP/reverse_ast_print.cpp b/clang/test/OpenMP/reverse_ast_print.cpp new file mode 100644 index 0000000000000..3ff6d18cfdf8b --- /dev/null +++ b/clang/test/OpenMP/reverse_ast_print.cpp @@ -0,0 +1,159 @@ +// Check 
no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2(int start, int end, int step) { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = start; i < end; i += step) + // DUMP-NEXT: ForStmt + for (int i = start; i < end; i += step) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { + // PRINT: #pragma omp for + // DUMP: OMPForDirective + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for + // PRINT: #pragma omp reverse + // DUMP-NEXT: OMPReverseDirective + #pragma omp reverse + for (int i = 7; 
i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo4( +// DUMP-LABEL: FunctionDecl {{.*}} foo4 +void foo4() { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5(int start, int end, int step) { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP-NEXT: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template<typename T, T Step> +void foo6(T start, T end) { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT-NEXT: for (T i = start; i < end; i += Step) + // DUMP-NEXT: ForStmt + for (T i = start; i < end; i += Step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} + +// Also test instantiating the template.
+void tfoo6() { + foo6<int, 3>(0, 42); +} + + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + double arr[128]; + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT-NEXT: for (auto &&v : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&v : arr) + // PRINT-NEXT: body(v); + // DUMP: CallExpr + body(v); +} + +#endif + diff --git a/clang/test/OpenMP/reverse_codegen.cpp b/clang/test/OpenMP/reverse_codegen.cpp new file mode 100644 index 0000000000000..9adaa6cc7d18d --- /dev/null +++ b/clang/test/OpenMP/reverse_codegen.cpp @@ -0,0 +1,1554 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ + +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...)
{} + + +struct S { + int i; + S() { +#pragma omp reverse + for (i = 7; i < 17; i += 3) + body(i); + } +} s; + + +extern "C" void foo1(int start, int end, int step) { + int i; +#pragma omp reverse + for (i = start; i < end; i += step) + body(i); +} + + +extern "C" void foo2() { +#pragma omp for +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + body(i); +} + + +extern "C" void foo3() { +#pragma omp for collapse(3) + for (int k = 7; k < 17; k += 3) +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(k, i, j); +} + + +extern "C" void foo4() { +#pragma omp parallel for +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + body(i); +} + + +template<typename T, T Step> +void foo5(T start, T end) { +#pragma omp reverse + for (T i = start; i < end; i += Step) + body(i); +} + +extern "C" void tfoo5() { + foo5<int, 3>(0, 42); +} + + +extern "C" void foo6() { + double arr[128]; +#pragma omp reverse + for (int c = 42; auto && v : arr) + body(v, c); +} + + +extern "C" void foo7() { + double A[128]; + +#pragma omp for collapse(3) + for (int k = 7; k < 17; k += 3) +#pragma omp reverse + for (int c = 42; auto && v : A) + for (int j = 7; j < 17; j += 3) + body(k, c, v, j); +} + +#endif /* HEADER */ + +// CHECK1-LABEL: define {{[^@]+}}@body +// CHECK1-SAME: (...)
#[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK1-SAME: () #[[ATTR1:[0-9]+]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// CHECK1-NEXT: br i1 
[[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP1]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo1 +// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// 
CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP10]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP11]] +// CHECK1-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP15]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo2 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP7]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw 
i32 [[TMP8]], 3 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP9]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo3 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 63, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 63 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 16 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 16 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 
[[DIV4]], 16 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB]], 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[TMP10]], 16 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 16 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP9]], [[MUL10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[TMP12]], 16 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 16 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP11]], [[MUL13]] +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 4 +// CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 4 +// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i32 [[SUB11]], [[MUL16]] +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[SUB17]], 3 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 3, [[TMP13]] +// CHECK1-NEXT: store i32 [[SUB20]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 7, [[MUL21]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP15]], i32 noundef [[TMP16]], i32 noundef [[TMP17]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo4 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo4.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo4.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// 
CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP8]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@tfoo5 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_Z4foo5IiTnT_Li3EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z4foo5IiTnT_Li3EEvS0_S0_ +// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca 
i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 3 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 3 +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP6]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP5]], [[ADD5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP7]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP8]] +// CHECK1-NEXT: store i32 
[[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 3 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo6 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTREVERSED_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], 
i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: 
[[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP8]], 1 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[ADD7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i64 [[SUB8]], [[TMP9]] +// CHECK1-NEXT: store i64 [[SUB9]], ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL]] +// CHECK1-NEXT: store ptr [[ADD_PTR10]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: call void (...) 
@body(double noundef [[TMP14]], i32 noundef [[TMP15]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo7 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K15:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN316:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV___BEGIN3:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY3]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: store i64 [[ADD9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i64 [[TMP8]], 0 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i64 [[SUB11]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 4, [[DIV12]] +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK1-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN3]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: br label 
[[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 [[TMP19]], 0 +// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i64 [[SUB20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i64 1, [[DIV21]] +// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i64 [[MUL22]], 4 +// CHECK1-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[MUL23]] +// CHECK1-NEXT: [[MUL25:%.*]] = mul nsw i64 [[DIV24]], 3 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i64 7, [[MUL25]] +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[ADD26]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[K15]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB27:%.*]] = sub nsw i64 [[TMP22]], 0 +// CHECK1-NEXT: [[DIV28:%.*]] = sdiv i64 [[SUB27]], 1 +// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i64 1, [[DIV28]] +// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw 
i64 [[MUL29]], 4 +// CHECK1-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP21]], [[MUL30]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], 0 +// CHECK1-NEXT: [[DIV33:%.*]] = sdiv i64 [[SUB32]], 1 +// CHECK1-NEXT: [[MUL34:%.*]] = mul nsw i64 1, [[DIV33]] +// CHECK1-NEXT: [[MUL35:%.*]] = mul nsw i64 [[MUL34]], 4 +// CHECK1-NEXT: [[MUL36:%.*]] = mul nsw i64 [[DIV31]], [[MUL35]] +// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[TMP20]], [[MUL36]] +// CHECK1-NEXT: [[DIV38:%.*]] = sdiv i64 [[SUB37]], 4 +// CHECK1-NEXT: [[MUL39:%.*]] = mul nsw i64 [[DIV38]], 1 +// CHECK1-NEXT: [[ADD40:%.*]] = add nsw i64 0, [[MUL39]] +// CHECK1-NEXT: store i64 [[ADD40]], ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB41:%.*]] = sub nsw i64 [[TMP26]], 0 +// CHECK1-NEXT: [[DIV42:%.*]] = sdiv i64 [[SUB41]], 1 +// CHECK1-NEXT: [[MUL43:%.*]] = mul nsw i64 1, [[DIV42]] +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i64 [[MUL43]], 4 +// CHECK1-NEXT: [[DIV45:%.*]] = sdiv i64 [[TMP25]], [[MUL44]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i64 1, [[DIV47]] +// CHECK1-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV45]], [[MUL49]] +// CHECK1-NEXT: [[SUB51:%.*]] = sub nsw i64 [[TMP24]], [[MUL50]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP30]], 0 +// 
CHECK1-NEXT: [[DIV53:%.*]] = sdiv i64 [[SUB52]], 1 +// CHECK1-NEXT: [[MUL54:%.*]] = mul nsw i64 1, [[DIV53]] +// CHECK1-NEXT: [[MUL55:%.*]] = mul nsw i64 [[MUL54]], 4 +// CHECK1-NEXT: [[DIV56:%.*]] = sdiv i64 [[TMP29]], [[MUL55]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB57:%.*]] = sub nsw i64 [[TMP31]], 0 +// CHECK1-NEXT: [[DIV58:%.*]] = sdiv i64 [[SUB57]], 1 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i64 1, [[DIV58]] +// CHECK1-NEXT: [[MUL60:%.*]] = mul nsw i64 [[MUL59]], 4 +// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i64 [[DIV56]], [[MUL60]] +// CHECK1-NEXT: [[SUB62:%.*]] = sub nsw i64 [[TMP28]], [[MUL61]] +// CHECK1-NEXT: [[DIV63:%.*]] = sdiv i64 [[SUB62]], 4 +// CHECK1-NEXT: [[MUL64:%.*]] = mul nsw i64 [[DIV63]], 4 +// CHECK1-NEXT: [[SUB65:%.*]] = sub nsw i64 [[SUB51]], [[MUL64]] +// CHECK1-NEXT: [[MUL66:%.*]] = mul nsw i64 [[SUB65]], 3 +// CHECK1-NEXT: [[ADD67:%.*]] = add nsw i64 7, [[MUL66]] +// CHECK1-NEXT: [[CONV68:%.*]] = trunc i64 [[ADD67]] to i32 +// CHECK1-NEXT: store i32 [[CONV68]], ptr [[J17]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[ADD69:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK1-NEXT: [[SUB70:%.*]] = sub nsw i64 [[ADD69]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK1-NEXT: [[SUB71:%.*]] = sub nsw i64 [[SUB70]], [[TMP33]] +// CHECK1-NEXT: store i64 [[SUB71]], ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK1-NEXT: [[MUL72:%.*]] = mul nsw i64 [[TMP35]], 1 +// CHECK1-NEXT: [[ADD_PTR73:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[MUL72]] +// CHECK1-NEXT: store ptr [[ADD_PTR73]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: store ptr [[TMP36]], 
ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[K15]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[J17]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]], i32 noundef [[TMP38]], double noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK1-NEXT: store i64 [[ADD74]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_reverse_codegen.cpp +// CHECK1-SAME: () #[[ATTR1]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ev +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC2Ev +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP1]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// 
CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@body +// CHECK2-SAME: (...) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo1 +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], 
align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP10]], 1 +// CHECK2-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP11]] +// CHECK2-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = 
mul i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP15]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo2 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK2-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP7]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP8]], 3 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP9]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo3 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 63 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 16 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 16 +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 16 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// CHECK2-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB]], 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 1 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 
0, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[TMP10]], 16 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 16 +// CHECK2-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP9]], [[MUL10]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV12:%.*]] = sdiv i32 [[TMP12]], 16 +// CHECK2-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 16 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP11]], [[MUL13]] +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 4 +// CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 4 +// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i32 [[SUB11]], [[MUL16]] +// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[SUB17]], 3 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK2-NEXT: store i32 [[ADD19]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 3, [[TMP13]] +// CHECK2-NEXT: store i32 [[SUB20]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK2-NEXT: [[ADD22:%.*]] = add nsw i32 7, [[MUL21]] +// CHECK2-NEXT: store i32 [[ADD22]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP15]], i32 noundef [[TMP16]], i32 noundef [[TMP17]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo4 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo4.omp_outlined) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo4.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// 
CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP8]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo6 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTREVERSED_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = 
alloca ptr, align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// 
CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP8]], 1 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i64 [[ADD7]], 1 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[SUB9:%.*]] = sub nsw i64 [[SUB8]], [[TMP9]] +// CHECK2-NEXT: store i64 [[SUB9]], ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL]] +// CHECK2-NEXT: store ptr [[ADD_PTR10]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: call void (...) 
@body(double noundef [[TMP14]], i32 noundef [[TMP15]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo7 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K15:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN316:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV___BEGIN3:%.*]] = 
alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY3]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB7:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: store i64 [[ADD9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB11:%.*]] = sub nsw i64 [[TMP8]], 0 +// CHECK2-NEXT: [[DIV12:%.*]] = sdiv i64 [[SUB11]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 4, [[DIV12]] +// CHECK2-NEXT: [[MUL13:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1 +// CHECK2-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK2-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN3]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: br label 
[[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 [[TMP19]], 0 +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i64 [[SUB20]], 1 +// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i64 1, [[DIV21]] +// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i64 [[MUL22]], 4 +// CHECK2-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[MUL23]] +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i64 [[DIV24]], 3 +// CHECK2-NEXT: [[ADD26:%.*]] = add nsw i64 7, [[MUL25]] +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[ADD26]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[K15]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB27:%.*]] = sub nsw i64 [[TMP22]], 0 +// CHECK2-NEXT: [[DIV28:%.*]] = sdiv i64 [[SUB27]], 1 +// CHECK2-NEXT: [[MUL29:%.*]] = mul nsw i64 1, [[DIV28]] +// CHECK2-NEXT: [[MUL30:%.*]] = mul nsw 
i64 [[MUL29]], 4 +// CHECK2-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP21]], [[MUL30]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], 0 +// CHECK2-NEXT: [[DIV33:%.*]] = sdiv i64 [[SUB32]], 1 +// CHECK2-NEXT: [[MUL34:%.*]] = mul nsw i64 1, [[DIV33]] +// CHECK2-NEXT: [[MUL35:%.*]] = mul nsw i64 [[MUL34]], 4 +// CHECK2-NEXT: [[MUL36:%.*]] = mul nsw i64 [[DIV31]], [[MUL35]] +// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[TMP20]], [[MUL36]] +// CHECK2-NEXT: [[DIV38:%.*]] = sdiv i64 [[SUB37]], 4 +// CHECK2-NEXT: [[MUL39:%.*]] = mul nsw i64 [[DIV38]], 1 +// CHECK2-NEXT: [[ADD40:%.*]] = add nsw i64 0, [[MUL39]] +// CHECK2-NEXT: store i64 [[ADD40]], ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB41:%.*]] = sub nsw i64 [[TMP26]], 0 +// CHECK2-NEXT: [[DIV42:%.*]] = sdiv i64 [[SUB41]], 1 +// CHECK2-NEXT: [[MUL43:%.*]] = mul nsw i64 1, [[DIV42]] +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i64 [[MUL43]], 4 +// CHECK2-NEXT: [[DIV45:%.*]] = sdiv i64 [[TMP25]], [[MUL44]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP27]], 0 +// CHECK2-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i64 1, [[DIV47]] +// CHECK2-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV45]], [[MUL49]] +// CHECK2-NEXT: [[SUB51:%.*]] = sub nsw i64 [[TMP24]], [[MUL50]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP30]], 0 +// 
CHECK2-NEXT: [[DIV53:%.*]] = sdiv i64 [[SUB52]], 1 +// CHECK2-NEXT: [[MUL54:%.*]] = mul nsw i64 1, [[DIV53]] +// CHECK2-NEXT: [[MUL55:%.*]] = mul nsw i64 [[MUL54]], 4 +// CHECK2-NEXT: [[DIV56:%.*]] = sdiv i64 [[TMP29]], [[MUL55]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB57:%.*]] = sub nsw i64 [[TMP31]], 0 +// CHECK2-NEXT: [[DIV58:%.*]] = sdiv i64 [[SUB57]], 1 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i64 1, [[DIV58]] +// CHECK2-NEXT: [[MUL60:%.*]] = mul nsw i64 [[MUL59]], 4 +// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i64 [[DIV56]], [[MUL60]] +// CHECK2-NEXT: [[SUB62:%.*]] = sub nsw i64 [[TMP28]], [[MUL61]] +// CHECK2-NEXT: [[DIV63:%.*]] = sdiv i64 [[SUB62]], 4 +// CHECK2-NEXT: [[MUL64:%.*]] = mul nsw i64 [[DIV63]], 4 +// CHECK2-NEXT: [[SUB65:%.*]] = sub nsw i64 [[SUB51]], [[MUL64]] +// CHECK2-NEXT: [[MUL66:%.*]] = mul nsw i64 [[SUB65]], 3 +// CHECK2-NEXT: [[ADD67:%.*]] = add nsw i64 7, [[MUL66]] +// CHECK2-NEXT: [[CONV68:%.*]] = trunc i64 [[ADD67]] to i32 +// CHECK2-NEXT: store i32 [[CONV68]], ptr [[J17]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[ADD69:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK2-NEXT: [[SUB70:%.*]] = sub nsw i64 [[ADD69]], 1 +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK2-NEXT: [[SUB71:%.*]] = sub nsw i64 [[SUB70]], [[TMP33]] +// CHECK2-NEXT: store i64 [[SUB71]], ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK2-NEXT: [[MUL72:%.*]] = mul nsw i64 [[TMP35]], 1 +// CHECK2-NEXT: [[ADD_PTR73:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[MUL72]] +// CHECK2-NEXT: store ptr [[ADD_PTR73]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: store ptr [[TMP36]], 
ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[K15]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[J17]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]], i32 noundef [[TMP38]], double noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK2-NEXT: store i64 [[ADD74]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@tfoo5 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_Z4foo5IiTnT_Li3EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z4foo5IiTnT_Li3EEvS0_S0_ +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 
+// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 3 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 3 +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP6]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP5]], [[ADD5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP7]], 1 +// CHECK2-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP8]] +// CHECK2-NEXT: store i32 [[SUB8]], ptr 
[[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 3 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_reverse_codegen.cpp +// CHECK2-SAME: () #[[ATTR0]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__cxx_global_var_init() +// CHECK2-NEXT: ret void + diff --git a/clang/test/OpenMP/reverse_messages.cpp b/clang/test/OpenMP/reverse_messages.cpp new file mode 100644 index 0000000000000..9636a70bf2753 --- /dev/null +++ b/clang/test/OpenMP/reverse_messages.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + ; + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + int b = 0; + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp reverse + } + + // expected-error@+2 {{condition of OpenMP for loop must be a 
relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + #pragma omp reverse + for (int i = 0; i/3<7; ++i) + ; + + // expected-error@+1 {{unexpected OpenMP clause 'sizes' in directive '#pragma omp reverse'}} + #pragma omp reverse sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp reverse' are ignored}} + #pragma omp reverse foo + for (int i = 0; i < 7; ++i) + ; + +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index bfbdb5be9ff2f..82dcee56244bf 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2179,6 +2179,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>, VisitOMPLoopTransformationDirective(const OMPLoopTransformationDirective *D); void VisitOMPTileDirective(const OMPTileDirective *D); void VisitOMPUnrollDirective(const OMPUnrollDirective *D); + void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3217,6 +3218,10 @@ void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPReverseDirective(const OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6075,6 +6080,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPTileDirective"); case CXCursor_OMPUnrollDirective: return cxstring::createRef("OMPUnrollDirective"); + case CXCursor_OMPReverseDirective: + return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp 
b/clang/tools/libclang/CXCursor.cpp index 9325a16d2a848..275714d8fbcdd 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -672,6 +672,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPUnrollDirectiveClass: K = CXCursor_OMPUnrollDirective; break; + case Stmt::OMPReverseDirectiveClass: + K = CXCursor_OMPReverseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index e91169e8da1aa..0ad3d919103fe 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -537,6 +537,9 @@ def OMP_Unroll : Directive<"unroll"> { ]; let association = AS_Loop; } +def OMP_Reverse : Directive<"reverse"> { + let association = AS_Loop; +} def OMP_For : Directive<"for"> { let allowedClauses = [ VersionedClause<OMPC_Allocate>, diff --git a/openmp/runtime/test/transform/reverse/foreach.cpp b/openmp/runtime/test/transform/reverse/foreach.cpp new file mode 100644 index 0000000000000..0784e3c0057c9 --- /dev/null +++ b/openmp/runtime/test/transform/reverse/foreach.cpp @@ -0,0 +1,162 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include <cstdlib> +#include <cstdarg> +#include <cstdio> +#include <vector> + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + 
Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp reverse + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("v=%d\n", v); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/intfor.c b/openmp/runtime/test/transform/reverse/intfor.c new file mode 100644 index 0000000000000..a526a8d493b3d --- /dev/null +++ b/openmp/runtime/test/transform/reverse/intfor.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp reverse + for (int i = 7; i < 19; i += 3) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=16 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=7 +// 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/iterfor.cpp b/openmp/runtime/test/transform/reverse/iterfor.cpp new file mode 100644 index 0000000000000..ba1086dbd76a5 --- /dev/null +++ b/openmp/runtime/test/transform/reverse/iterfor.cpp @@ -0,0 +1,164 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - 
that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter range("range"); +#pragma omp reverse + for (auto it = range.begin(); it != range.end(); ++it) + printf("v=%d\n", *it); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// 
CHECK-NEXT: v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor diff --git a/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..240ef59bd6b4b --- /dev/null +++ b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,285 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return 
*this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(3) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp reverse + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + for (int k = 0; k < 3; ++k) + printf("i=%d j=%d k=%d\n", i, v, k); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// 
CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] 
iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// 
CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp new file mode 100644 index 0000000000000..ae545b863d86c --- /dev/null +++ b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp @@ -0,0 +1,51 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(3) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp reverse + for (int j = 0; j < 3; ++j) + for (int k = 0; k < 3; ++k) + printf("i=%d j=%d k=%d\n", i, j, k); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=0 j=2 k=0 +// CHECK-NEXT: i=0 j=2 k=1 +// CHECK-NEXT: i=0 j=2 k=2 +// CHECK-NEXT: i=0 j=1 k=0 +// CHECK-NEXT: i=0 j=1 k=1 +// CHECK-NEXT: i=0 j=1 k=2 +// CHECK-NEXT: i=0 j=0 k=0 +// CHECK-NEXT: i=0 j=0 k=1 +// CHECK-NEXT: i=0 j=0 k=2 +// CHECK-NEXT: i=1 j=2 k=0 +// CHECK-NEXT: i=1 j=2 k=1 +// CHECK-NEXT: i=1 j=2 k=2 +// CHECK-NEXT: i=1 j=1 k=0 +// CHECK-NEXT: i=1 j=1 k=1 +// CHECK-NEXT: i=1 j=1 k=2 +// CHECK-NEXT: i=1 j=0 k=0 +// CHECK-NEXT: i=1 j=0 k=1 +// CHECK-NEXT: i=1 j=0 k=2 +// CHECK-NEXT: i=2 j=2 k=0 +// CHECK-NEXT: i=2 j=2 k=1 +// CHECK-NEXT: i=2 j=2 k=2 +// CHECK-NEXT: i=2 j=1 k=0 +// CHECK-NEXT: i=2 j=1 k=1 +// CHECK-NEXT: i=2 j=1 k=2 +// CHECK-NEXT: i=2 j=0 k=0 +// CHECK-NEXT: i=2 j=0 k=1 +// CHECK-NEXT: i=2 j=0 k=2 +// CHECK-NEXT: done From c2bd6a50eed4001c013dc98a21567c092fd2eeb8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: 
Tue, 21 May 2024 17:08:28 +0200 Subject: [PATCH 03/12] Extract out appendFlattendedStmtList --- clang/lib/Sema/SemaOpenMP.cpp | 52 +++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 663dbb927250e..d6a38cb58450b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9815,6 +9815,25 @@ static Stmt *buildPreInits(ASTContext &Context, return nullptr; } +/// Append the \p Item or the content of a CompoundStmt to the list \p +/// TargetList. +/// +/// A CompoundStmt is used as container in case multiple statements need to be +/// stored in lieu of using an explicit list. Flattening is necessary because +/// contained DeclStmts need to be visible after the execution of the list. Used +/// for OpenMP pre-init declarations/statements. +static void appendFlattendedStmtList(SmallVectorImpl &TargetList, + Stmt *Item) { + // nullptr represents an empty list. + if (!Item) + return; + + if (auto *CS = dyn_cast(Item)) + llvm::append_range(TargetList, CS->body()); + else + TargetList.push_back(Item); +} + /// Build preinits statement for the given declarations. static Stmt * buildPreInits(ASTContext &Context, @@ -9830,19 +9849,13 @@ buildPreInits(ASTContext &Context, /// Build pre-init statement for the given statements. static Stmt *buildPreInits(ASTContext &Context, ArrayRef PreInits) { - if (!PreInits.empty()) { - SmallVector Stmts; - for (Stmt *S : PreInits) { - // Do not nest CompoundStmts. 
- if (auto *CS = dyn_cast(S)) { - llvm::append_range(Stmts, CS->body()); - continue; - } - Stmts.push_back(S); - } - return CompoundStmt::Create(Context, PreInits, FPOptionsOverride(), {}, {}); - } - return nullptr; + if (PreInits.empty()) + return nullptr; + + SmallVector Stmts; + for (Stmt *S : PreInits) + appendFlattendedStmtList(Stmts, S); + return CompoundStmt::Create(Context, PreInits, FPOptionsOverride(), {}, {}); } /// Build postupdate expression for the given list of postupdates expressions. @@ -9950,7 +9963,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, else Constituents.push_back(DependentPreInits); for (Stmt *S : Constituents) { - if (DeclStmt *DC = dyn_cast(S)) { + if (auto *DC = dyn_cast(S)) { for (Decl *C : DC->decls()) { auto *D = cast(C); DeclRefExpr *Ref = buildDeclRefExpr( @@ -15123,15 +15136,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); - if (!DependentPreInits) - return; - // CompoundStmts are used as lists of other statements, add their - // contents, not the lists themselves to avoid nesting. This is - // necessary because DeclStmts need to be visible after the pre-init. 
- else if (auto *CS = dyn_cast(DependentPreInits)) - llvm::append_range(OriginalInits.back(), CS->body()); - else - OriginalInits.back().push_back(DependentPreInits); + + appendFlattendedStmtList(OriginalInits.back(), DependentPreInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); From 0a38da39e5520dcd5e4da07275d79d35430dd846 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 21 May 2024 17:32:00 +0200 Subject: [PATCH 04/12] Address review --- clang/include/clang/AST/StmtOpenMP.h | 4 +--- clang/include/clang/Sema/SemaOpenMP.h | 6 ++---- clang/lib/AST/StmtOpenMP.cpp | 5 ++--- clang/lib/Sema/SemaOpenMP.cpp | 20 +++++++++----------- 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 4be2e2d3a4605..fb7f413162fad 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5748,15 +5748,13 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { /// \param C Context of the AST. /// \param StartLoc Location of the introducer (e.g. the 'omp' token). /// \param EndLoc Location of the directive's end (e.g. the tok::eod). - /// \param Clauses The directive's clauses. /// \param AssociatedStmt The outermost associated loop. /// \param TransformedStmt The loop nest after tiling, or nullptr in /// dependent contexts. /// \param PreInits Helper preinits statements for the loop nest. static OMPReverseDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, - Stmt *TransformedStmt, Stmt *PreInits); + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits); /// Build an empty '#pragma omp reverse' AST node for deserialization. 
/// diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index e36a90ba4e1b9..ca91bffe24f6f 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -422,10 +422,8 @@ class SemaOpenMP : public SemaBase { StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); - /// Called on well-formed '#pragma omp reverse' after parsing of its clauses - /// and the associated statement. - StmtResult ActOnOpenMPReverseDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, + /// Called on well-formed '#pragma omp reverse'. + StmtResult ActOnOpenMPReverseDirective(Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 0be0d9d2cfa94..83b8a08e9af73 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -451,11 +451,10 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C, OMPReverseDirective * OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, + SourceLocation EndLoc, Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { OMPReverseDirective *Dir = createDirective( - C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); + C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7b9898704eb1c..f3fbc462aa4ab 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6548,8 +6548,9 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( EndLoc); break; case OMPD_reverse: - Res = 
ActOnOpenMPReverseDirective(ClausesWithImplicit, AStmt, StartLoc, - EndLoc); + assert(ClausesWithImplicit.empty() && + "reverse directive does not support any clauses"); + Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc); break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, @@ -15753,14 +15754,11 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, buildPreInits(Context, PreInits)); } -StmtResult -SemaOpenMP::ActOnOpenMPReverseDirective(ArrayRef Clauses, - Stmt *AStmt, SourceLocation StartLoc, - SourceLocation EndLoc) { +StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { ASTContext &Context = getASTContext(); Scope *CurScope = SemaRef.getCurScope(); - assert(Clauses.empty() && "reverse directive does not accept any clauses; " - "must have beed checked before"); // Empty statement should only be possible if there already was an error. if (!AStmt) @@ -15778,8 +15776,8 @@ SemaOpenMP::ActOnOpenMPReverseDirective(ArrayRef Clauses, // Delay applying the transformation to when template is completely // instantiated. 
if (SemaRef.CurContext->isDependentContext()) - return OMPReverseDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt, nullptr, nullptr); + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, + nullptr, nullptr); assert(LoopHelpers.size() == NumLoops && "Expecting a single-dimensional loop iteration space"); @@ -15931,7 +15929,7 @@ SemaOpenMP::ActOnOpenMPReverseDirective(ArrayRef Clauses, ForStmt(Context, Init.get(), Cond.get(), nullptr, Incr.get(), ReversedBody, LoopHelper.Init->getBeginLoc(), LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); - return OMPReverseDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, ReversedFor, buildPreInits(Context, PreInits)); } From 8eb4b90d0fabed1819d88d5092d5ab2dc5fd3c6f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 22 May 2024 10:51:42 +0200 Subject: [PATCH 05/12] Address review comments --- clang/lib/Sema/SemaOpenMP.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index d6a38cb58450b..bab61e8fd54e8 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9958,10 +9958,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Search for pre-init declared variables that need to be captured // to be referenceable inside the directive. 
SmallVector Constituents; - if (auto *CS = dyn_cast(DependentPreInits)) - llvm::append_range(Constituents, CS->body()); - else - Constituents.push_back(DependentPreInits); + appendFlattendedStmtList(Constituents, DependentPreInits); for (Stmt *S : Constituents) { if (auto *DC = dyn_cast(S)) { for (Decl *C : DC->decls()) { @@ -15196,7 +15193,7 @@ static void collectLoopStmts(Stmt *AStmt, MutableArrayRef LoopStmts) { LoopStmts[Cnt] = CurStmt; return false; }); - assert(llvm::all_of(LoopStmts, [](Stmt *LoopStmt) { return LoopStmt; }) && + assert(!is_contained(LoopStmts, nullptr) && "Expecting a loop statement for each affected loop"); } From 76634ade083cde2881bb072fe821f1d434f99196 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 22 May 2024 13:22:05 +0200 Subject: [PATCH 06/12] [Clang][OpenMP] Add interchange directive --- clang/include/clang-c/Index.h | 4 + clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 76 +- clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 6 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 20 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 5 + clang/lib/Basic/OpenMPKinds.cpp | 4 +- clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 10 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Parse/ParseOpenMP.cpp | 2 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 162 ++ clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 11 + clang/lib/Serialization/ASTWriterStmt.cpp | 5 + clang/test/OpenMP/interchange_ast_print.cpp | 135 ++ clang/test/OpenMP/interchange_codegen.cpp | 1990 +++++++++++++++++ clang/test/OpenMP/interchange_messages.cpp | 77 + clang/tools/libclang/CIndex.cpp | 8 + clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 3 + .../test/transform/interchange/foreach.cpp | 216 ++ 
.../test/transform/interchange/intfor.c | 38 + .../test/transform/interchange/iterfor.cpp | 222 ++ .../parallel-wsloop-collapse-foreach.cpp | 340 +++ .../parallel-wsloop-collapse-intfor.cpp | 106 + 30 files changed, 3467 insertions(+), 2 deletions(-) create mode 100644 clang/test/OpenMP/interchange_ast_print.cpp create mode 100644 clang/test/OpenMP/interchange_codegen.cpp create mode 100644 clang/test/OpenMP/interchange_messages.cpp create mode 100644 openmp/runtime/test/transform/interchange/foreach.cpp create mode 100644 openmp/runtime/test/transform/interchange/intfor.c create mode 100644 openmp/runtime/test/transform/interchange/iterfor.cpp create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intfor.cpp diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index c7d63818ece23..a79aafbf20222 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2150,6 +2150,10 @@ enum CXCursorKind { */ CXCursor_OMPReverseDirective = 307, + /** OpenMP interchange directive. + */ + CXCursor_OMPInterchangeDirective = 308, + /** OpenACC Compute Construct. 
*/ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 06b29d59785f6..1bb167d7ddc3c 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3024,6 +3024,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPInterchangeDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index fb7f413162fad..01c8b8e1a9f5e 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1009,7 +1009,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { static bool classof(const Stmt *T) { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || - C == OMPReverseDirectiveClass; + C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass; } }; @@ -5777,6 +5777,80 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp interchange' loop transformation directive. +/// +/// \code{c} +/// #pragma omp interchange +/// for (int i = 0; i < m; ++i) +/// for (int j = 0; j < n; ++j) +/// .. +/// \endcode +class OMPInterchangeDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Offsets of child members. 
+ enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPInterchangeDirective(SourceLocation StartLoc, + SourceLocation EndLoc, unsigned NumLoops) + : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, + llvm::omp::OMPD_interchange, StartLoc, + EndLoc, NumLoops) { + setNumGeneratedLoops(3 * NumLoops); + } + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp interchange'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param Clauses The directive's clauses. + /// \param NumLoops Number of affected loops + /// (number of items in the 'permutation' clause if present). + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after tiling, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPInterchangeDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp interchange' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + /// \param NumLoops Number of associated loops to allocate. + static OMPInterchangeDirective * + CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned NumLoops); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nullptr in dependent contexts. 
+ Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPInterchangeDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index b2e2be5c998bb..b445ea225eac5 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -230,6 +230,7 @@ def OMPLoopTransformationDirective : StmtNode; def OMPTileDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; +def OMPInterchangeDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index ca91bffe24f6f..06376f173e8df 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -425,6 +425,12 @@ class SemaOpenMP : public SemaBase { /// Called on well-formed '#pragma omp reverse'. StmtResult ActOnOpenMPReverseDirective(Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '#pragma omp interchange' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPInterchangeDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. 
StmtResult diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dee0d073557cc..5fbdfd7a496fe 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1857,6 +1857,7 @@ enum StmtCode { STMT_OMP_TILE_DIRECTIVE, STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, + STMT_OMP_INTERCHANGE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 83b8a08e9af73..24d8eb25c59ba 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -467,6 +467,26 @@ OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C, SourceLocation(), SourceLocation()); } +OMPInterchangeDirective *OMPInterchangeDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPInterchangeDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPInterchangeDirective * +OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 64b481f680311..64bee75b205ae 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -768,6 +768,11 @@ void StmtPrinter::VisitOMPReverseDirective(OMPReverseDirective *Node) 
{ PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { + Indent() << "#pragma omp interchange"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 7445e5519b972..1ae99d43575a7 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -989,6 +989,11 @@ void StmtProfiler::VisitOMPReverseDirective(const OMPReverseDirective *S) { VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPInterchangeDirective( + const OMPInterchangeDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 803808c38e2fe..ff5d5c8bdc981 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -684,7 +684,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { } bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { - return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse; + return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || + DKind == OMPD_interchange; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { @@ -809,6 +810,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_tile: case OMPD_unroll: case OMPD_reverse: + case OMPD_interchange: // loop transformations do not introduce captures. 
break; case OMPD_threadprivate: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 93c2f8900dd12..ba7c52cc6ab7b 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -225,6 +225,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPReverseDirectiveClass: EmitOMPReverseDirective(cast(*S)); break; + case Stmt::OMPInterchangeDirectiveClass: + EmitOMPInterchangeDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ad6c044aa483b..7a37e452fb559 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -189,6 +189,9 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { PreInits = Unroll->getPreInits(); } else if (const auto *Reverse = dyn_cast(&S)) { PreInits = Reverse->getPreInits(); + } else if (const auto *Interchange = + dyn_cast(&S)) { + PreInits = Interchange->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2770,6 +2773,13 @@ void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPInterchangeDirective( + const OMPInterchangeDirective &S) { + // Emit the de-sugared statement. 
+ OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ac738e1e82886..c2a8e65ca2d0a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3808,6 +3808,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPTileDirective(const OMPTileDirective &S); void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); + void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 57fcf6ce520ac..0e3e604203c86 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2385,6 +2385,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_tile: case OMPD_unroll: case OMPD_reverse: + case OMPD_interchange: case OMPD_task: case OMPD_taskyield: case OMPD_barrier: @@ -2804,6 +2805,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_tile: case OMPD_unroll: case OMPD_reverse: + case OMPD_interchange: case OMPD_for: case OMPD_for_simd: case OMPD_sections: diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 4de7183cde281..5991f496d3a0f 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1487,6 +1487,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: + case 
Stmt::OMPInterchangeDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index c2fd4de933ae4..ef141003b7d61 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -39,6 +39,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Frontend/OpenMP/OMPAssume.h" @@ -4335,6 +4336,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_tile: case OMPD_unroll: case OMPD_reverse: + case OMPD_interchange: break; case OMPD_loop: // TODO: 'loop' may require additional parameters depending on the binding. @@ -6552,6 +6554,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( "reverse directive does not support any clauses"); Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc); break; + case OMPD_interchange: + Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, + EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -15139,6 +15145,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); @@ -15937,6 +15945,160 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + DeclContext *CurContext = SemaRef.CurContext; + 
Scope *CurScope = SemaRef.getCurScope(); + + // Empty statement should only be possible if there already was an error. + if (!AStmt) + return StmtError(); + + // interchange without permutation clause swaps two loops. + constexpr size_t NumLoops = 2; + + // Verify and diagnose loop nest. + SmallVector LoopHelpers(NumLoops); + Stmt *Body = nullptr; + SmallVector, 2> OriginalInits; + if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, + LoopHelpers, Body, OriginalInits)) + return StmtError(); + + // Delay interchange to when template is completely instantiated. + if (CurContext->isDependentContext()) + return OMPInterchangeDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + + assert(LoopHelpers.size() == NumLoops && + "Expecting loop iteration space dimensionaly to match number of " + "affected loops"); + assert(OriginalInits.size() == NumLoops && + "Expecting loop iteration space dimensionaly to match number of " + "affected loops"); + + // Fixed permutation: new position 0 takes loop 1, position 1 takes loop 0. + constexpr uint64_t Permutation[] = {1, 0}; + + // Find the affected loops. + SmallVector LoopStmts(NumLoops, nullptr); + collectLoopStmts(AStmt, LoopStmts); + + // Collect pre-init statements in the order before the permutation. + SmallVector PreInits; + for (auto I : llvm::seq(NumLoops)) { + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; + + assert(LoopHelper.Counters.size() == 1 && + "Single-dimensional loop iteration space expected"); + auto *OrigCntVar = cast(LoopHelper.Counters.front()); + + std::string OrigVarName = OrigCntVar->getNameInfo().getAsString(); + addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I], + PreInits); + } + + SmallVector PermutedIndVars; + PermutedIndVars.resize(NumLoops); + CaptureVars CopyTransformer(SemaRef); + + // Create the permuted loops from the inside to the outside of the + // interchanged loop nest. Body of the innermost new loop is the original + // innermost body. 
+ Stmt *Inner = Body; + for (auto TargetIdx : llvm::reverse(llvm::seq(NumLoops))) { + // Get the original loop that belongs to this new position. + uint64_t SourceIdx = Permutation[TargetIdx]; + OMPLoopBasedDirective::HelperExprs &SourceHelper = LoopHelpers[SourceIdx]; + Stmt *SourceLoopStmt = LoopStmts[SourceIdx]; + assert(SourceHelper.Counters.size() == 1 && + "Single-dimensional loop iteration space expected"); + auto *OrigCntVar = cast(SourceHelper.Counters.front()); + + // Normalized loop counter variable: From 0 to n-1, always an integer type. + DeclRefExpr *IterVarRef = cast(SourceHelper.IterationVarRef); + QualType IVTy = IterVarRef->getType(); + assert(IVTy->isIntegerType() && + "Expected the logical iteration counter to be an integer"); + + std::string OrigVarName = OrigCntVar->getNameInfo().getAsString(); + SourceLocation OrigVarLoc = IterVarRef->getExprLoc(); + + // Make a copy of the NumIterations expression for each use: By the AST + // constraints, every expression object in a DeclContext must be unique. + auto MakeNumIterations = [&CopyTransformer, &SourceHelper]() -> Expr * { + return AssertSuccess( + CopyTransformer.TransformExpr(SourceHelper.NumIterations)); + }; + + // Iteration variable for the permuted loop. Reuse the one from + // checkOpenMPLoop which will also be used to update the original loop + // variable. + std::string PermutedCntName = + (Twine(".permuted_") + llvm::utostr(TargetIdx) + ".iv." 
+ OrigVarName) + .str(); + auto *PermutedCntDecl = cast(IterVarRef->getDecl()); + PermutedCntDecl->setDeclName( + &SemaRef.PP.getIdentifierTable().get(PermutedCntName)); + PermutedIndVars[TargetIdx] = PermutedCntDecl; + auto MakePermutedRef = [this, PermutedCntDecl, IVTy, OrigVarLoc]() { + return buildDeclRefExpr(SemaRef, PermutedCntDecl, IVTy, OrigVarLoc); + }; + + // For init-statement: + // \code{c} + // auto .permuted_{target}.iv = 0 + // \endcode + ExprResult Zero = SemaRef.ActOnIntegerConstant(OrigVarLoc, 0); + if (!Zero.isUsable()) + return StmtError(); + SemaRef.AddInitializerToDecl(PermutedCntDecl, Zero.get(), + /*DirectInit=*/false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(PermutedCntDecl), OrigCntVar->getBeginLoc(), + OrigCntVar->getEndLoc()); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: + // \code{c} + // .permuted_{target}.iv < NumIterations + // \endcode + ExprResult CondExpr = + SemaRef.BuildBinOp(CurScope, SourceHelper.Cond->getExprLoc(), BO_LT, + MakePermutedRef(), MakeNumIterations()); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: + // \code{c} + // ++.permuted_{target}.iv + // \endcode + ExprResult IncrStmt = SemaRef.BuildUnaryOp( + CurScope, SourceHelper.Inc->getExprLoc(), UO_PreInc, MakePermutedRef()); + if (!IncrStmt.isUsable()) + return StmtError(); + + SmallVector BodyParts(SourceHelper.Updates.begin(), + SourceHelper.Updates.end()); + if (auto *SourceCXXFor = dyn_cast(SourceLoopStmt)) + BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); + BodyParts.push_back(Inner); + Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(), + Inner->getBeginLoc(), Inner->getEndLoc()); + Inner = new (Context) ForStmt( + Context, InitStmt.get(), CondExpr.get(), nullptr, IncrStmt.get(), Inner, + SourceHelper.Init->getBeginLoc(), SourceHelper.Init->getBeginLoc(), + SourceHelper.Inc->getEndLoc()); + } + + return OMPInterchangeDirective::Create(Context, StartLoc, EndLoc, 
Clauses, + NumLoops, AStmt, Inner, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index c1a08da885507..8da35f522d8ff 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9184,6 +9184,17 @@ TreeTransform::TransformOMPReverseDirective(OMPReverseDirective *D) { return Res; } +template +StmtResult TreeTransform::TransformOMPInterchangeDirective( + OMPInterchangeDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index ff72679f993e2..92e14473e7b51 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2432,6 +2432,10 @@ void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3448,6 +3452,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_INTERCHANGE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPInterchangeDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } + case STMT_OMP_FOR_DIRECTIVE: { unsigned CollapsedNum = 
Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index ffb09ece981e6..0ac8e1967e17c 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2417,6 +2417,11 @@ void ASTStmtWriter::VisitOMPReverseDirective(OMPReverseDirective *D) { Code = serialization::STMT_OMP_REVERSE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/test/OpenMP/interchange_ast_print.cpp b/clang/test/OpenMP/interchange_ast_print.cpp new file mode 100644 index 0000000000000..f8bf075cd300f --- /dev/null +++ b/clang/test/OpenMP/interchange_ast_print.cpp @@ -0,0 +1,135 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER 
+#define HEADER + +// placeholder for loop body code. +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp interchange + // DUMP: OMPInterchangeDirective + #pragma omp interchange + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: for (int j = 7; j < 17; j += 3) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 3) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { + // PRINT: #pragma omp for collapse(3) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 3 + // DUMP-NEXT: IntegerLiteral {{.*}} 3 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(3) + // PRINT: #pragma omp interchange + // DUMP: OMPInterchangeDirective + #pragma omp interchange + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: for (int k = 7; k < 17; k += 1) + // DUMP: ForStmt + for (int k = 7; k < 17; k += 1) + // PRINT: body(i, j, k); + // DUMP: CallExpr + body(i, j, k); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template +void foo6() { + // PRINT: #pragma omp interchange + // DUMP: OMPInterchangeDirective + #pragma omp interchange + // PRINT-NEXT: for (int i = 0; i < 11; i += 2) + // DUMP-NEXT: ForStmt + for (int i = 0; i < 11; i += 2) + // PRINT-NEXT: #pragma omp tile sizes(Tile) + // DUMP: OMPTileDirective + #pragma omp tile sizes(Tile) + // PRINT-NEXT: for (int j = 0; j < 13; j += 2) + // DUMP: ForStmt + for (int j = 0; j < 13; j += 2) + // PRINT-NEXT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + +// Also test instantiating the template. 
+void tfoo6() { + foo6<32>(); +} + + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + double arr[128]; + // PRINT: #pragma omp interchange + // DUMP: OMPInterchangeDirective + #pragma omp interchange + // PRINT-NEXT: for (double c = 42; auto &&v : arr) + // DUMP-NEXT: CXXForRangeStmt + for (double c = 42; auto &&v : arr) + // PRINT-NEXT: for (int i = 0; i < 42; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 42; i += 2) + // PRINT-NEXT: body(c, v, i); + // DUMP: CallExpr + body(c, v, i); +} + + +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + double arr[128]; + // PRINT: #pragma omp interchange + // DUMP: OMPInterchangeDirective + #pragma omp interchange + // PRINT-NEXT: for (int i = 0; i < 42; i += 2) + // DUMP-NEXT: ForStmt + for (int i = 0; i < 42; i += 2) + // PRINT-NEXT: for (double c = 42; auto &&v : arr) + // DUMP: CXXForRangeStmt + for (double c = 42; auto &&v : arr) + // PRINT-NEXT: body(i, c, v); + // DUMP: CallExpr + body(i, c, v); +} + +#endif + diff --git a/clang/test/OpenMP/interchange_codegen.cpp b/clang/test/OpenMP/interchange_codegen.cpp new file mode 100644 index 0000000000000..9c1782183cf98 --- /dev/null +++ b/clang/test/OpenMP/interchange_codegen.cpp @@ -0,0 +1,1990 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ + +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// FIXME: They should be exactly the same but currently differ in function order +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 
-fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) {} + + + + +extern "C" void foo2(int start1, int start2, int end1, int end2, int step1, int step2) { +#pragma omp interchange + for (int i = start1; i < end1; i += step1) + for (int j = start2; j < end2; j += step2) + body(i, j); +} + + +extern "C" void foo3() { +#pragma omp for +#pragma omp interchange + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +extern "C" void foo4() { +#pragma omp for collapse(2) + for (int k = 7; k < 17; k += 3) +#pragma omp interchange + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +extern "C" void foo6() { +#pragma omp for collapse(4) + for (int i = 7; i < 17; i += 3) +#pragma omp interchange + for (int j = 7; j < 17; j += 3) + for (int k = 7; k < 17; k += 3) + for (int l = 7; l < 17; l += 3) + body(i, j, k, l); +} + + +extern "C" void foo9() { + double arr[128]; + #pragma omp interchange + for (double c = 42; auto && v : arr) + for (int i = 0; i < 42; i += 2) + body(c, v, i); +} + + +extern "C" void foo10() { + double A[128], B[16]; + #pragma omp for collapse(4) + for (int i = 0; i < 128; ++i) + #pragma omp interchange + for (double c = 42; auto aa : A) + for (double d = 42; auto &bb : B) + for (int j = 0; j < 128; ++j) + body(i, c, aa, d, bb, j); +} + +#endif /* HEADER */ + +// CHECK1-LABEL: define {{[^@]+}}@body +// CHECK1-SAME: (...) 
#[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo2 +// CHECK1-SAME: (i32 noundef [[START1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTNEW_STEP7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[SUB9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[SUB10]], [[TMP14]] +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD11]], [[TMP15]] +// CHECK1-NEXT: [[SUB13:%.*]] = sub i32 [[DIV12]], 1 +// CHECK1-NEXT: store i32 [[SUB13]], ptr [[DOTCAPTURE_EXPR_8]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_8]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP16]], [[ADD14]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END24:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND16:%.*]] +// CHECK1: for.cond16: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP22]], 1 +// CHECK1-NEXT: [[CMP18:%.*]] = icmp ult i32 [[TMP21]], [[ADD17]] +// CHECK1-NEXT: br i1 [[CMP18]], label [[FOR_BODY19:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body19: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL20:%.*]] = mul i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: 
[[ADD21:%.*]] = add i32 [[TMP23]], [[MUL20]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP26]], i32 noundef [[TMP27]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC22:%.*]] +// CHECK1: for.inc22: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[INC23:%.*]] = add i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: for.end24: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo3 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// 
CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP7]], 3 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[J]], align 
4 +// CHECK1-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP8]], 4 +// CHECK1-NEXT: br i1 [[CMP4]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 7, [[MUL5]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP10]], i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo4 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 15, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 15 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 15, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP8]], 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL4]] +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 7, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], 4 +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP11]], 3 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 7, 
[[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP12]], i32 noundef [[TMP13]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo6 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV_K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 255, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 255 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 255, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// 
CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP8]], 64 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 64 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL6]] +// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB]], 16 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTPERMUTED_0_IV_K]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[TMP10]], 64 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 64 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP9]], [[MUL11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[TMP12]], 64 +// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 64 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP11]], [[MUL14]] +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 16 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 16 +// CHECK1-NEXT: [[SUB18:%.*]] = sub nsw i32 [[SUB12]], [[MUL17]] +// CHECK1-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 4 +// CHECK1-NEXT: [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTPERMUTED_1_IV_J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP14]], 64 +// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 64 +// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP13]], [[MUL23]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[TMP16]], 64 +// CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV25]], 64 +// CHECK1-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP15]], [[MUL26]] +// CHECK1-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 16 +// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 16 +// CHECK1-NEXT: [[SUB30:%.*]] = sub nsw i32 [[SUB24]], [[MUL29]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV31:%.*]] = sdiv i32 [[TMP18]], 64 +// CHECK1-NEXT: [[MUL32:%.*]] = mul nsw i32 [[DIV31]], 64 +// CHECK1-NEXT: [[SUB33:%.*]] = sub nsw i32 [[TMP17]], [[MUL32]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV34:%.*]] = sdiv i32 [[TMP20]], 64 +// CHECK1-NEXT: [[MUL35:%.*]] = mul nsw i32 [[DIV34]], 64 +// CHECK1-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP19]], [[MUL35]] +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 16 +// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 16 +// CHECK1-NEXT: [[SUB39:%.*]] = sub nsw i32 [[SUB33]], [[MUL38]] +// CHECK1-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 4 +// CHECK1-NEXT: [[SUB42:%.*]] = sub nsw i32 [[SUB30]], [[MUL41]] +// CHECK1-NEXT: [[MUL43:%.*]] = mul nsw i32 [[SUB42]], 3 +// CHECK1-NEXT: [[ADD44:%.*]] = add nsw i32 7, [[MUL43]] +// CHECK1-NEXT: store i32 [[ADD44]], ptr [[L]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_K]], 
align 4 +// CHECK1-NEXT: [[MUL45:%.*]] = mul nsw i32 [[TMP21]], 3 +// CHECK1-NEXT: [[ADD46:%.*]] = add nsw i32 7, [[MUL45]] +// CHECK1-NEXT: store i32 [[ADD46]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_J]], align 4 +// CHECK1-NEXT: [[MUL47:%.*]] = mul nsw i32 [[TMP22]], 3 +// CHECK1-NEXT: [[ADD48:%.*]] = add nsw i32 7, [[MUL47]] +// CHECK1-NEXT: store i32 [[ADD48]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]], i32 noundef [[TMP24]], i32 noundef [[TMP25]], i32 noundef [[TMP26]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo9 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] 
= alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// 
CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], 21 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END15:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 2 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 +// CHECK1-NEXT: store i64 0, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7:%.*]] +// CHECK1: for.cond7: +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP9]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i64 [[TMP8]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body10: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL11]] +// CHECK1-NEXT: store ptr [[ADD_PTR12]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[V]], align 
8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(double noundef [[TMP13]], double noundef [[TMP15]], i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP17]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC13:%.*]] +// CHECK1: for.inc13: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[INC14]], ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: for.end15: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo10 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[B:%.*]] = alloca [16 x double], align 16 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca double, align 8 +// CHECK1-NEXT: 
[[__RANGE4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_14:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_15:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_24:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_26:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_28:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV___BEGIN4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I37:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTPERMUTED_0_IV___BEGIN438:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPERMUTED_1_IV___BEGIN339:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J40:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[BB:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// 
CHECK1-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY4]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY5]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB8]], ptr [[DOTCAPTURE_EXPR_7]], align 8 +// CHECK1-NEXT: store double 4.200000e+01, ptr [[D]], align 8 +// CHECK1-NEXT: store ptr [[B]], ptr [[__RANGE4]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP7]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY9]], i64 16 +// CHECK1-NEXT: store ptr [[ADD_PTR10]], ptr [[__END4]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY11:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY11]], ptr 
[[__BEGIN4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY13]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__END4]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[DOTCAPTURE_EXPR_14]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_14]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST16:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST17:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB18:%.*]] = sub i64 [[SUB_PTR_LHS_CAST16]], [[SUB_PTR_RHS_CAST17]] +// CHECK1-NEXT: [[SUB_PTR_DIV19:%.*]] = sdiv exact i64 [[SUB_PTR_SUB18]], 8 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 [[SUB_PTR_DIV19]], 1 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[SUB20]], 1 +// CHECK1-NEXT: [[DIV22:%.*]] = sdiv i64 [[ADD21]], 1 +// CHECK1-NEXT: [[SUB23:%.*]] = sub nsw i64 [[DIV22]], 1 +// CHECK1-NEXT: store i64 [[SUB23]], ptr [[DOTCAPTURE_EXPR_15]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_15]], align 8 +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i64 [[TMP13]], 1 +// CHECK1-NEXT: store i64 [[ADD25]], ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_7]], align 8 +// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD27]], ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP15]], 0 +// CHECK1-NEXT: [[DIV30:%.*]] = sdiv i64 [[SUB29]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 128, [[DIV30]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB31:%.*]] = sub 
nsw i64 [[TMP16]], 0 +// CHECK1-NEXT: [[DIV32:%.*]] = sdiv i64 [[SUB31]], 1 +// CHECK1-NEXT: [[MUL33:%.*]] = mul nsw i64 [[MUL]], [[DIV32]] +// CHECK1-NEXT: [[MUL34:%.*]] = mul nsw i64 [[MUL33]], 128 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[MUL34]], 1 +// CHECK1-NEXT: store i64 [[SUB35]], ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i64 0, ptr [[DOTPERMUTED_0_IV___BEGIN4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTPERMUTED_1_IV___BEGIN3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: land.lhs.true: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[CMP36:%.*]] = icmp slt i64 0, [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP36]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], 
align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP42:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB43:%.*]] = sub nsw i64 [[TMP28]], 0 +// CHECK1-NEXT: [[DIV44:%.*]] = sdiv i64 [[SUB43]], 1 +// CHECK1-NEXT: [[MUL45:%.*]] = mul nsw i64 1, [[DIV44]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP29]], 0 +// CHECK1-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i64 [[MUL45]], [[DIV47]] +// CHECK1-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 128 +// CHECK1-NEXT: [[DIV50:%.*]] = sdiv i64 [[TMP27]], [[MUL49]] +// CHECK1-NEXT: [[MUL51:%.*]] = mul nsw i64 [[DIV50]], 1 +// CHECK1-NEXT: [[ADD52:%.*]] = add nsw i64 0, [[MUL51]] +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[ADD52]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[I37]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], 
align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB53:%.*]] = sub nsw i64 [[TMP32]], 0 +// CHECK1-NEXT: [[DIV54:%.*]] = sdiv i64 [[SUB53]], 1 +// CHECK1-NEXT: [[MUL55:%.*]] = mul nsw i64 1, [[DIV54]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB56:%.*]] = sub nsw i64 [[TMP33]], 0 +// CHECK1-NEXT: [[DIV57:%.*]] = sdiv i64 [[SUB56]], 1 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[MUL55]], [[DIV57]] +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i64 [[MUL58]], 128 +// CHECK1-NEXT: [[DIV60:%.*]] = sdiv i64 [[TMP31]], [[MUL59]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB61:%.*]] = sub nsw i64 [[TMP34]], 0 +// CHECK1-NEXT: [[DIV62:%.*]] = sdiv i64 [[SUB61]], 1 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i64 1, [[DIV62]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB64:%.*]] = sub nsw i64 [[TMP35]], 0 +// CHECK1-NEXT: [[DIV65:%.*]] = sdiv i64 [[SUB64]], 1 +// CHECK1-NEXT: [[MUL66:%.*]] = mul nsw i64 [[MUL63]], [[DIV65]] +// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i64 [[MUL66]], 128 +// CHECK1-NEXT: [[MUL68:%.*]] = mul nsw i64 [[DIV60]], [[MUL67]] +// CHECK1-NEXT: [[SUB69:%.*]] = sub nsw i64 [[TMP30]], [[MUL68]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB70:%.*]] = sub nsw i64 [[TMP36]], 0 +// CHECK1-NEXT: [[DIV71:%.*]] = sdiv i64 [[SUB70]], 1 +// CHECK1-NEXT: [[MUL72:%.*]] = mul nsw i64 1, [[DIV71]] +// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i64 [[MUL72]], 128 +// CHECK1-NEXT: [[DIV74:%.*]] = sdiv i64 [[SUB69]], [[MUL73]] +// CHECK1-NEXT: [[MUL75:%.*]] = mul nsw i64 [[DIV74]], 1 +// CHECK1-NEXT: [[ADD76:%.*]] = add nsw i64 0, [[MUL75]] +// CHECK1-NEXT: store i64 [[ADD76]], ptr [[DOTPERMUTED_0_IV___BEGIN438]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: 
[[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB77:%.*]] = sub nsw i64 [[TMP39]], 0 +// CHECK1-NEXT: [[DIV78:%.*]] = sdiv i64 [[SUB77]], 1 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 1, [[DIV78]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB80:%.*]] = sub nsw i64 [[TMP40]], 0 +// CHECK1-NEXT: [[DIV81:%.*]] = sdiv i64 [[SUB80]], 1 +// CHECK1-NEXT: [[MUL82:%.*]] = mul nsw i64 [[MUL79]], [[DIV81]] +// CHECK1-NEXT: [[MUL83:%.*]] = mul nsw i64 [[MUL82]], 128 +// CHECK1-NEXT: [[DIV84:%.*]] = sdiv i64 [[TMP38]], [[MUL83]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB85:%.*]] = sub nsw i64 [[TMP41]], 0 +// CHECK1-NEXT: [[DIV86:%.*]] = sdiv i64 [[SUB85]], 1 +// CHECK1-NEXT: [[MUL87:%.*]] = mul nsw i64 1, [[DIV86]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB88:%.*]] = sub nsw i64 [[TMP42]], 0 +// CHECK1-NEXT: [[DIV89:%.*]] = sdiv i64 [[SUB88]], 1 +// CHECK1-NEXT: [[MUL90:%.*]] = mul nsw i64 [[MUL87]], [[DIV89]] +// CHECK1-NEXT: [[MUL91:%.*]] = mul nsw i64 [[MUL90]], 128 +// CHECK1-NEXT: [[MUL92:%.*]] = mul nsw i64 [[DIV84]], [[MUL91]] +// CHECK1-NEXT: [[SUB93:%.*]] = sub nsw i64 [[TMP37]], [[MUL92]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB94:%.*]] = sub nsw i64 [[TMP45]], 0 +// CHECK1-NEXT: [[DIV95:%.*]] = sdiv i64 [[SUB94]], 1 +// CHECK1-NEXT: [[MUL96:%.*]] = mul nsw i64 1, [[DIV95]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB97:%.*]] = sub nsw i64 [[TMP46]], 0 +// CHECK1-NEXT: [[DIV98:%.*]] = sdiv i64 [[SUB97]], 1 +// CHECK1-NEXT: [[MUL99:%.*]] = 
mul nsw i64 [[MUL96]], [[DIV98]] +// CHECK1-NEXT: [[MUL100:%.*]] = mul nsw i64 [[MUL99]], 128 +// CHECK1-NEXT: [[DIV101:%.*]] = sdiv i64 [[TMP44]], [[MUL100]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB102:%.*]] = sub nsw i64 [[TMP47]], 0 +// CHECK1-NEXT: [[DIV103:%.*]] = sdiv i64 [[SUB102]], 1 +// CHECK1-NEXT: [[MUL104:%.*]] = mul nsw i64 1, [[DIV103]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB105:%.*]] = sub nsw i64 [[TMP48]], 0 +// CHECK1-NEXT: [[DIV106:%.*]] = sdiv i64 [[SUB105]], 1 +// CHECK1-NEXT: [[MUL107:%.*]] = mul nsw i64 [[MUL104]], [[DIV106]] +// CHECK1-NEXT: [[MUL108:%.*]] = mul nsw i64 [[MUL107]], 128 +// CHECK1-NEXT: [[MUL109:%.*]] = mul nsw i64 [[DIV101]], [[MUL108]] +// CHECK1-NEXT: [[SUB110:%.*]] = sub nsw i64 [[TMP43]], [[MUL109]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB111:%.*]] = sub nsw i64 [[TMP49]], 0 +// CHECK1-NEXT: [[DIV112:%.*]] = sdiv i64 [[SUB111]], 1 +// CHECK1-NEXT: [[MUL113:%.*]] = mul nsw i64 1, [[DIV112]] +// CHECK1-NEXT: [[MUL114:%.*]] = mul nsw i64 [[MUL113]], 128 +// CHECK1-NEXT: [[DIV115:%.*]] = sdiv i64 [[SUB110]], [[MUL114]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB116:%.*]] = sub nsw i64 [[TMP50]], 0 +// CHECK1-NEXT: [[DIV117:%.*]] = sdiv i64 [[SUB116]], 1 +// CHECK1-NEXT: [[MUL118:%.*]] = mul nsw i64 1, [[DIV117]] +// CHECK1-NEXT: [[MUL119:%.*]] = mul nsw i64 [[MUL118]], 128 +// CHECK1-NEXT: [[MUL120:%.*]] = mul nsw i64 [[DIV115]], [[MUL119]] +// CHECK1-NEXT: [[SUB121:%.*]] = sub nsw i64 [[SUB93]], [[MUL120]] +// CHECK1-NEXT: [[DIV122:%.*]] = sdiv i64 [[SUB121]], 128 +// CHECK1-NEXT: [[MUL123:%.*]] = mul nsw i64 [[DIV122]], 1 +// CHECK1-NEXT: [[ADD124:%.*]] = add nsw i64 0, [[MUL123]] +// CHECK1-NEXT: store i64 [[ADD124]], ptr [[DOTPERMUTED_1_IV___BEGIN339]], align 8 +// CHECK1-NEXT: 
[[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB125:%.*]] = sub nsw i64 [[TMP53]], 0 +// CHECK1-NEXT: [[DIV126:%.*]] = sdiv i64 [[SUB125]], 1 +// CHECK1-NEXT: [[MUL127:%.*]] = mul nsw i64 1, [[DIV126]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB128:%.*]] = sub nsw i64 [[TMP54]], 0 +// CHECK1-NEXT: [[DIV129:%.*]] = sdiv i64 [[SUB128]], 1 +// CHECK1-NEXT: [[MUL130:%.*]] = mul nsw i64 [[MUL127]], [[DIV129]] +// CHECK1-NEXT: [[MUL131:%.*]] = mul nsw i64 [[MUL130]], 128 +// CHECK1-NEXT: [[DIV132:%.*]] = sdiv i64 [[TMP52]], [[MUL131]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB133:%.*]] = sub nsw i64 [[TMP55]], 0 +// CHECK1-NEXT: [[DIV134:%.*]] = sdiv i64 [[SUB133]], 1 +// CHECK1-NEXT: [[MUL135:%.*]] = mul nsw i64 1, [[DIV134]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB136:%.*]] = sub nsw i64 [[TMP56]], 0 +// CHECK1-NEXT: [[DIV137:%.*]] = sdiv i64 [[SUB136]], 1 +// CHECK1-NEXT: [[MUL138:%.*]] = mul nsw i64 [[MUL135]], [[DIV137]] +// CHECK1-NEXT: [[MUL139:%.*]] = mul nsw i64 [[MUL138]], 128 +// CHECK1-NEXT: [[MUL140:%.*]] = mul nsw i64 [[DIV132]], [[MUL139]] +// CHECK1-NEXT: [[SUB141:%.*]] = sub nsw i64 [[TMP51]], [[MUL140]] +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB142:%.*]] = sub nsw i64 [[TMP59]], 0 +// CHECK1-NEXT: [[DIV143:%.*]] = sdiv i64 [[SUB142]], 1 +// CHECK1-NEXT: [[MUL144:%.*]] = mul nsw i64 1, [[DIV143]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB145:%.*]] = sub 
nsw i64 [[TMP60]], 0 +// CHECK1-NEXT: [[DIV146:%.*]] = sdiv i64 [[SUB145]], 1 +// CHECK1-NEXT: [[MUL147:%.*]] = mul nsw i64 [[MUL144]], [[DIV146]] +// CHECK1-NEXT: [[MUL148:%.*]] = mul nsw i64 [[MUL147]], 128 +// CHECK1-NEXT: [[DIV149:%.*]] = sdiv i64 [[TMP58]], [[MUL148]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB150:%.*]] = sub nsw i64 [[TMP61]], 0 +// CHECK1-NEXT: [[DIV151:%.*]] = sdiv i64 [[SUB150]], 1 +// CHECK1-NEXT: [[MUL152:%.*]] = mul nsw i64 1, [[DIV151]] +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB153:%.*]] = sub nsw i64 [[TMP62]], 0 +// CHECK1-NEXT: [[DIV154:%.*]] = sdiv i64 [[SUB153]], 1 +// CHECK1-NEXT: [[MUL155:%.*]] = mul nsw i64 [[MUL152]], [[DIV154]] +// CHECK1-NEXT: [[MUL156:%.*]] = mul nsw i64 [[MUL155]], 128 +// CHECK1-NEXT: [[MUL157:%.*]] = mul nsw i64 [[DIV149]], [[MUL156]] +// CHECK1-NEXT: [[SUB158:%.*]] = sub nsw i64 [[TMP57]], [[MUL157]] +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB159:%.*]] = sub nsw i64 [[TMP63]], 0 +// CHECK1-NEXT: [[DIV160:%.*]] = sdiv i64 [[SUB159]], 1 +// CHECK1-NEXT: [[MUL161:%.*]] = mul nsw i64 1, [[DIV160]] +// CHECK1-NEXT: [[MUL162:%.*]] = mul nsw i64 [[MUL161]], 128 +// CHECK1-NEXT: [[DIV163:%.*]] = sdiv i64 [[SUB158]], [[MUL162]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB164:%.*]] = sub nsw i64 [[TMP64]], 0 +// CHECK1-NEXT: [[DIV165:%.*]] = sdiv i64 [[SUB164]], 1 +// CHECK1-NEXT: [[MUL166:%.*]] = mul nsw i64 1, [[DIV165]] +// CHECK1-NEXT: [[MUL167:%.*]] = mul nsw i64 [[MUL166]], 128 +// CHECK1-NEXT: [[MUL168:%.*]] = mul nsw i64 [[DIV163]], [[MUL167]] +// CHECK1-NEXT: [[SUB169:%.*]] = sub nsw i64 [[SUB141]], [[MUL168]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: 
[[TMP67:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB170:%.*]] = sub nsw i64 [[TMP67]], 0 +// CHECK1-NEXT: [[DIV171:%.*]] = sdiv i64 [[SUB170]], 1 +// CHECK1-NEXT: [[MUL172:%.*]] = mul nsw i64 1, [[DIV171]] +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB173:%.*]] = sub nsw i64 [[TMP68]], 0 +// CHECK1-NEXT: [[DIV174:%.*]] = sdiv i64 [[SUB173]], 1 +// CHECK1-NEXT: [[MUL175:%.*]] = mul nsw i64 [[MUL172]], [[DIV174]] +// CHECK1-NEXT: [[MUL176:%.*]] = mul nsw i64 [[MUL175]], 128 +// CHECK1-NEXT: [[DIV177:%.*]] = sdiv i64 [[TMP66]], [[MUL176]] +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB178:%.*]] = sub nsw i64 [[TMP69]], 0 +// CHECK1-NEXT: [[DIV179:%.*]] = sdiv i64 [[SUB178]], 1 +// CHECK1-NEXT: [[MUL180:%.*]] = mul nsw i64 1, [[DIV179]] +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB181:%.*]] = sub nsw i64 [[TMP70]], 0 +// CHECK1-NEXT: [[DIV182:%.*]] = sdiv i64 [[SUB181]], 1 +// CHECK1-NEXT: [[MUL183:%.*]] = mul nsw i64 [[MUL180]], [[DIV182]] +// CHECK1-NEXT: [[MUL184:%.*]] = mul nsw i64 [[MUL183]], 128 +// CHECK1-NEXT: [[MUL185:%.*]] = mul nsw i64 [[DIV177]], [[MUL184]] +// CHECK1-NEXT: [[SUB186:%.*]] = sub nsw i64 [[TMP65]], [[MUL185]] +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB187:%.*]] = sub nsw i64 [[TMP73]], 0 +// CHECK1-NEXT: [[DIV188:%.*]] = sdiv i64 [[SUB187]], 1 +// CHECK1-NEXT: [[MUL189:%.*]] = mul nsw i64 1, [[DIV188]] +// CHECK1-NEXT: [[TMP74:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB190:%.*]] = sub nsw i64 [[TMP74]], 0 +// CHECK1-NEXT: [[DIV191:%.*]] = sdiv i64 [[SUB190]], 1 +// CHECK1-NEXT: [[MUL192:%.*]] = mul nsw i64 [[MUL189]], 
[[DIV191]] +// CHECK1-NEXT: [[MUL193:%.*]] = mul nsw i64 [[MUL192]], 128 +// CHECK1-NEXT: [[DIV194:%.*]] = sdiv i64 [[TMP72]], [[MUL193]] +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK1-NEXT: [[SUB195:%.*]] = sub nsw i64 [[TMP75]], 0 +// CHECK1-NEXT: [[DIV196:%.*]] = sdiv i64 [[SUB195]], 1 +// CHECK1-NEXT: [[MUL197:%.*]] = mul nsw i64 1, [[DIV196]] +// CHECK1-NEXT: [[TMP76:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB198:%.*]] = sub nsw i64 [[TMP76]], 0 +// CHECK1-NEXT: [[DIV199:%.*]] = sdiv i64 [[SUB198]], 1 +// CHECK1-NEXT: [[MUL200:%.*]] = mul nsw i64 [[MUL197]], [[DIV199]] +// CHECK1-NEXT: [[MUL201:%.*]] = mul nsw i64 [[MUL200]], 128 +// CHECK1-NEXT: [[MUL202:%.*]] = mul nsw i64 [[DIV194]], [[MUL201]] +// CHECK1-NEXT: [[SUB203:%.*]] = sub nsw i64 [[TMP71]], [[MUL202]] +// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB204:%.*]] = sub nsw i64 [[TMP77]], 0 +// CHECK1-NEXT: [[DIV205:%.*]] = sdiv i64 [[SUB204]], 1 +// CHECK1-NEXT: [[MUL206:%.*]] = mul nsw i64 1, [[DIV205]] +// CHECK1-NEXT: [[MUL207:%.*]] = mul nsw i64 [[MUL206]], 128 +// CHECK1-NEXT: [[DIV208:%.*]] = sdiv i64 [[SUB203]], [[MUL207]] +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK1-NEXT: [[SUB209:%.*]] = sub nsw i64 [[TMP78]], 0 +// CHECK1-NEXT: [[DIV210:%.*]] = sdiv i64 [[SUB209]], 1 +// CHECK1-NEXT: [[MUL211:%.*]] = mul nsw i64 1, [[DIV210]] +// CHECK1-NEXT: [[MUL212:%.*]] = mul nsw i64 [[MUL211]], 128 +// CHECK1-NEXT: [[MUL213:%.*]] = mul nsw i64 [[DIV208]], [[MUL212]] +// CHECK1-NEXT: [[SUB214:%.*]] = sub nsw i64 [[SUB186]], [[MUL213]] +// CHECK1-NEXT: [[DIV215:%.*]] = sdiv i64 [[SUB214]], 128 +// CHECK1-NEXT: [[MUL216:%.*]] = mul nsw i64 [[DIV215]], 128 +// CHECK1-NEXT: [[SUB217:%.*]] = sub nsw i64 [[SUB169]], [[MUL216]] +// CHECK1-NEXT: [[MUL218:%.*]] = mul nsw i64 [[SUB217]], 1 +// CHECK1-NEXT: [[ADD219:%.*]] = add nsw i64 0, 
[[MUL218]] +// CHECK1-NEXT: [[CONV220:%.*]] = trunc i64 [[ADD219]] to i32 +// CHECK1-NEXT: store i32 [[CONV220]], ptr [[J40]], align 4 +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load i64, ptr [[DOTPERMUTED_0_IV___BEGIN438]], align 8 +// CHECK1-NEXT: [[MUL221:%.*]] = mul nsw i64 [[TMP80]], 1 +// CHECK1-NEXT: [[ADD_PTR222:%.*]] = getelementptr inbounds double, ptr [[TMP79]], i64 [[MUL221]] +// CHECK1-NEXT: store ptr [[ADD_PTR222]], ptr [[__BEGIN4]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[__BEGIN4]], align 8 +// CHECK1-NEXT: store ptr [[TMP81]], ptr [[BB]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN339]], align 8 +// CHECK1-NEXT: [[MUL223:%.*]] = mul nsw i64 [[TMP83]], 1 +// CHECK1-NEXT: [[ADD_PTR224:%.*]] = getelementptr inbounds double, ptr [[TMP82]], i64 [[MUL223]] +// CHECK1-NEXT: store ptr [[ADD_PTR224]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load double, ptr [[TMP84]], align 8 +// CHECK1-NEXT: store double [[TMP85]], ptr [[AA]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[I37]], align 4 +// CHECK1-NEXT: [[TMP87:%.*]] = load double, ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load double, ptr [[AA]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load double, ptr [[D]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load ptr, ptr [[BB]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = load double, ptr [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[J40]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP86]], double noundef [[TMP87]], double noundef [[TMP88]], double noundef [[TMP89]], double noundef [[TMP91]], i32 noundef [[TMP92]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP93:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD225:%.*]] = add nsw i64 [[TMP93]], 1 +// CHECK1-NEXT: store i64 [[ADD225]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@body +// CHECK2-SAME: (...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo10 +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[B:%.*]] = alloca [16 x double], align 16 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[D:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[__RANGE4:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END4:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN4:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_14:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_15:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_24:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_26:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_28:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV___BEGIN4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I37:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV___BEGIN438:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV___BEGIN339:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J40:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[BB:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[AA:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK2-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY5]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB8]], ptr [[DOTCAPTURE_EXPR_7]], align 8 +// CHECK2-NEXT: store double 4.200000e+01, ptr [[D]], align 8 +// CHECK2-NEXT: store ptr [[B]], ptr [[__RANGE4]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP7]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY9]], i64 16 +// CHECK2-NEXT: store ptr [[ADD_PTR10]], ptr [[__END4]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY11:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP8]], i64 0, i64 
0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY11]], ptr [[__BEGIN4]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE4]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [16 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY13]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__END4]], align 8 +// CHECK2-NEXT: store ptr [[TMP10]], ptr [[DOTCAPTURE_EXPR_14]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_14]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST16:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST17:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB18:%.*]] = sub i64 [[SUB_PTR_LHS_CAST16]], [[SUB_PTR_RHS_CAST17]] +// CHECK2-NEXT: [[SUB_PTR_DIV19:%.*]] = sdiv exact i64 [[SUB_PTR_SUB18]], 8 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 [[SUB_PTR_DIV19]], 1 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[SUB20]], 1 +// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i64 [[ADD21]], 1 +// CHECK2-NEXT: [[SUB23:%.*]] = sub nsw i64 [[DIV22]], 1 +// CHECK2-NEXT: store i64 [[SUB23]], ptr [[DOTCAPTURE_EXPR_15]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_15]], align 8 +// CHECK2-NEXT: [[ADD25:%.*]] = add nsw i64 [[TMP13]], 1 +// CHECK2-NEXT: store i64 [[ADD25]], ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_7]], align 8 +// CHECK2-NEXT: [[ADD27:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD27]], ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP15]], 0 +// CHECK2-NEXT: [[DIV30:%.*]] = sdiv i64 [[SUB29]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 128, [[DIV30]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB31:%.*]] = sub nsw i64 [[TMP16]], 0 +// CHECK2-NEXT: [[DIV32:%.*]] = sdiv i64 [[SUB31]], 1 +// CHECK2-NEXT: [[MUL33:%.*]] = mul nsw i64 [[MUL]], [[DIV32]] +// CHECK2-NEXT: [[MUL34:%.*]] = mul nsw i64 [[MUL33]], 128 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[MUL34]], 1 +// CHECK2-NEXT: store i64 [[SUB35]], ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i64 0, ptr [[DOTPERMUTED_0_IV___BEGIN4]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTPERMUTED_1_IV___BEGIN3]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: land.lhs.true: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[CMP36:%.*]] = icmp slt i64 0, [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP36]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK2-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: 
cond.true: +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_28]], align 8 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[CMP42:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB43:%.*]] = sub nsw i64 [[TMP28]], 0 +// CHECK2-NEXT: [[DIV44:%.*]] = sdiv i64 [[SUB43]], 1 +// CHECK2-NEXT: [[MUL45:%.*]] = mul nsw i64 1, [[DIV44]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP29]], 0 +// CHECK2-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i64 [[MUL45]], [[DIV47]] +// CHECK2-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 128 +// CHECK2-NEXT: [[DIV50:%.*]] = sdiv i64 [[TMP27]], [[MUL49]] +// CHECK2-NEXT: [[MUL51:%.*]] = mul nsw i64 [[DIV50]], 1 +// CHECK2-NEXT: [[ADD52:%.*]] = add nsw i64 0, [[MUL51]] +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[ADD52]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[I37]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr 
[[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB53:%.*]] = sub nsw i64 [[TMP32]], 0 +// CHECK2-NEXT: [[DIV54:%.*]] = sdiv i64 [[SUB53]], 1 +// CHECK2-NEXT: [[MUL55:%.*]] = mul nsw i64 1, [[DIV54]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB56:%.*]] = sub nsw i64 [[TMP33]], 0 +// CHECK2-NEXT: [[DIV57:%.*]] = sdiv i64 [[SUB56]], 1 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[MUL55]], [[DIV57]] +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i64 [[MUL58]], 128 +// CHECK2-NEXT: [[DIV60:%.*]] = sdiv i64 [[TMP31]], [[MUL59]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB61:%.*]] = sub nsw i64 [[TMP34]], 0 +// CHECK2-NEXT: [[DIV62:%.*]] = sdiv i64 [[SUB61]], 1 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i64 1, [[DIV62]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB64:%.*]] = sub nsw i64 [[TMP35]], 0 +// CHECK2-NEXT: [[DIV65:%.*]] = sdiv i64 [[SUB64]], 1 +// CHECK2-NEXT: [[MUL66:%.*]] = mul nsw i64 [[MUL63]], [[DIV65]] +// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i64 [[MUL66]], 128 +// CHECK2-NEXT: [[MUL68:%.*]] = mul nsw i64 [[DIV60]], [[MUL67]] +// CHECK2-NEXT: [[SUB69:%.*]] = sub nsw i64 [[TMP30]], [[MUL68]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB70:%.*]] = sub nsw i64 [[TMP36]], 0 +// CHECK2-NEXT: [[DIV71:%.*]] = sdiv i64 [[SUB70]], 1 +// CHECK2-NEXT: [[MUL72:%.*]] = mul nsw i64 1, [[DIV71]] +// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i64 [[MUL72]], 128 +// CHECK2-NEXT: [[DIV74:%.*]] = sdiv i64 [[SUB69]], [[MUL73]] +// CHECK2-NEXT: [[MUL75:%.*]] = mul nsw i64 [[DIV74]], 1 +// CHECK2-NEXT: [[ADD76:%.*]] = add nsw i64 0, [[MUL75]] +// CHECK2-NEXT: store i64 [[ADD76]], ptr [[DOTPERMUTED_0_IV___BEGIN438]], align 8 +// 
CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB77:%.*]] = sub nsw i64 [[TMP39]], 0 +// CHECK2-NEXT: [[DIV78:%.*]] = sdiv i64 [[SUB77]], 1 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 1, [[DIV78]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB80:%.*]] = sub nsw i64 [[TMP40]], 0 +// CHECK2-NEXT: [[DIV81:%.*]] = sdiv i64 [[SUB80]], 1 +// CHECK2-NEXT: [[MUL82:%.*]] = mul nsw i64 [[MUL79]], [[DIV81]] +// CHECK2-NEXT: [[MUL83:%.*]] = mul nsw i64 [[MUL82]], 128 +// CHECK2-NEXT: [[DIV84:%.*]] = sdiv i64 [[TMP38]], [[MUL83]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB85:%.*]] = sub nsw i64 [[TMP41]], 0 +// CHECK2-NEXT: [[DIV86:%.*]] = sdiv i64 [[SUB85]], 1 +// CHECK2-NEXT: [[MUL87:%.*]] = mul nsw i64 1, [[DIV86]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB88:%.*]] = sub nsw i64 [[TMP42]], 0 +// CHECK2-NEXT: [[DIV89:%.*]] = sdiv i64 [[SUB88]], 1 +// CHECK2-NEXT: [[MUL90:%.*]] = mul nsw i64 [[MUL87]], [[DIV89]] +// CHECK2-NEXT: [[MUL91:%.*]] = mul nsw i64 [[MUL90]], 128 +// CHECK2-NEXT: [[MUL92:%.*]] = mul nsw i64 [[DIV84]], [[MUL91]] +// CHECK2-NEXT: [[SUB93:%.*]] = sub nsw i64 [[TMP37]], [[MUL92]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB94:%.*]] = sub nsw i64 [[TMP45]], 0 +// CHECK2-NEXT: [[DIV95:%.*]] = sdiv i64 [[SUB94]], 1 +// CHECK2-NEXT: [[MUL96:%.*]] = mul nsw i64 1, [[DIV95]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB97:%.*]] = sub nsw i64 [[TMP46]], 0 +// 
CHECK2-NEXT: [[DIV98:%.*]] = sdiv i64 [[SUB97]], 1 +// CHECK2-NEXT: [[MUL99:%.*]] = mul nsw i64 [[MUL96]], [[DIV98]] +// CHECK2-NEXT: [[MUL100:%.*]] = mul nsw i64 [[MUL99]], 128 +// CHECK2-NEXT: [[DIV101:%.*]] = sdiv i64 [[TMP44]], [[MUL100]] +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB102:%.*]] = sub nsw i64 [[TMP47]], 0 +// CHECK2-NEXT: [[DIV103:%.*]] = sdiv i64 [[SUB102]], 1 +// CHECK2-NEXT: [[MUL104:%.*]] = mul nsw i64 1, [[DIV103]] +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB105:%.*]] = sub nsw i64 [[TMP48]], 0 +// CHECK2-NEXT: [[DIV106:%.*]] = sdiv i64 [[SUB105]], 1 +// CHECK2-NEXT: [[MUL107:%.*]] = mul nsw i64 [[MUL104]], [[DIV106]] +// CHECK2-NEXT: [[MUL108:%.*]] = mul nsw i64 [[MUL107]], 128 +// CHECK2-NEXT: [[MUL109:%.*]] = mul nsw i64 [[DIV101]], [[MUL108]] +// CHECK2-NEXT: [[SUB110:%.*]] = sub nsw i64 [[TMP43]], [[MUL109]] +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB111:%.*]] = sub nsw i64 [[TMP49]], 0 +// CHECK2-NEXT: [[DIV112:%.*]] = sdiv i64 [[SUB111]], 1 +// CHECK2-NEXT: [[MUL113:%.*]] = mul nsw i64 1, [[DIV112]] +// CHECK2-NEXT: [[MUL114:%.*]] = mul nsw i64 [[MUL113]], 128 +// CHECK2-NEXT: [[DIV115:%.*]] = sdiv i64 [[SUB110]], [[MUL114]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB116:%.*]] = sub nsw i64 [[TMP50]], 0 +// CHECK2-NEXT: [[DIV117:%.*]] = sdiv i64 [[SUB116]], 1 +// CHECK2-NEXT: [[MUL118:%.*]] = mul nsw i64 1, [[DIV117]] +// CHECK2-NEXT: [[MUL119:%.*]] = mul nsw i64 [[MUL118]], 128 +// CHECK2-NEXT: [[MUL120:%.*]] = mul nsw i64 [[DIV115]], [[MUL119]] +// CHECK2-NEXT: [[SUB121:%.*]] = sub nsw i64 [[SUB93]], [[MUL120]] +// CHECK2-NEXT: [[DIV122:%.*]] = sdiv i64 [[SUB121]], 128 +// CHECK2-NEXT: [[MUL123:%.*]] = mul nsw i64 [[DIV122]], 1 +// CHECK2-NEXT: [[ADD124:%.*]] = add nsw i64 0, [[MUL123]] +// CHECK2-NEXT: store 
i64 [[ADD124]], ptr [[DOTPERMUTED_1_IV___BEGIN339]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB125:%.*]] = sub nsw i64 [[TMP53]], 0 +// CHECK2-NEXT: [[DIV126:%.*]] = sdiv i64 [[SUB125]], 1 +// CHECK2-NEXT: [[MUL127:%.*]] = mul nsw i64 1, [[DIV126]] +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB128:%.*]] = sub nsw i64 [[TMP54]], 0 +// CHECK2-NEXT: [[DIV129:%.*]] = sdiv i64 [[SUB128]], 1 +// CHECK2-NEXT: [[MUL130:%.*]] = mul nsw i64 [[MUL127]], [[DIV129]] +// CHECK2-NEXT: [[MUL131:%.*]] = mul nsw i64 [[MUL130]], 128 +// CHECK2-NEXT: [[DIV132:%.*]] = sdiv i64 [[TMP52]], [[MUL131]] +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB133:%.*]] = sub nsw i64 [[TMP55]], 0 +// CHECK2-NEXT: [[DIV134:%.*]] = sdiv i64 [[SUB133]], 1 +// CHECK2-NEXT: [[MUL135:%.*]] = mul nsw i64 1, [[DIV134]] +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB136:%.*]] = sub nsw i64 [[TMP56]], 0 +// CHECK2-NEXT: [[DIV137:%.*]] = sdiv i64 [[SUB136]], 1 +// CHECK2-NEXT: [[MUL138:%.*]] = mul nsw i64 [[MUL135]], [[DIV137]] +// CHECK2-NEXT: [[MUL139:%.*]] = mul nsw i64 [[MUL138]], 128 +// CHECK2-NEXT: [[MUL140:%.*]] = mul nsw i64 [[DIV132]], [[MUL139]] +// CHECK2-NEXT: [[SUB141:%.*]] = sub nsw i64 [[TMP51]], [[MUL140]] +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB142:%.*]] = sub nsw i64 [[TMP59]], 0 +// CHECK2-NEXT: [[DIV143:%.*]] = sdiv i64 [[SUB142]], 1 +// CHECK2-NEXT: [[MUL144:%.*]] = mul nsw i64 1, [[DIV143]] +// CHECK2-NEXT: [[TMP60:%.*]] = load 
i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB145:%.*]] = sub nsw i64 [[TMP60]], 0 +// CHECK2-NEXT: [[DIV146:%.*]] = sdiv i64 [[SUB145]], 1 +// CHECK2-NEXT: [[MUL147:%.*]] = mul nsw i64 [[MUL144]], [[DIV146]] +// CHECK2-NEXT: [[MUL148:%.*]] = mul nsw i64 [[MUL147]], 128 +// CHECK2-NEXT: [[DIV149:%.*]] = sdiv i64 [[TMP58]], [[MUL148]] +// CHECK2-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB150:%.*]] = sub nsw i64 [[TMP61]], 0 +// CHECK2-NEXT: [[DIV151:%.*]] = sdiv i64 [[SUB150]], 1 +// CHECK2-NEXT: [[MUL152:%.*]] = mul nsw i64 1, [[DIV151]] +// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB153:%.*]] = sub nsw i64 [[TMP62]], 0 +// CHECK2-NEXT: [[DIV154:%.*]] = sdiv i64 [[SUB153]], 1 +// CHECK2-NEXT: [[MUL155:%.*]] = mul nsw i64 [[MUL152]], [[DIV154]] +// CHECK2-NEXT: [[MUL156:%.*]] = mul nsw i64 [[MUL155]], 128 +// CHECK2-NEXT: [[MUL157:%.*]] = mul nsw i64 [[DIV149]], [[MUL156]] +// CHECK2-NEXT: [[SUB158:%.*]] = sub nsw i64 [[TMP57]], [[MUL157]] +// CHECK2-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB159:%.*]] = sub nsw i64 [[TMP63]], 0 +// CHECK2-NEXT: [[DIV160:%.*]] = sdiv i64 [[SUB159]], 1 +// CHECK2-NEXT: [[MUL161:%.*]] = mul nsw i64 1, [[DIV160]] +// CHECK2-NEXT: [[MUL162:%.*]] = mul nsw i64 [[MUL161]], 128 +// CHECK2-NEXT: [[DIV163:%.*]] = sdiv i64 [[SUB158]], [[MUL162]] +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB164:%.*]] = sub nsw i64 [[TMP64]], 0 +// CHECK2-NEXT: [[DIV165:%.*]] = sdiv i64 [[SUB164]], 1 +// CHECK2-NEXT: [[MUL166:%.*]] = mul nsw i64 1, [[DIV165]] +// CHECK2-NEXT: [[MUL167:%.*]] = mul nsw i64 [[MUL166]], 128 +// CHECK2-NEXT: [[MUL168:%.*]] = mul nsw i64 [[DIV163]], [[MUL167]] +// CHECK2-NEXT: [[SUB169:%.*]] = sub nsw i64 [[SUB141]], [[MUL168]] +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: 
[[TMP66:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB170:%.*]] = sub nsw i64 [[TMP67]], 0 +// CHECK2-NEXT: [[DIV171:%.*]] = sdiv i64 [[SUB170]], 1 +// CHECK2-NEXT: [[MUL172:%.*]] = mul nsw i64 1, [[DIV171]] +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB173:%.*]] = sub nsw i64 [[TMP68]], 0 +// CHECK2-NEXT: [[DIV174:%.*]] = sdiv i64 [[SUB173]], 1 +// CHECK2-NEXT: [[MUL175:%.*]] = mul nsw i64 [[MUL172]], [[DIV174]] +// CHECK2-NEXT: [[MUL176:%.*]] = mul nsw i64 [[MUL175]], 128 +// CHECK2-NEXT: [[DIV177:%.*]] = sdiv i64 [[TMP66]], [[MUL176]] +// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB178:%.*]] = sub nsw i64 [[TMP69]], 0 +// CHECK2-NEXT: [[DIV179:%.*]] = sdiv i64 [[SUB178]], 1 +// CHECK2-NEXT: [[MUL180:%.*]] = mul nsw i64 1, [[DIV179]] +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB181:%.*]] = sub nsw i64 [[TMP70]], 0 +// CHECK2-NEXT: [[DIV182:%.*]] = sdiv i64 [[SUB181]], 1 +// CHECK2-NEXT: [[MUL183:%.*]] = mul nsw i64 [[MUL180]], [[DIV182]] +// CHECK2-NEXT: [[MUL184:%.*]] = mul nsw i64 [[MUL183]], 128 +// CHECK2-NEXT: [[MUL185:%.*]] = mul nsw i64 [[DIV177]], [[MUL184]] +// CHECK2-NEXT: [[SUB186:%.*]] = sub nsw i64 [[TMP65]], [[MUL185]] +// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB187:%.*]] = sub nsw i64 [[TMP73]], 0 +// CHECK2-NEXT: [[DIV188:%.*]] = sdiv i64 [[SUB187]], 1 +// CHECK2-NEXT: [[MUL189:%.*]] = mul nsw i64 1, [[DIV188]] +// CHECK2-NEXT: [[TMP74:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB190:%.*]] = sub nsw i64 [[TMP74]], 0 +// CHECK2-NEXT: [[DIV191:%.*]] = sdiv i64 
[[SUB190]], 1 +// CHECK2-NEXT: [[MUL192:%.*]] = mul nsw i64 [[MUL189]], [[DIV191]] +// CHECK2-NEXT: [[MUL193:%.*]] = mul nsw i64 [[MUL192]], 128 +// CHECK2-NEXT: [[DIV194:%.*]] = sdiv i64 [[TMP72]], [[MUL193]] +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_24]], align 8 +// CHECK2-NEXT: [[SUB195:%.*]] = sub nsw i64 [[TMP75]], 0 +// CHECK2-NEXT: [[DIV196:%.*]] = sdiv i64 [[SUB195]], 1 +// CHECK2-NEXT: [[MUL197:%.*]] = mul nsw i64 1, [[DIV196]] +// CHECK2-NEXT: [[TMP76:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB198:%.*]] = sub nsw i64 [[TMP76]], 0 +// CHECK2-NEXT: [[DIV199:%.*]] = sdiv i64 [[SUB198]], 1 +// CHECK2-NEXT: [[MUL200:%.*]] = mul nsw i64 [[MUL197]], [[DIV199]] +// CHECK2-NEXT: [[MUL201:%.*]] = mul nsw i64 [[MUL200]], 128 +// CHECK2-NEXT: [[MUL202:%.*]] = mul nsw i64 [[DIV194]], [[MUL201]] +// CHECK2-NEXT: [[SUB203:%.*]] = sub nsw i64 [[TMP71]], [[MUL202]] +// CHECK2-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB204:%.*]] = sub nsw i64 [[TMP77]], 0 +// CHECK2-NEXT: [[DIV205:%.*]] = sdiv i64 [[SUB204]], 1 +// CHECK2-NEXT: [[MUL206:%.*]] = mul nsw i64 1, [[DIV205]] +// CHECK2-NEXT: [[MUL207:%.*]] = mul nsw i64 [[MUL206]], 128 +// CHECK2-NEXT: [[DIV208:%.*]] = sdiv i64 [[SUB203]], [[MUL207]] +// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_26]], align 8 +// CHECK2-NEXT: [[SUB209:%.*]] = sub nsw i64 [[TMP78]], 0 +// CHECK2-NEXT: [[DIV210:%.*]] = sdiv i64 [[SUB209]], 1 +// CHECK2-NEXT: [[MUL211:%.*]] = mul nsw i64 1, [[DIV210]] +// CHECK2-NEXT: [[MUL212:%.*]] = mul nsw i64 [[MUL211]], 128 +// CHECK2-NEXT: [[MUL213:%.*]] = mul nsw i64 [[DIV208]], [[MUL212]] +// CHECK2-NEXT: [[SUB214:%.*]] = sub nsw i64 [[SUB186]], [[MUL213]] +// CHECK2-NEXT: [[DIV215:%.*]] = sdiv i64 [[SUB214]], 128 +// CHECK2-NEXT: [[MUL216:%.*]] = mul nsw i64 [[DIV215]], 128 +// CHECK2-NEXT: [[SUB217:%.*]] = sub nsw i64 [[SUB169]], [[MUL216]] +// CHECK2-NEXT: [[MUL218:%.*]] = mul nsw 
i64 [[SUB217]], 1 +// CHECK2-NEXT: [[ADD219:%.*]] = add nsw i64 0, [[MUL218]] +// CHECK2-NEXT: [[CONV220:%.*]] = trunc i64 [[ADD219]] to i32 +// CHECK2-NEXT: store i32 [[CONV220]], ptr [[J40]], align 4 +// CHECK2-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[TMP80:%.*]] = load i64, ptr [[DOTPERMUTED_0_IV___BEGIN438]], align 8 +// CHECK2-NEXT: [[MUL221:%.*]] = mul nsw i64 [[TMP80]], 1 +// CHECK2-NEXT: [[ADD_PTR222:%.*]] = getelementptr inbounds double, ptr [[TMP79]], i64 [[MUL221]] +// CHECK2-NEXT: store ptr [[ADD_PTR222]], ptr [[__BEGIN4]], align 8 +// CHECK2-NEXT: [[TMP81:%.*]] = load ptr, ptr [[__BEGIN4]], align 8 +// CHECK2-NEXT: store ptr [[TMP81]], ptr [[BB]], align 8 +// CHECK2-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN339]], align 8 +// CHECK2-NEXT: [[MUL223:%.*]] = mul nsw i64 [[TMP83]], 1 +// CHECK2-NEXT: [[ADD_PTR224:%.*]] = getelementptr inbounds double, ptr [[TMP82]], i64 [[MUL223]] +// CHECK2-NEXT: store ptr [[ADD_PTR224]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP84:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP85:%.*]] = load double, ptr [[TMP84]], align 8 +// CHECK2-NEXT: store double [[TMP85]], ptr [[AA]], align 8 +// CHECK2-NEXT: [[TMP86:%.*]] = load i32, ptr [[I37]], align 4 +// CHECK2-NEXT: [[TMP87:%.*]] = load double, ptr [[C]], align 8 +// CHECK2-NEXT: [[TMP88:%.*]] = load double, ptr [[AA]], align 8 +// CHECK2-NEXT: [[TMP89:%.*]] = load double, ptr [[D]], align 8 +// CHECK2-NEXT: [[TMP90:%.*]] = load ptr, ptr [[BB]], align 8 +// CHECK2-NEXT: [[TMP91:%.*]] = load double, ptr [[TMP90]], align 8 +// CHECK2-NEXT: [[TMP92:%.*]] = load i32, ptr [[J40]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP86]], double noundef [[TMP87]], double noundef [[TMP88]], double noundef [[TMP89]], double noundef [[TMP91]], i32 noundef [[TMP92]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP93:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD225:%.*]] = add nsw i64 [[TMP93]], 1 +// CHECK2-NEXT: store i64 [[ADD225]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo2 +// CHECK2-SAME: (i32 noundef [[START1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], 
ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTNEW_STEP7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[SUB9:%.*]] = sub i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[SUB9]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[SUB10]], [[TMP14]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK2-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD11]], [[TMP15]] +// CHECK2-NEXT: [[SUB13:%.*]] = sub i32 [[DIV12]], 1 +// CHECK2-NEXT: store i32 [[SUB13]], ptr [[DOTCAPTURE_EXPR_8]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_8]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP16]], [[ADD14]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END24:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTNEW_STEP7]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK2-NEXT: store i32 
[[ADD15]], ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND16:%.*]] +// CHECK2: for.cond16: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add i32 [[TMP22]], 1 +// CHECK2-NEXT: [[CMP18:%.*]] = icmp ult i32 [[TMP21]], [[ADD17]] +// CHECK2-NEXT: br i1 [[CMP18]], label [[FOR_BODY19:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body19: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL20:%.*]] = mul i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[ADD21:%.*]] = add i32 [[TMP23]], [[MUL20]] +// CHECK2-NEXT: store i32 [[ADD21]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP26]], i32 noundef [[TMP27]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP28]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC22:%.*]] +// CHECK2: for.inc22: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[INC23:%.*]] = add i32 [[TMP29]], 1 +// CHECK2-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: for.end24: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo3 +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP7]], 3 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP8]], 4 +// 
CHECK2-NEXT: br i1 [[CMP4]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 7, [[MUL5]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP10]], i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo4 +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 15, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 15 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 15, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP8]], 4 +// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL4]] +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 7, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], 4 +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP11]], 3 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP12]], i32 noundef [[TMP13]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo6 +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV_K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_1_IV_J:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 255, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 255 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 255, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 64 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = 
add nsw i32 7, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP8]], 64 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 64 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL6]] +// CHECK2-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB]], 16 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTPERMUTED_0_IV_K]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV10:%.*]] = sdiv i32 [[TMP10]], 64 +// CHECK2-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 64 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP9]], [[MUL11]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = sdiv i32 [[TMP12]], 64 +// CHECK2-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 64 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP11]], [[MUL14]] +// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 16 +// CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 16 +// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[SUB12]], [[MUL17]] +// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 4 +// CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] +// CHECK2-NEXT: store i32 [[ADD21]], ptr [[DOTPERMUTED_1_IV_J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP14]], 64 +// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 64 +// CHECK2-NEXT: [[SUB24:%.*]] = sub nsw i32 
[[TMP13]], [[MUL23]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV25:%.*]] = sdiv i32 [[TMP16]], 64 +// CHECK2-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV25]], 64 +// CHECK2-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP15]], [[MUL26]] +// CHECK2-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 16 +// CHECK2-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 16 +// CHECK2-NEXT: [[SUB30:%.*]] = sub nsw i32 [[SUB24]], [[MUL29]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV31:%.*]] = sdiv i32 [[TMP18]], 64 +// CHECK2-NEXT: [[MUL32:%.*]] = mul nsw i32 [[DIV31]], 64 +// CHECK2-NEXT: [[SUB33:%.*]] = sub nsw i32 [[TMP17]], [[MUL32]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV34:%.*]] = sdiv i32 [[TMP20]], 64 +// CHECK2-NEXT: [[MUL35:%.*]] = mul nsw i32 [[DIV34]], 64 +// CHECK2-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP19]], [[MUL35]] +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 16 +// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 16 +// CHECK2-NEXT: [[SUB39:%.*]] = sub nsw i32 [[SUB33]], [[MUL38]] +// CHECK2-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 4 +// CHECK2-NEXT: [[SUB42:%.*]] = sub nsw i32 [[SUB30]], [[MUL41]] +// CHECK2-NEXT: [[MUL43:%.*]] = mul nsw i32 [[SUB42]], 3 +// CHECK2-NEXT: [[ADD44:%.*]] = add nsw i32 7, [[MUL43]] +// CHECK2-NEXT: store i32 [[ADD44]], ptr [[L]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_K]], align 4 +// CHECK2-NEXT: [[MUL45:%.*]] = mul nsw i32 [[TMP21]], 3 +// CHECK2-NEXT: [[ADD46:%.*]] = add nsw i32 7, [[MUL45]] +// CHECK2-NEXT: store i32 [[ADD46]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTPERMUTED_1_IV_J]], align 4 +// CHECK2-NEXT: [[MUL47:%.*]] = mul nsw i32 [[TMP22]], 3 +// CHECK2-NEXT: [[ADD48:%.*]] = add nsw i32 7, [[MUL47]] +// CHECK2-NEXT: store i32 [[ADD48]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]], i32 noundef [[TMP24]], i32 noundef [[TMP25]], i32 noundef [[TMP26]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo9 +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[C:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPERMUTED_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTPERMUTED_1_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store double 4.200000e+01, ptr [[C]], align 8 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 
+// CHECK2-NEXT: store i32 0, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], 21 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END15:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 2 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[I]], align 4 +// CHECK2-NEXT: store i64 0, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7:%.*]] +// CHECK2: for.cond7: +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP9]], 1 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i64 [[TMP8]], [[ADD8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY10:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body10: +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[MUL11:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK2-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL11]] +// CHECK2-NEXT: store ptr [[ADD_PTR12]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double, ptr [[C]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(double noundef [[TMP13]], double noundef [[TMP15]], i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP17]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTPERMUTED_1_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC13:%.*]] +// CHECK2: for.inc13: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: store i32 [[INC14]], ptr [[DOTPERMUTED_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end15: +// CHECK2-NEXT: ret void +// diff --git a/clang/test/OpenMP/interchange_messages.cpp b/clang/test/OpenMP/interchange_messages.cpp new file mode 100644 index 0000000000000..175c2f1efa744 --- /dev/null +++ b/clang/test/OpenMP/interchange_messages.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp interchange' are ignored}} + #pragma omp interchange foo + for (int i = 0; i < 7; ++i) + for (int j = 0; j < 13; ++j) + ; + + // expected-error@+1 {{unexpected OpenMP clause 'collapse' in directive '#pragma omp interchange'}} + #pragma omp interchange collapse(2) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < 13; ++j) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp interchange + } + + // expected-error@+2 {{statement after '#pragma omp interchange' must be a for loop}} + #pragma omp interchange + int b = 0; + + // expected-error@+3 {{statement after '#pragma omp interchange' must be a for loop}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) + ; + 
+ // expected-error@+2 {{statement after '#pragma omp interchange' must be a for loop}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) { + int k = 3; + for (int j = 0; j < 7; ++j) + ; + } + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) + for (int j = i; j < 7; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+6 {{expected 3 for loops after '#pragma omp for', but found only 2}} + // expected-note@+1 {{as specified in 'collapse' clause}} + #pragma omp for collapse(3) + #pragma omp interchange + for (int i = 0; i < 7; ++i) + for (int j = 0; j < 13; ++j) + ; + + // expected-error@+2 {{statement after '#pragma omp interchange' must be a for loop}} + #pragma omp interchange + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+3 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'j'}} + #pragma omp interchange + for (int i = 0; i < 7; ++i) + for (int j = 0; j/3<7; ++j) + ; +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 51784ed02b111..a6a002533936f 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2180,6 +2180,7 @@ class EnqueueVisitor : public ConstStmtVisitor, void VisitOMPTileDirective(const OMPTileDirective *D); void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); + void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void 
VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3226,6 +3227,11 @@ void EnqueueVisitor::VisitOMPReverseDirective(const OMPReverseDirective *D) { VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPInterchangeDirective( + const OMPInterchangeDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6086,6 +6092,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPUnrollDirective"); case CXCursor_OMPReverseDirective: return cxstring::createRef("OMPReverseDirective"); + case CXCursor_OMPInterchangeDirective: + return cxstring::createRef("OMPInterchangeDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 275714d8fbcdd..9d485ce85b829 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -675,6 +675,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPReverseDirectiveClass: K = CXCursor_OMPReverseDirective; break; + case Stmt::OMPInterchangeDirectiveClass: + K = CXCursor_OMPInterchangeDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 0ad3d919103fe..3395f82ce1ba2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -540,6 +540,9 @@ def OMP_Unroll : Directive<"unroll"> { def OMP_Reverse : Directive<"reverse"> { let association = AS_Loop; } +def OMP_Interchange : Directive<"interchange"> { + let association = AS_Loop; +} def OMP_For : Directive<"for"> { let allowedClauses = [ VersionedClause, diff --git a/openmp/runtime/test/transform/interchange/foreach.cpp 
b/openmp/runtime/test/transform/interchange/foreach.cpp new file mode 100644 index 0000000000000..17eaed6029d01 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/foreach.cpp @@ -0,0 +1,216 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = 
*this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp interchange + for (Reporter c{"C"}; auto &&v : Reporter("A")) + for (Reporter d{"D"}; auto &&w : Reporter("B")) + printf("v=%d w=%d\n", v, w); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 w=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 w=0 +// CHECK-NEXT: [A] iterator dtor +// 
CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 w=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 w=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 w=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 w=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 w=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 w=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 w=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done diff --git 
a/openmp/runtime/test/transform/interchange/intfor.c b/openmp/runtime/test/transform/interchange/intfor.c new file mode 100644 index 0000000000000..b4842f0f82913 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intfor.c @@ -0,0 +1,38 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp interchange + for (int i = 7; i < 17; i += 3) + for (int j = 8; j < 18; j += 3) + printf("i=%d j=%d\n", i, j); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=7 j=8 +// CHECK-NEXT: i=10 j=8 +// CHECK-NEXT: i=13 j=8 +// CHECK-NEXT: i=16 j=8 +// CHECK-NEXT: i=7 j=11 +// CHECK-NEXT: i=10 j=11 +// CHECK-NEXT: i=13 j=11 +// CHECK-NEXT: i=16 j=11 +// CHECK-NEXT: i=7 j=14 +// CHECK-NEXT: i=10 j=14 +// CHECK-NEXT: i=13 j=14 +// CHECK-NEXT: i=16 j=14 +// CHECK-NEXT: i=7 j=17 +// CHECK-NEXT: i=10 j=17 +// CHECK-NEXT: i=13 j=17 +// CHECK-NEXT: i=16 j=17 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/iterfor.cpp b/openmp/runtime/test/transform/interchange/iterfor.cpp new file mode 100644 index 0000000000000..51219a07402e3 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/iterfor.cpp @@ -0,0 +1,222 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + 
this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter A("A"), B("B"); +#pragma omp interchange + for (auto it = A.begin(); it != A.end(); ++it) + for (auto jt = B.begin(); jt != B.end(); ++jt) + printf("i=%d j=%d\n", *it, *jt); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: 
i=1 j=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [A] dtor diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..54399d25a3f15 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,340 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + 
Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() 
const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(3) num_threads(1) + for (int i = 0; i < 2; ++i) +#pragma omp interchange + for (Reporter c{"C"}; auto &&v : Reporter("A")) + for (Reporter d{"D"}; auto &&w : Reporter("B")) + for (int k = 0; k < 2; ++k) + printf("i=%d v=%d w=%d k=%d\n", i, v, w, k); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 w=0 k=0 +// CHECK-NEXT: i=0 v=0 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 w=0 k=0 +// CHECK-NEXT: i=0 v=1 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// 
CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 w=0 k=0 +// CHECK-NEXT: i=0 v=2 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 w=1 k=0 +// CHECK-NEXT: i=0 v=0 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 w=1 k=0 +// CHECK-NEXT: i=0 v=1 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 w=1 k=0 +// CHECK-NEXT: i=0 v=2 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 w=2 k=0 +// CHECK-NEXT: i=0 v=0 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 
w=2 k=0 +// CHECK-NEXT: i=0 v=1 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 w=2 k=0 +// CHECK-NEXT: i=0 v=2 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 w=0 k=0 +// CHECK-NEXT: i=1 v=0 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 w=0 k=0 +// CHECK-NEXT: i=1 v=1 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 w=0 k=0 +// CHECK-NEXT: i=1 v=2 w=0 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 w=1 k=0 +// CHECK-NEXT: i=1 v=0 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] 
iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 w=1 k=0 +// CHECK-NEXT: i=1 v=1 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 w=1 k=0 +// CHECK-NEXT: i=1 v=2 w=1 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 w=2 k=0 +// CHECK-NEXT: i=1 v=0 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 w=2 k=0 +// CHECK-NEXT: i=1 v=1 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 w=2 k=0 +// CHECK-NEXT: i=1 v=2 w=2 k=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: 
[B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intfor.cpp b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intfor.cpp new file mode 100644 index 0000000000000..a4520afdb0f9c --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intfor.cpp @@ -0,0 +1,106 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include <cstdlib> +#include <cstdio> + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(4) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp interchange + for (int j = 0; j < 3; ++j) + for (int k = 0; k < 3; ++k) + for (int l = 0; l < 3; ++l) + printf("i=%d j=%d k=%d l=%d\n", i, j, k, l); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=0 j=0 k=0 l=0 +// CHECK-NEXT: i=0 j=0 k=0 l=1 +// CHECK-NEXT: i=0 j=0 k=0 l=2 +// CHECK-NEXT: i=0 j=1 k=0 l=0 +// CHECK-NEXT: i=0 j=1 k=0 l=1 +// CHECK-NEXT: i=0 j=1 k=0 l=2 +// CHECK-NEXT: i=0 j=2 k=0 l=0 +// CHECK-NEXT: i=0 j=2 k=0 l=1 +// CHECK-NEXT: i=0 j=2 k=0 l=2 +// CHECK-NEXT: i=0 j=0 k=1 l=0 +// CHECK-NEXT: i=0 j=0 k=1 l=1 +// CHECK-NEXT: i=0 j=0 k=1 l=2 +// CHECK-NEXT: i=0 j=1 k=1 l=0 +// CHECK-NEXT: i=0 j=1 k=1 l=1 +// CHECK-NEXT: i=0 j=1 k=1 l=2 +// CHECK-NEXT: i=0 j=2 k=1 l=0 +// CHECK-NEXT: i=0 j=2 k=1 l=1 +// CHECK-NEXT: i=0 j=2 k=1 l=2 +// CHECK-NEXT: i=0 j=0 k=2 l=0 +// CHECK-NEXT: i=0 j=0 k=2 l=1 +// CHECK-NEXT: i=0 j=0 k=2 l=2 +// CHECK-NEXT: i=0 j=1 k=2 l=0 +// CHECK-NEXT: i=0 j=1 k=2 l=1 +// CHECK-NEXT: i=0 j=1 k=2 l=2 +// CHECK-NEXT: i=0 j=2 k=2 l=0 +// CHECK-NEXT: i=0 j=2 k=2 l=1 +// CHECK-NEXT: i=0 j=2 k=2 l=2 +// CHECK-NEXT: i=1 j=0 k=0 l=0 +// CHECK-NEXT: i=1 j=0 k=0 
l=1 +// CHECK-NEXT: i=1 j=0 k=0 l=2 +// CHECK-NEXT: i=1 j=1 k=0 l=0 +// CHECK-NEXT: i=1 j=1 k=0 l=1 +// CHECK-NEXT: i=1 j=1 k=0 l=2 +// CHECK-NEXT: i=1 j=2 k=0 l=0 +// CHECK-NEXT: i=1 j=2 k=0 l=1 +// CHECK-NEXT: i=1 j=2 k=0 l=2 +// CHECK-NEXT: i=1 j=0 k=1 l=0 +// CHECK-NEXT: i=1 j=0 k=1 l=1 +// CHECK-NEXT: i=1 j=0 k=1 l=2 +// CHECK-NEXT: i=1 j=1 k=1 l=0 +// CHECK-NEXT: i=1 j=1 k=1 l=1 +// CHECK-NEXT: i=1 j=1 k=1 l=2 +// CHECK-NEXT: i=1 j=2 k=1 l=0 +// CHECK-NEXT: i=1 j=2 k=1 l=1 +// CHECK-NEXT: i=1 j=2 k=1 l=2 +// CHECK-NEXT: i=1 j=0 k=2 l=0 +// CHECK-NEXT: i=1 j=0 k=2 l=1 +// CHECK-NEXT: i=1 j=0 k=2 l=2 +// CHECK-NEXT: i=1 j=1 k=2 l=0 +// CHECK-NEXT: i=1 j=1 k=2 l=1 +// CHECK-NEXT: i=1 j=1 k=2 l=2 +// CHECK-NEXT: i=1 j=2 k=2 l=0 +// CHECK-NEXT: i=1 j=2 k=2 l=1 +// CHECK-NEXT: i=1 j=2 k=2 l=2 +// CHECK-NEXT: i=2 j=0 k=0 l=0 +// CHECK-NEXT: i=2 j=0 k=0 l=1 +// CHECK-NEXT: i=2 j=0 k=0 l=2 +// CHECK-NEXT: i=2 j=1 k=0 l=0 +// CHECK-NEXT: i=2 j=1 k=0 l=1 +// CHECK-NEXT: i=2 j=1 k=0 l=2 +// CHECK-NEXT: i=2 j=2 k=0 l=0 +// CHECK-NEXT: i=2 j=2 k=0 l=1 +// CHECK-NEXT: i=2 j=2 k=0 l=2 +// CHECK-NEXT: i=2 j=0 k=1 l=0 +// CHECK-NEXT: i=2 j=0 k=1 l=1 +// CHECK-NEXT: i=2 j=0 k=1 l=2 +// CHECK-NEXT: i=2 j=1 k=1 l=0 +// CHECK-NEXT: i=2 j=1 k=1 l=1 +// CHECK-NEXT: i=2 j=1 k=1 l=2 +// CHECK-NEXT: i=2 j=2 k=1 l=0 +// CHECK-NEXT: i=2 j=2 k=1 l=1 +// CHECK-NEXT: i=2 j=2 k=1 l=2 +// CHECK-NEXT: i=2 j=0 k=2 l=0 +// CHECK-NEXT: i=2 j=0 k=2 l=1 +// CHECK-NEXT: i=2 j=0 k=2 l=2 +// CHECK-NEXT: i=2 j=1 k=2 l=0 +// CHECK-NEXT: i=2 j=1 k=2 l=1 +// CHECK-NEXT: i=2 j=1 k=2 l=2 +// CHECK-NEXT: i=2 j=2 k=2 l=0 +// CHECK-NEXT: i=2 j=2 k=2 l=1 +// CHECK-NEXT: i=2 j=2 k=2 l=2 +// CHECK-NEXT: done From 6c88e4396f51c609b65a6e682232a28439999218 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 24 May 2024 23:12:55 +0200 Subject: [PATCH 07/12] Address review comment --- clang/include/clang/AST/StmtOpenMP.h | 3 +-- clang/lib/AST/StmtOpenMP.cpp | 7 +++---- clang/lib/Serialization/ASTReaderStmt.cpp | 5 
+++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index fb7f413162fad..e41a9e52b7674 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5760,8 +5760,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { /// /// \param C Context of the AST. /// \param NumClauses Number of clauses to allocate. - static OMPReverseDirective *CreateEmpty(const ASTContext &C, - unsigned NumClauses); + static OMPReverseDirective *CreateEmpty(const ASTContext &C); /// Gets/sets the associated loops after the transformation, i.e. after /// de-sugaring. diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 83b8a08e9af73..608ca14e446e5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -460,11 +460,10 @@ OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc, return Dir; } -OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C, - unsigned NumClauses) { +OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C) { return createEmptyDirective<OMPReverseDirective>( - C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, - SourceLocation(), SourceLocation()); + C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true, + TransformedStmtOffset + 1, SourceLocation(), SourceLocation()); } OMPForSimdDirective * diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index ff72679f993e2..0834f79f940d1 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -3443,8 +3443,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_OMP_REVERSE_DIRECTIVE: { assert(Record[ASTStmtReader::NumStmtFields] == 1 && "Reverse directive accepts only a single loop"); - unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; - S = 
OMPReverseDirective::CreateEmpty(Context, NumClauses); + assert(Record[ASTStmtReader::NumStmtFields + 1] == 0 && + "Reverse directive has no clauses"); + S = OMPReverseDirective::CreateEmpty(Context); break; } From 7ef1d9e939089f9e0cc1720e710bff13e1e98f5d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 24 May 2024 23:27:24 +0200 Subject: [PATCH 08/12] Address review comment --- clang/lib/Sema/SemaOpenMP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index ef141003b7d61..9f076f56f1062 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16000,8 +16000,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( PreInits); } - SmallVector PermutedIndVars; - PermutedIndVars.resize(NumLoops); + SmallVector PermutedIndVars(NumLoops); CaptureVars CopyTransformer(SemaRef); // Create the permuted loops from the inside to the outside of the From d84db479b925eed62972de975d62177f0f630226 Mon Sep 17 00:00:00 2001 From: "U-BERGUFFLEN\\meinersbur" Date: Thu, 18 Jul 2024 10:58:47 +0200 Subject: [PATCH 09/12] Compile fix --- llvm/include/llvm/Frontend/OpenMP/OMP.td | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index ddd50a32191eb..ed9c8cd6dac17 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -630,13 +630,6 @@ def OMP_EndDeclareVariant : Directive<"end declare variant"> { let association = AS_Delimited; let category = OMP_DeclareVariant.category; } -def OMP_Reverse : Directive<"reverse"> { - let association = AS_Loop; -} -# TODO: order alphabetically -def OMP_Interchange : Directive<"interchange"> { - let association = AS_Loop; -} def OMP_Depobj : Directive<"depobj"> { let allowedClauses = [ VersionedClause, @@ -742,6 +735,10 @@ def OMP_For : Directive<"for"> { let association = 
AS_Loop; let category = CA_Executable; } +def OMP_Interchange : Directive<"interchange"> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_interop : Directive<"interop"> { let allowedClauses = [ VersionedClause, @@ -1778,6 +1775,10 @@ def OMP_ParallelWorkshare : Directive<"parallel workshare"> { let leafConstructs = [OMP_Parallel, OMP_Workshare]; let category = CA_Executable; } +def OMP_Reverse : Directive<"reverse"> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_TargetParallel : Directive<"target parallel"> { let allowedClauses = [ VersionedClause, From af015178565b06713c354436bbe4b0d3ea135a35 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 18 Jul 2024 11:50:08 +0200 Subject: [PATCH 10/12] [Clang] Handle OMPInterchangeDirectiveClass in switch --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 19c85352a6144..da3b9bb9d62fc 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1812,6 +1812,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: + case Stmt::OMPInterchangeDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: From f730b3d66340150a7cf62eeefdd7e2e6c9741d59 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 18 Jul 2024 12:27:25 +0200 Subject: [PATCH 11/12] Compile fix --- clang/lib/Sema/SemaOpenMP.cpp | 7 +++++++ clang/lib/Sema/TreeTransform.h | 11 ----------- llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 ---- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 
2c20584b38ffd..758e1103da310 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14859,6 +14859,13 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, ReversedFor, buildPreInits(Context, PreInits)); +} + +StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + DeclContext *CurContext = SemaRef.CurContext; Scope *CurScope = SemaRef.getCurScope(); // Empty statement should only be possible if there already was an error. diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index fb907c3d35df3..84e846356e437 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9250,17 +9250,6 @@ TreeTransform::TransformOMPReverseDirective(OMPReverseDirective *D) { return Res; } -template -StmtResult -TreeTransform::TransformOMPReverseDirective(OMPReverseDirective *D) { - DeclarationNameInfo DirName; - getDerived().getSema().OpenMP().StartOpenMPDSABlock( - D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); - StmtResult Res = getDerived().TransformOMPExecutableDirective(D); - getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); - return Res; -} - template StmtResult TreeTransform::TransformOMPInterchangeDirective( OMPInterchangeDirective *D) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a4e36389250d4..99cef340f40df 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1779,10 +1779,6 @@ def OMP_ParallelWorkshare : Directive<"parallel workshare"> { let leafConstructs = [OMP_Parallel, OMP_Workshare]; let category = CA_Executable; } -def OMP_Reverse : Directive<"reverse"> { - let association = AS_Loop; - let category = CA_Executable; -} def OMP_TargetParallel : 
Directive<"target parallel"> { let allowedClauses = [ VersionedClause, From 8922a0fd2d98d1ad5d5689f5edad3e04d163f871 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 18 Jul 2024 12:36:53 +0200 Subject: [PATCH 12/12] Use SmallString --- clang/lib/Sema/SemaOpenMP.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 758e1103da310..3bd981cb442aa 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14951,9 +14951,8 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Iteration variable for the permuted loop. Reuse the one from // checkOpenMPLoop which will also be used to update the original loop // variable. - std::string PermutedCntName = - (Twine(".permuted_") + llvm::utostr(TargetIdx) + ".iv." + OrigVarName) - .str(); + SmallString<64> PermutedCntName(".permuted_"); + PermutedCntName.append({llvm::utostr(TargetIdx), ".iv.", OrigVarName}); auto *PermutedCntDecl = cast(IterVarRef->getDecl()); PermutedCntDecl->setDeclName( &SemaRef.PP.getIdentifierTable().get(PermutedCntName)); @@ -14963,7 +14962,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( }; // For init-statement: - // \code{c} + // \code // auto .permuted_{target}.iv = 0 // \endcode ExprResult Zero = SemaRef.ActOnIntegerConstant(OrigVarLoc, 0); @@ -14978,8 +14977,8 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( return StmtError(); // For cond-expression: - // \code{c} - // .permuted_{target}.iv < NumIterations + // \code + // .permuted_{target}.iv < MakeNumIterations() // \endcode ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceHelper.Cond->getExprLoc(), BO_LT, @@ -14988,7 +14987,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( return StmtError(); // For incr-statement: - // \code{c} + // \code // ++.tile.iv // \endcode ExprResult IncrStmt = SemaRef.BuildUnaryOp(