diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 35971fbacbf91d..3dd87632a82c62 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -555,6 +555,239 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, return bodyGenStatus; } +/// Allocate space for privatized reduction variables. +template +static void allocByValReductionVars( + T loop, ArrayRef reductionArgs, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &privateReductionVariables, + DenseMap &reductionVariableMap, + llvm::ArrayRef isByRefs) { + llvm::IRBuilderBase::InsertPointGuard guard(builder); + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); + + for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { + if (isByRefs[i]) + continue; + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + moduleTranslation.mapValue(reductionArgs[i], var); + privateReductionVariables[i] = var; + reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); + } +} + +/// Map input argument to all reduction initialization regions +template +static void +mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, + SmallVectorImpl &reductionDecls, + unsigned i) { + // map input argument to the initialization region + mlir::omp::DeclareReductionOp &reduction = reductionDecls[i]; + Region &initializerRegion = reduction.getInitializerRegion(); + Block &entry = initializerRegion.front(); + assert(entry.getNumArguments() == 1 && + "the initialization region has one argument"); + + mlir::Value mlirSource = loop.getReductionVars()[i]; + llvm::Value *llvmSource = 
moduleTranslation.lookupValue(mlirSource); + assert(llvmSource && "lookup reduction var"); + moduleTranslation.mapValue(entry.getArgument(0), llvmSource); +} + +/// Collect reduction info +template +static void collectReductionInfo( + T loop, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &owningReductionGens, + SmallVectorImpl &owningAtomicReductionGens, + const ArrayRef privateReductionVariables, + SmallVectorImpl &reductionInfos) { + unsigned numReductions = loop.getNumReductionVars(); + + for (unsigned i = 0; i < numReductions; ++i) { + owningReductionGens.push_back( + makeReductionGen(reductionDecls[i], builder, moduleTranslation)); + owningAtomicReductionGens.push_back( + makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); + } + + // Collect the reduction information. + reductionInfos.reserve(numReductions); + for (unsigned i = 0; i < numReductions; ++i) { + llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr; + if (owningAtomicReductionGens[i]) + atomicGen = owningAtomicReductionGens[i]; + llvm::Value *variable = + moduleTranslation.lookupValue(loop.getReductionVars()[i]); + reductionInfos.push_back( + {moduleTranslation.convertType(reductionDecls[i].getType()), variable, + privateReductionVariables[i], + /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar, + owningReductionGens[i], + /*ReductionGenClang=*/nullptr, atomicGen}); + } +} + +/// handling of DeclareReductionOp's cleanup region +static LogicalResult +inlineOmpRegionCleanup(llvm::SmallVectorImpl &cleanupRegions, + llvm::ArrayRef privateVariables, + LLVM::ModuleTranslation &moduleTranslation, + llvm::IRBuilderBase &builder, StringRef regionName, + bool shouldLoadCleanupRegionArg = true) { + for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) { + if (cleanupRegion->empty()) + continue; + + // map the argument to the cleanup region + Block &entry = 
cleanupRegion->front(); + + llvm::Instruction *potentialTerminator = + builder.GetInsertBlock()->empty() ? nullptr + : &builder.GetInsertBlock()->back(); + if (potentialTerminator && potentialTerminator->isTerminator()) + builder.SetInsertPoint(potentialTerminator); + llvm::Value *privateVarValue = + shouldLoadCleanupRegionArg + ? builder.CreateLoad( + moduleTranslation.convertType(entry.getArgument(0).getType()), + privateVariables[i]) + : privateVariables[i]; + + moduleTranslation.mapValue(entry.getArgument(0), privateVarValue); + + if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder, + moduleTranslation))) + return failure(); + + // clear block argument mapping in case it needs to be re-created with a + // different source for another use of the same reduction decl + moduleTranslation.forgetMapping(*cleanupRegion); + } + return success(); +} + +// TODO: not used by ParallelOp +template +static LogicalResult createReductionsAndCleanup( + OP op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + ArrayRef privateReductionVariables, ArrayRef isByRef) { + // Process the reductions if required. + if (op.getNumReductionVars() == 0) + return success(); + + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + // Create the reduction generators. We need to own them here because + // ReductionInfo only accepts references to the generators. + SmallVector owningReductionGens; + SmallVector owningAtomicReductionGens; + SmallVector reductionInfos; + collectReductionInfo(op, builder, moduleTranslation, reductionDecls, + owningReductionGens, owningAtomicReductionGens, + privateReductionVariables, reductionInfos); + + // The call to createReductions below expects the block to have a + // terminator. Create an unreachable instruction to serve as terminator + // and remove it later. 
+ llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); + builder.SetInsertPoint(tempTerminator); + llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = + ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, + isByRef, op.getNowait()); + if (!contInsertPoint.getBlock()) + return op->emitOpError() << "failed to convert reductions"; + auto nextInsertionPoint = + ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); + tempTerminator->eraseFromParent(); + builder.restoreIP(nextInsertionPoint); + + // after the construct, deallocate private reduction variables + SmallVector reductionRegions; + llvm::transform(reductionDecls, std::back_inserter(reductionRegions), + [](omp::DeclareReductionOp reductionDecl) { + return &reductionDecl.getCleanupRegion(); + }); + return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, + moduleTranslation, builder, + "omp.reduction.cleanup"); + +} + +static ArrayRef getIsByRef(std::optional> attr) { + if (!attr) + return {}; + return *attr; +} + +// TODO: not used by omp.parallel +template +static LogicalResult allocAndInitializeReductionVars( + OP op, ArrayRef reductionArgs, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &privateReductionVariables, + DenseMap &reductionVariableMap, + llvm::ArrayRef isByRef) { + if (op.getNumReductionVars() == 0) + return success(); + + allocByValReductionVars(op, reductionArgs, builder, moduleTranslation, + allocaIP, reductionDecls, privateReductionVariables, + reductionVariableMap, isByRef); + + // Before the loop, store the initial values of reductions into reduction + // variables. Although this could be done after allocas, we don't want to mess + // up with the alloca insertion point. 
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) { + SmallVector phis; + + // map block argument to initializer region + mapInitializationArg(op, moduleTranslation, reductionDecls, i); + + if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), + "omp.reduction.neutral", builder, + moduleTranslation, &phis))) + return failure(); + assert(phis.size() == 1 && "expected one value to be yielded from the " + "reduction neutral element declaration region"); + if (isByRef[i]) { + // Allocate reduction variable (which is a pointer to the real reduction + // variable allocated in the inlined region) + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + // Store the result of the inlined region to the allocated reduction var + // ptr + builder.CreateStore(phis[0], var); + + privateReductionVariables[i] = var; + moduleTranslation.mapValue(reductionArgs[i], phis[0]); + reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]); + } else { + // for by-ref case the store is inside of the reduction region + builder.CreateStore(phis[0], privateReductionVariables[i]); + // the rest was handled in allocByValReductionVars + } + + // forget the mapping for the initializer region because we might need a + // different mapping if this reduction declaration is re-used for a + // different variable + moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); + } + + return success(); +} + static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -565,13 +798,38 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, auto sectionsOp = cast(opInst); // TODO: Support the following clauses: private, firstprivate, lastprivate, - // reduction, allocate - if (!sectionsOp.getReductionVars().empty() || sectionsOp.getReductions() || - !sectionsOp.getAllocateVars().empty() || + // allocate + if 
(!sectionsOp.getAllocateVars().empty() || !sectionsOp.getAllocatorsVars().empty()) return emitError(sectionsOp.getLoc()) - << "reduction and allocate clauses are not supported for sections " - "construct"; + << "allocate clause is not supported for sections construct"; + + llvm::ArrayRef isByRef = getIsByRef(sectionsOp.getReductionVarsByref()); + assert(isByRef.size() == sectionsOp.getNumReductionVars()); + + SmallVector reductionDecls; + collectReductionDecls(sectionsOp, reductionDecls); + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + + SmallVector privateReductionVariables( + sectionsOp.getNumReductionVars()); + DenseMap reductionVariableMap; + + MutableArrayRef reductionArgs = + sectionsOp.getRegion().getArguments(); + + if (failed(allocAndInitializeReductionVars( + sectionsOp, reductionArgs, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + isByRef))) + return failure(); + + // Store the mapping between reduction variables and their private copies on + // ModuleTranslation stack. It can be then recovered when translating + // omp.reduce operations in a separate call. 
+ LLVM::ModuleTranslation::SaveStack mappingGuard( + moduleTranslation, reductionVariableMap); LogicalResult bodyGenStatus = success(); SmallVector sectionCBs; @@ -582,9 +840,24 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, continue; Region ®ion = sectionOp.getRegion(); - auto sectionCB = [®ion, &builder, &moduleTranslation, &bodyGenStatus]( - InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto sectionCB = [§ionsOp, ®ion, &builder, &moduleTranslation, + &bodyGenStatus](InsertPointTy allocaIP, + InsertPointTy codeGenIP) { builder.restoreIP(codeGenIP); + + // map the omp.section reduction block argument to the omp.sections block + // arguments + // TODO: this assumes that the only block arguments are reduction + // variables + assert(region.getNumArguments() == + sectionsOp.getRegion().getNumArguments()); + for (auto [sectionsArg, sectionArg] : llvm::zip_equal( + sectionsOp.getRegion().getArguments(), region.getArguments())) { + llvm::Value *llvmVal = moduleTranslation.lookupValue(sectionsArg); + assert(llvmVal); + moduleTranslation.mapValue(sectionArg, llvmVal); + } + convertOmpOpRegions(region, "omp.section.region", builder, moduleTranslation, bodyGenStatus); }; @@ -613,13 +886,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // called for variables which have destructors/finalizers. auto finiCB = [&](InsertPointTy codeGenIP) {}; - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, sectionsOp.getNowait())); - return bodyGenStatus; + + if (failed(bodyGenStatus)) + return bodyGenStatus; + + // Process the reductions if required. 
+ return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation, + allocaIP, reductionDecls, + privateReductionVariables, isByRef); } /// Converts an OpenMP single construct into LLVM IR using OpenMPIRBuilder. @@ -769,131 +1048,6 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, ompLoc, allocaIP, bodyCB)); return bodyGenStatus; } - -/// Allocate space for privatized reduction variables. -template -static void allocByValReductionVars( - T loop, ArrayRef reductionArgs, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, - SmallVectorImpl &reductionDecls, - SmallVectorImpl &privateReductionVariables, - DenseMap &reductionVariableMap, - llvm::ArrayRef isByRefs) { - llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); - - for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { - if (isByRefs[i]) - continue; - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); - moduleTranslation.mapValue(reductionArgs[i], var); - privateReductionVariables[i] = var; - reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); - } -} - -/// Map input argument to all reduction initialization regions -template -static void -mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, - SmallVectorImpl &reductionDecls, - unsigned i) { - // map input argument to the initialization region - mlir::omp::DeclareReductionOp &reduction = reductionDecls[i]; - Region &initializerRegion = reduction.getInitializerRegion(); - Block &entry = initializerRegion.front(); - assert(entry.getNumArguments() == 1 && - "the initialization region has one argument"); - - mlir::Value mlirSource = loop.getReductionVars()[i]; - llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource); - assert(llvmSource && "lookup reduction var"); - 
moduleTranslation.mapValue(entry.getArgument(0), llvmSource); -} - -/// Collect reduction info -template -static void collectReductionInfo( - T loop, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - SmallVector &reductionDecls, - SmallVector &owningReductionGens, - SmallVector &owningAtomicReductionGens, - const SmallVector &privateReductionVariables, - SmallVector &reductionInfos) { - unsigned numReductions = loop.getNumReductionVars(); - - for (unsigned i = 0; i < numReductions; ++i) { - owningReductionGens.push_back( - makeReductionGen(reductionDecls[i], builder, moduleTranslation)); - owningAtomicReductionGens.push_back( - makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation)); - } - - // Collect the reduction information. - reductionInfos.reserve(numReductions); - for (unsigned i = 0; i < numReductions; ++i) { - llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy atomicGen = nullptr; - if (owningAtomicReductionGens[i]) - atomicGen = owningAtomicReductionGens[i]; - llvm::Value *variable = - moduleTranslation.lookupValue(loop.getReductionVars()[i]); - reductionInfos.push_back( - {moduleTranslation.convertType(reductionDecls[i].getType()), variable, - privateReductionVariables[i], - /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar, - owningReductionGens[i], - /*ReductionGenClang=*/nullptr, atomicGen}); - } -} - -/// handling of DeclareReductionOp's cleanup region -static LogicalResult -inlineOmpRegionCleanup(llvm::SmallVectorImpl &cleanupRegions, - llvm::ArrayRef privateVariables, - LLVM::ModuleTranslation &moduleTranslation, - llvm::IRBuilderBase &builder, StringRef regionName, - bool shouldLoadCleanupRegionArg = true) { - for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) { - if (cleanupRegion->empty()) - continue; - - // map the argument to the cleanup region - Block &entry = cleanupRegion->front(); - - llvm::Instruction *potentialTerminator = - builder.GetInsertBlock()->empty() ? 
nullptr - : &builder.GetInsertBlock()->back(); - if (potentialTerminator && potentialTerminator->isTerminator()) - builder.SetInsertPoint(potentialTerminator); - llvm::Value *prviateVarValue = - shouldLoadCleanupRegionArg - ? builder.CreateLoad( - moduleTranslation.convertType(entry.getArgument(0).getType()), - privateVariables[i]) - : privateVariables[i]; - - moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue); - - if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder, - moduleTranslation))) - return failure(); - - // clear block argument mapping in case it needs to be re-created with a - // different source for another use of the same reduction decl - moduleTranslation.forgetMapping(*cleanupRegion); - } - return success(); -} - -static ArrayRef getIsByRef(std::optional> attr) { - if (!attr) - return {}; - return *attr; -} - /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -930,48 +1084,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, MutableArrayRef reductionArgs = wsloopOp.getRegion().getArguments(); - allocByValReductionVars(wsloopOp, reductionArgs, builder, moduleTranslation, - allocaIP, reductionDecls, privateReductionVariables, - reductionVariableMap, isByRef); - - // Before the loop, store the initial values of reductions into reduction - // variables. Although this could be done after allocas, we don't want to mess - // up with the alloca insertion point. 
- for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) { - SmallVector phis; - - // map block argument to initializer region - mapInitializationArg(wsloopOp, moduleTranslation, reductionDecls, i); - - if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), - "omp.reduction.neutral", builder, - moduleTranslation, &phis))) - return failure(); - assert(phis.size() == 1 && "expected one value to be yielded from the " - "reduction neutral element declaration region"); - if (isByRef[i]) { - // Allocate reduction variable (which is a pointer to the real reduction - // variable allocated in the inlined region) - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); - // Store the result of the inlined region to the allocated reduction var - // ptr - builder.CreateStore(phis[0], var); - - privateReductionVariables[i] = var; - moduleTranslation.mapValue(reductionArgs[i], phis[0]); - reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]); - } else { - // for by-ref case the store is inside of the reduction region - builder.CreateStore(phis[0], privateReductionVariables[i]); - // the rest was handled in allocByValReductionVars - } - - // forget the mapping for the initializer region because we might need a - // different mapping if this reduction declaration is re-used for a - // different variable - moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); - } + if (failed(allocAndInitializeReductionVars( + wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + isByRef))) + return failure(); // Store the mapping between reduction variables and their private copies on // ModuleTranslation stack. 
It can be then recovered when translating @@ -1064,42 +1181,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(afterIP); // Process the reductions if required. - if (wsloopOp.getNumReductionVars() == 0) - return success(); - - // Create the reduction generators. We need to own them here because - // ReductionInfo only accepts references to the generators. - SmallVector owningReductionGens; - SmallVector owningAtomicReductionGens; - SmallVector reductionInfos; - collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls, - owningReductionGens, owningAtomicReductionGens, - privateReductionVariables, reductionInfos); - - // The call to createReductions below expects the block to have a - // terminator. Create an unreachable instruction to serve as terminator - // and remove it later. - llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); - builder.SetInsertPoint(tempTerminator); - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, - isByRef, wsloopOp.getNowait()); - if (!contInsertPoint.getBlock()) - return wsloopOp->emitOpError() << "failed to convert reductions"; - auto nextInsertionPoint = - ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); - tempTerminator->eraseFromParent(); - builder.restoreIP(nextInsertionPoint); - - // after the workshare loop, deallocate private reduction variables - SmallVector reductionRegions; - llvm::transform(reductionDecls, std::back_inserter(reductionRegions), - [](omp::DeclareReductionOp reductionDecl) { - return &reductionDecl.getCleanupRegion(); - }); - return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables, - moduleTranslation, builder, - "omp.reduction.cleanup"); + return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation, + allocaIP, reductionDecls, + privateReductionVariables, isByRef); } /// A RAII class that on construction replaces 
the region arguments of the diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 321de67aa48a18..3a1fc5efbd8230 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2093,6 +2093,8 @@ llvm.func @omp_sections_empty() -> () { omp.sections { omp.terminator } + // CHECK-NEXT: br label %entry + // CHECK: entry: // CHECK-NEXT: ret void llvm.return } diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir new file mode 100644 index 00000000000000..5682e7e96ab186 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -0,0 +1,214 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// nonesense minimised code simulating the control flow graph generated by flang +// for array reductions. The important thing here is that we are testing a byref +// reduction with a cleanup region, and the various regions contain multiple +// blocks +omp.declare_reduction @add_reduction_byref_box_Uxf32 : !llvm.ptr init { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} combiner { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i64) : i64 + %1 = llvm.mlir.constant(0 : index) : i64 + %2 = llvm.mlir.constant(1 : index) : i64 + llvm.br ^bb1(%0 : i64) +^bb1(%3: i64): // 2 preds: ^bb0, ^bb2 + %4 = llvm.icmp "sgt" %3, %1 : i64 + llvm.cond_br %4, ^bb2, ^bb3 +^bb2: // pred: ^bb1 + %5 = llvm.sub %3, %2 : i64 + llvm.br ^bb1(%5 : i64) +^bb3: // pred: ^bb1 + omp.yield(%arg0 : !llvm.ptr) +} cleanup { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i64) : i64 + %1 = llvm.ptrtoint %arg0 : !llvm.ptr to i64 + %2 = llvm.icmp "ne" %1, %0 : i64 + llvm.cond_br %2, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + 
llvm.br ^bb2 +^bb2: // 2 preds: ^bb0, ^bb1 + omp.yield +} +llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.internal_name = "_QPsectionsreduction"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.mlir.constant(0 : index) : i64 + %2 = llvm.mlir.constant(1 : index) : i64 + omp.parallel { + %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr + omp.sections reduction(byref @add_reduction_byref_box_Uxf32 -> %3 : !llvm.ptr) { + ^bb0(%arg1: !llvm.ptr): + omp.section { + ^bb0(%arg2: !llvm.ptr): + llvm.br ^bb1(%0 : i64) + ^bb1(%4: i64): // 2 preds: ^bb0, ^bb2 + %5 = llvm.icmp "sgt" %4, %1 : i64 + llvm.cond_br %5, ^bb2, ^bb3 + ^bb2: // pred: ^bb1 + %6 = llvm.sub %4, %2 : i64 + llvm.br ^bb1(%6 : i64) + ^bb3: // pred: ^bb1 + omp.terminator + } + omp.section { + ^bb0(%arg2: !llvm.ptr): + llvm.br ^bb1(%0 : i64) + ^bb1(%4: i64): // 2 preds: ^bb0, ^bb2 + %5 = llvm.icmp "sgt" %4, %1 : i64 + llvm.cond_br %5, ^bb2, ^bb3 + ^bb2: // pred: ^bb1 + %6 = llvm.sub %4, %2 : i64 + llvm.br ^bb1(%6 : i64) + ^bb3: // pred: ^bb1 + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @sectionsreduction_..omp_par +// CHECK: omp.par.entry: +// CHECK: %[[VAL_6:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_7:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_8:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_9:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_10:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_11:.*]] = load i32, ptr %[[VAL_12:.*]], align 4 +// CHECK: store i32 %[[VAL_11]], ptr %[[VAL_10]], align 4 +// CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4 +// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8 +// CHECK: br label %[[VAL_15:.*]] +// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]] +// CHECK: br label %[[VAL_17:.*]] +// CHECK: omp.par.region: ; preds = %[[VAL_15]] +// CHECK: br label %[[VAL_18:.*]] +// CHECK: 
omp.par.region1: ; preds = %[[VAL_17]] +// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 +// CHECK: br label %[[VAL_22:.*]] +// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]] +// CHECK: store i32 0, ptr %[[VAL_7]], align 4 +// CHECK: store i32 1, ptr %[[VAL_8]], align 4 +// CHECK: store i32 1, ptr %[[VAL_9]], align 4 +// CHECK: %[[VAL_23:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[VAL_23]], i32 34, ptr %[[VAL_6]], ptr %[[VAL_7]], ptr %[[VAL_8]], ptr %[[VAL_9]], i32 1, i32 0) +// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_7]], align 4 +// CHECK: %[[VAL_25:.*]] = load i32, ptr %[[VAL_8]], align 4 +// CHECK: %[[VAL_26:.*]] = sub i32 %[[VAL_25]], %[[VAL_24]] +// CHECK: %[[VAL_27:.*]] = add i32 %[[VAL_26]], 1 +// CHECK: br label %[[VAL_28:.*]] +// CHECK: omp_section_loop.header: ; preds = %[[VAL_29:.*]], %[[VAL_22]] +// CHECK: %[[VAL_30:.*]] = phi i32 [ 0, %[[VAL_22]] ], [ %[[VAL_31:.*]], %[[VAL_29]] ] +// CHECK: br label %[[VAL_32:.*]] +// CHECK: omp_section_loop.cond: ; preds = %[[VAL_28]] +// CHECK: %[[VAL_33:.*]] = icmp ult i32 %[[VAL_30]], %[[VAL_27]] +// CHECK: br i1 %[[VAL_33]], label %[[VAL_34:.*]], label %[[VAL_35:.*]] +// CHECK: omp_section_loop.exit: ; preds = %[[VAL_32]] +// CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_23]]) +// CHECK: %[[VAL_36:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]]) +// CHECK: br label %[[VAL_37:.*]] +// CHECK: omp_section_loop.after: ; preds = %[[VAL_35]] +// CHECK: br label %[[VAL_38:.*]] +// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_37]] +// CHECK: %[[VAL_39:.*]] = getelementptr inbounds [1 x 
ptr], ptr %[[VAL_14]], i64 0, i64 0 +// CHECK: store ptr %[[VAL_21]], ptr %[[VAL_39]], align 8 +// CHECK: %[[VAL_40:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_41:.*]] = call i32 @__kmpc_reduce(ptr @1, i32 %[[VAL_40]], i32 1, i64 8, ptr %[[VAL_14]], ptr @.omp.reduction.func, ptr @.gomp_critical_user_.reduction.var) +// CHECK: switch i32 %[[VAL_41]], label %[[VAL_42:.*]] [ +// CHECK: i32 1, label %[[VAL_43:.*]] +// CHECK: i32 2, label %[[VAL_44:.*]] +// CHECK: ] +// CHECK: reduce.switch.atomic: ; preds = %[[VAL_38]] +// CHECK: unreachable +// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_38]] +// CHECK: %[[VAL_45:.*]] = load ptr, ptr %[[VAL_21]], align 8 +// CHECK: br label %[[VAL_46:.*]] +// CHECK: omp.reduction.nonatomic.body: ; preds = %[[VAL_43]] +// CHECK: br label %[[VAL_47:.*]] +// CHECK: omp.reduction.nonatomic.body16: ; preds = %[[VAL_48:.*]], %[[VAL_46]] +// CHECK: %[[VAL_49:.*]] = phi i64 [ %[[VAL_50:.*]], %[[VAL_48]] ], [ 0, %[[VAL_46]] ] +// CHECK: %[[VAL_51:.*]] = icmp sgt i64 %[[VAL_49]], 0 +// CHECK: br i1 %[[VAL_51]], label %[[VAL_48]], label %[[VAL_52:.*]] +// CHECK: omp.reduction.nonatomic.body18: ; preds = %[[VAL_47]] +// CHECK: br label %[[VAL_53:.*]] +// CHECK: omp.region.cont15: ; preds = %[[VAL_52]] +// CHECK: %[[VAL_54:.*]] = phi ptr [ %[[VAL_19]], %[[VAL_52]] ] +// CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_40]], ptr @.gomp_critical_user_.reduction.var) +// CHECK: br label %[[VAL_42]] +// CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]] +// CHECK: %[[VAL_50]] = sub i64 %[[VAL_49]], 1 +// CHECK: br label %[[VAL_47]] +// CHECK: reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_38]] +// CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]]) +// CHECK: %[[VAL_56:.*]] = load ptr, ptr %[[VAL_21]], align 8 +// CHECK: br label %[[VAL_57:.*]] +// CHECK: omp.reduction.cleanup: ; preds = %[[VAL_42]] +// CHECK: %[[VAL_58:.*]] = 
ptrtoint ptr %[[VAL_56]] to i64 +// CHECK: %[[VAL_59:.*]] = icmp ne i64 %[[VAL_58]], 0 +// CHECK: br i1 %[[VAL_59]], label %[[VAL_60:.*]], label %[[VAL_61:.*]] +// CHECK: omp.reduction.cleanup22: ; preds = %[[VAL_60]], %[[VAL_57]] +// CHECK: br label %[[VAL_62:.*]] +// CHECK: omp.region.cont20: ; preds = %[[VAL_61]] +// CHECK: br label %[[VAL_63:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_62]] +// CHECK: br label %[[VAL_64:.*]] +// CHECK: omp.par.pre_finalize: ; preds = %[[VAL_63]] +// CHECK: br label %[[VAL_65:.*]] +// CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_57]] +// CHECK: br label %[[VAL_61]] +// CHECK: omp_section_loop.body: ; preds = %[[VAL_32]] +// CHECK: %[[VAL_66:.*]] = add i32 %[[VAL_30]], %[[VAL_24]] +// CHECK: %[[VAL_67:.*]] = mul i32 %[[VAL_66]], 1 +// CHECK: %[[VAL_68:.*]] = add i32 %[[VAL_67]], 0 +// CHECK: switch i32 %[[VAL_68]], label %[[VAL_69:.*]] [ +// CHECK: i32 0, label %[[VAL_70:.*]] +// CHECK: i32 1, label %[[VAL_71:.*]] +// CHECK: ] +// CHECK: omp_section_loop.body.case6: ; preds = %[[VAL_34]] +// CHECK: br label %[[VAL_72:.*]] +// CHECK: omp.section.region8: ; preds = %[[VAL_71]] +// CHECK: br label %[[VAL_73:.*]] +// CHECK: omp.section.region9: ; preds = %[[VAL_74:.*]], %[[VAL_72]] +// CHECK: %[[VAL_75:.*]] = phi i64 [ %[[VAL_76:.*]], %[[VAL_74]] ], [ 1, %[[VAL_72]] ] +// CHECK: %[[VAL_77:.*]] = icmp sgt i64 %[[VAL_75]], 0 +// CHECK: br i1 %[[VAL_77]], label %[[VAL_74]], label %[[VAL_78:.*]] +// CHECK: omp.section.region11: ; preds = %[[VAL_73]] +// CHECK: br label %[[VAL_79:.*]] +// CHECK: omp.region.cont7: ; preds = %[[VAL_78]] +// CHECK: br label %[[VAL_69]] +// CHECK: omp.section.region10: ; preds = %[[VAL_73]] +// CHECK: %[[VAL_76]] = sub i64 %[[VAL_75]], 1 +// CHECK: br label %[[VAL_73]] +// CHECK: omp_section_loop.body.case: ; preds = %[[VAL_34]] +// CHECK: br label %[[VAL_80:.*]] +// CHECK: omp.section.region: ; preds = %[[VAL_70]] +// CHECK: br label %[[VAL_81:.*]] +// CHECK: omp.section.region3: ; preds = 
%[[VAL_82:.*]], %[[VAL_80]] +// CHECK: %[[VAL_83:.*]] = phi i64 [ %[[VAL_84:.*]], %[[VAL_82]] ], [ 1, %[[VAL_80]] ] +// CHECK: %[[VAL_85:.*]] = icmp sgt i64 %[[VAL_83]], 0 +// CHECK: br i1 %[[VAL_85]], label %[[VAL_82]], label %[[VAL_86:.*]] +// CHECK: omp.section.region5: ; preds = %[[VAL_81]] +// CHECK: br label %[[VAL_87:.*]] +// CHECK: omp.region.cont2: ; preds = %[[VAL_86]] +// CHECK: br label %[[VAL_69]] +// CHECK: omp.section.region4: ; preds = %[[VAL_81]] +// CHECK: %[[VAL_84]] = sub i64 %[[VAL_83]], 1 +// CHECK: br label %[[VAL_81]] +// CHECK: omp_section_loop.body.sections.after: ; preds = %[[VAL_79]], %[[VAL_87]], %[[VAL_34]] +// CHECK: br label %[[VAL_29]] +// CHECK: omp_section_loop.inc: ; preds = %[[VAL_69]] +// CHECK: %[[VAL_31]] = add nuw i32 %[[VAL_30]], 1 +// CHECK: br label %[[VAL_28]] +// CHECK: omp.par.outlined.exit.exitStub: ; preds = %[[VAL_64]] +// CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir new file mode 100644 index 00000000000000..694180a5ced373 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir @@ -0,0 +1,152 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +omp.declare_reduction @add_reduction_f32 : f32 init { +^bb0(%arg0: f32): + %0 = llvm.mlir.constant(0.000000e+00 : f32) : f32 + omp.yield(%0 : f32) +} combiner { +^bb0(%arg0: f32, %arg1: f32): + %0 = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + omp.yield(%0 : f32) +} +llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.internal_name = "_QPsections"} { + %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32 + %1 = llvm.mlir.constant(1.000000e+00 : f32) : f32 + omp.parallel { + omp.sections reduction(@add_reduction_f32 -> %arg0 : !llvm.ptr) { + ^bb0(%arg1: !llvm.ptr): + omp.section { + ^bb0(%arg2: !llvm.ptr): + %2 = llvm.load %arg2 : !llvm.ptr -> f32 + %3 = llvm.fadd %2, %1 {fastmathFlags = #llvm.fastmath} : f32 + 
llvm.store %3, %arg2 : f32, !llvm.ptr + omp.terminator + } + omp.section { + ^bb0(%arg2: !llvm.ptr): + %2 = llvm.load %arg2 : !llvm.ptr -> f32 + %3 = llvm.fadd %2, %0 {fastmathFlags = #llvm.fastmath} : f32 + llvm.store %3, %arg2 : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @sections_..omp_par +// CHECK: omp.par.entry: +// CHECK: %[[VAL_9:.*]] = getelementptr { ptr }, ptr %[[VAL_10:.*]], i32 0, i32 0 +// CHECK: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_9]], align 8 +// CHECK: %[[VAL_12:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_13:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_14:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_15:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_16:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4 +// CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4 +// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4 +// CHECK: %[[VAL_20:.*]] = alloca float, align 4 +// CHECK: %[[VAL_21:.*]] = alloca [1 x ptr], align 8 +// CHECK: br label %[[VAL_22:.*]] +// CHECK: omp.reduction.init: ; preds = %[[VAL_23:.*]] +// CHECK: br label %[[VAL_24:.*]] +// CHECK: omp.par.region: ; preds = %[[VAL_22]] +// CHECK: br label %[[VAL_25:.*]] +// CHECK: omp.par.region1: ; preds = %[[VAL_24]] +// CHECK: store float 0.000000e+00, ptr %[[VAL_20]], align 4 +// CHECK: br label %[[VAL_26:.*]] +// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_25]] +// CHECK: store i32 0, ptr %[[VAL_13]], align 4 +// CHECK: store i32 1, ptr %[[VAL_14]], align 4 +// CHECK: store i32 1, ptr %[[VAL_15]], align 4 +// CHECK: %[[VAL_27:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[VAL_27]], i32 34, ptr %[[VAL_12]], ptr %[[VAL_13]], ptr %[[VAL_14]], ptr %[[VAL_15]], i32 1, i32 0) +// CHECK: %[[VAL_28:.*]] = load i32, ptr %[[VAL_13]], align 4 +// CHECK: %[[VAL_29:.*]] = load i32, ptr 
%[[VAL_14]], align 4 +// CHECK: %[[VAL_30:.*]] = sub i32 %[[VAL_29]], %[[VAL_28]] +// CHECK: %[[VAL_31:.*]] = add i32 %[[VAL_30]], 1 +// CHECK: br label %[[VAL_32:.*]] +// CHECK: omp_section_loop.header: ; preds = %[[VAL_33:.*]], %[[VAL_26]] +// CHECK: %[[VAL_34:.*]] = phi i32 [ 0, %[[VAL_26]] ], [ %[[VAL_35:.*]], %[[VAL_33]] ] +// CHECK: br label %[[VAL_36:.*]] +// CHECK: omp_section_loop.cond: ; preds = %[[VAL_32]] +// CHECK: %[[VAL_37:.*]] = icmp ult i32 %[[VAL_34]], %[[VAL_31]] +// CHECK: br i1 %[[VAL_37]], label %[[VAL_38:.*]], label %[[VAL_39:.*]] +// CHECK: omp_section_loop.exit: ; preds = %[[VAL_36]] +// CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_27]]) +// CHECK: %[[VAL_40:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_40]]) +// CHECK: br label %[[VAL_41:.*]] +// CHECK: omp_section_loop.after: ; preds = %[[VAL_39]] +// CHECK: br label %[[VAL_42:.*]] +// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_41]] +// CHECK: %[[VAL_43:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_21]], i64 0, i64 0 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_43]], align 8 +// CHECK: %[[VAL_44:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_45:.*]] = call i32 @__kmpc_reduce(ptr @1, i32 %[[VAL_44]], i32 1, i64 8, ptr %[[VAL_21]], ptr @.omp.reduction.func, ptr @.gomp_critical_user_.reduction.var) +// CHECK: switch i32 %[[VAL_45]], label %[[VAL_46:.*]] [ +// CHECK: i32 1, label %[[VAL_47:.*]] +// CHECK: i32 2, label %[[VAL_48:.*]] +// CHECK: ] +// CHECK: reduce.switch.atomic: ; preds = %[[VAL_42]] +// CHECK: unreachable +// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_42]] +// CHECK: %[[VAL_49:.*]] = load float, ptr %[[VAL_11]], align 4 +// CHECK: %[[VAL_50:.*]] = load float, ptr %[[VAL_20]], align 4 +// CHECK: %[[VAL_51:.*]] = fadd contract float %[[VAL_49]], %[[VAL_50]] +// CHECK: store float %[[VAL_51]], ptr %[[VAL_11]], align 4 +// CHECK: call void 
@__kmpc_end_reduce(ptr @1, i32 %[[VAL_44]], ptr @.gomp_critical_user_.reduction.var) +// CHECK: br label %[[VAL_46]] +// CHECK: reduce.finalize: ; preds = %[[VAL_47]], %[[VAL_42]] +// CHECK: %[[VAL_52:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_52]]) +// CHECK: br label %[[VAL_53:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_46]] +// CHECK: br label %[[VAL_54:.*]] +// CHECK: omp.par.pre_finalize: ; preds = %[[VAL_53]] +// CHECK: br label %[[VAL_55:.*]] +// CHECK: omp_section_loop.body: ; preds = %[[VAL_36]] +// CHECK: %[[VAL_56:.*]] = add i32 %[[VAL_34]], %[[VAL_28]] +// CHECK: %[[VAL_57:.*]] = mul i32 %[[VAL_56]], 1 +// CHECK: %[[VAL_58:.*]] = add i32 %[[VAL_57]], 0 +// CHECK: switch i32 %[[VAL_58]], label %[[VAL_59:.*]] [ +// CHECK: i32 0, label %[[VAL_60:.*]] +// CHECK: i32 1, label %[[VAL_61:.*]] +// CHECK: ] +// CHECK: omp_section_loop.body.case3: ; preds = %[[VAL_38]] +// CHECK: br label %[[VAL_62:.*]] +// CHECK: omp.section.region5: ; preds = %[[VAL_61]] +// CHECK: %[[VAL_63:.*]] = load float, ptr %[[VAL_20]], align 4 +// CHECK: %[[VAL_64:.*]] = fadd contract float %[[VAL_63]], 2.000000e+00 +// CHECK: store float %[[VAL_64]], ptr %[[VAL_20]], align 4 +// CHECK: br label %[[VAL_65:.*]] +// CHECK: omp.region.cont4: ; preds = %[[VAL_62]] +// CHECK: br label %[[VAL_59]] +// CHECK: omp_section_loop.body.case: ; preds = %[[VAL_38]] +// CHECK: br label %[[VAL_66:.*]] +// CHECK: omp.section.region: ; preds = %[[VAL_60]] +// CHECK: %[[VAL_67:.*]] = load float, ptr %[[VAL_20]], align 4 +// CHECK: %[[VAL_68:.*]] = fadd contract float %[[VAL_67]], 1.000000e+00 +// CHECK: store float %[[VAL_68]], ptr %[[VAL_20]], align 4 +// CHECK: br label %[[VAL_69:.*]] +// CHECK: omp.region.cont2: ; preds = %[[VAL_66]] +// CHECK: br label %[[VAL_59]] +// CHECK: omp_section_loop.body.sections.after: ; preds = %[[VAL_65]], %[[VAL_69]], %[[VAL_38]] +// CHECK: br label %[[VAL_33]] +// CHECK: omp_section_loop.inc: ; preds = 
%[[VAL_59]] +// CHECK: %[[VAL_35]] = add nuw i32 %[[VAL_34]], 1 +// CHECK: br label %[[VAL_32]] +// CHECK: omp.par.outlined.exit.exitStub: ; preds = %[[VAL_54]] +// CHECK: ret void +// CHECK: %[[VAL_70:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_71:.*]], i64 0, i64 0 +// CHECK: %[[VAL_72:.*]] = load ptr, ptr %[[VAL_70]], align 8 +// CHECK: %[[VAL_73:.*]] = load float, ptr %[[VAL_72]], align 4 +// CHECK: %[[VAL_74:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_75:.*]], i64 0, i64 0 +// CHECK: %[[VAL_76:.*]] = load ptr, ptr %[[VAL_74]], align 8 +// CHECK: %[[VAL_77:.*]] = load float, ptr %[[VAL_76]], align 4 +// CHECK: %[[VAL_78:.*]] = fadd contract float %[[VAL_73]], %[[VAL_77]] +// CHECK: store float %[[VAL_78]], ptr %[[VAL_72]], align 4 +// CHECK: ret void