diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 34c8e79173bcd..e31121260acda 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -133,6 +133,25 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type, fir::numericMlirTypeToFortran(builder, type, loc, intrinsicName) + " in " + intrinsicName); } + +using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; +using InitValGeneratorTy = llvm::function_ref; +using AddrGeneratorTy = llvm::function_ref; + +// Produces a loop nest for a Minloc intrinsic. +void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array, + InitValGeneratorTy initVal, + MinlocBodyOpGeneratorTy genBody, + fir::AddrGeneratorTy getAddrFn, unsigned rank, + mlir::Type elementType, mlir::Location loc, + mlir::Type maskElemType, mlir::Value resultArr, + bool maskMayBeLogicalScalar); + } // namespace fir #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index a7bf250215384..3f4ec4f3bccc8 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -807,6 +808,203 @@ class ReductionElementalConversion : public mlir::OpRewritePattern { } }; +// Look for minloc(mask=elemental) and generate the minloc loop with +// inlined elemental. +// %e = hlfir.elemental %shape ({ ... 
}) +// %m = hlfir.minloc %array mask %e +class MinMaxlocElementalConversion + : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(hlfir::MinlocOp minloc, + mlir::PatternRewriter &rewriter) const override { + if (!minloc.getMask() || minloc.getDim() || minloc.getBack()) + return rewriter.notifyMatchFailure(minloc, "Did not find valid minloc"); + + auto elemental = minloc.getMask().getDefiningOp(); + if (!elemental || hlfir::elementalOpMustProduceTemp(elemental)) + return rewriter.notifyMatchFailure(minloc, "Did not find elemental"); + + mlir::Value array = minloc.getArray(); + + unsigned rank = mlir::cast(minloc.getType()).getShape()[0]; + mlir::Type arrayType = array.getType(); + if (!arrayType.isa()) + return rewriter.notifyMatchFailure( + minloc, "Currently requires a boxed type input"); + mlir::Type elementType = hlfir::getFortranElementType(arrayType); + if (!fir::isa_trivial(elementType)) + return rewriter.notifyMatchFailure( + minloc, "Character arrays are currently not handled"); + + mlir::Location loc = minloc.getLoc(); + fir::FirOpBuilder builder{rewriter, minloc.getOperation()}; + mlir::Value resultArr = builder.createTemporary( + loc, fir::SequenceType::get( + rank, hlfir::getFortranElementType(minloc.getType()))); + + auto init = [](fir::FirOpBuilder builder, mlir::Location loc, + mlir::Type elementType) { + if (auto ty = elementType.dyn_cast()) { + const llvm::fltSemantics &sem = ty.getFloatSemantics(); + return builder.createRealConstant( + loc, elementType, + llvm::APFloat::getLargest(sem, /*Negative=*/false)); + } + unsigned bits = elementType.getIntOrFloatBitWidth(); + int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue(); + return builder.createIntegerConstant(loc, elementType, maxInt); + }; + + auto genBodyOp = + [&rank, &resultArr, &elemental]( + fir::FirOpBuilder builder, mlir::Location loc, + mlir::Type elementType, mlir::Value array, mlir::Value 
flagRef, + mlir::Value reduction, + const llvm::SmallVectorImpl &indices) -> mlir::Value { + // We are in the innermost loop: generate the elemental inline + mlir::Value oneIdx = + builder.createIntegerConstant(loc, builder.getIndexType(), 1); + llvm::SmallVector oneBasedIndices; + llvm::transform( + indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) { + return builder.create(loc, V, oneIdx); + }); + hlfir::YieldElementOp yield = + hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices); + mlir::Value maskElem = yield.getElementValue(); + yield->erase(); + + mlir::Type ifCompatType = builder.getI1Type(); + mlir::Value ifCompatElem = + builder.create(loc, ifCompatType, maskElem); + + llvm::SmallVector resultsTy = {elementType, elementType}; + fir::IfOp maskIfOp = + builder.create(loc, elementType, ifCompatElem, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front()); + + // Set flag that mask was true at some point + mlir::Value flagSet = builder.createIntegerConstant( + loc, mlir::cast(flagRef.getType()).getEleTy(), 1); + builder.create(loc, flagSet, flagRef); + mlir::Value addr = hlfir::getElementAt(loc, builder, hlfir::Entity{array}, + oneBasedIndices); + mlir::Value elem = builder.create(loc, addr); + + // Compare with the max reduction value + mlir::Value cmp; + if (elementType.isa()) { + cmp = builder.create( + loc, mlir::arith::CmpFPredicate::OLT, elem, reduction); + } else if (elementType.isa()) { + cmp = builder.create( + loc, mlir::arith::CmpIPredicate::slt, elem, reduction); + } else { + llvm_unreachable("unsupported type"); + } + + // Set the new coordinate to the result + fir::IfOp ifOp = builder.create(loc, elementType, cmp, + /*withElseRegion*/ true); + + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + mlir::Type resultElemTy = + hlfir::getFortranElementType(resultArr.getType()); + mlir::Type returnRefTy = builder.getRefType(resultElemTy); + mlir::IndexType idxTy = 
builder.getIndexType(); + + for (unsigned int i = 0; i < rank; ++i) { + mlir::Value index = builder.createIntegerConstant(loc, idxTy, i + 1); + mlir::Value resultElemAddr = builder.create( + loc, returnRefTy, resultArr, index); + mlir::Value fortranIndex = builder.create( + loc, resultElemTy, oneBasedIndices[i]); + builder.create(loc, fortranIndex, resultElemAddr); + } + builder.create(loc, elem); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, reduction); + builder.setInsertionPointAfter(ifOp); + + // Close the mask if + builder.create(loc, ifOp.getResult(0)); + builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front()); + builder.create(loc, reduction); + builder.setInsertionPointAfter(maskIfOp); + + return maskIfOp.getResult(0); + }; + auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc, + const mlir::Type &resultElemType, mlir::Value resultArr, + mlir::Value index) { + mlir::Type resultRefTy = builder.getRefType(resultElemType); + mlir::Value oneIdx = + builder.createIntegerConstant(loc, builder.getIndexType(), 1); + index = builder.create(loc, index, oneIdx); + return builder.create(loc, resultRefTy, resultArr, + index); + }; + + // Initialize the result + mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType()); + mlir::Type resultRefTy = builder.getRefType(resultElemTy); + mlir::Value returnValue = + builder.createIntegerConstant(loc, resultElemTy, 0); + for (unsigned int i = 0; i < rank; ++i) { + mlir::Value index = + builder.createIntegerConstant(loc, builder.getIndexType(), i + 1); + mlir::Value resultElemAddr = builder.create( + loc, resultRefTy, resultArr, index); + builder.create(loc, returnValue, resultElemAddr); + } + + fir::genMinMaxlocReductionLoop(builder, array, init, genBodyOp, getAddrFn, + rank, elementType, loc, builder.getI1Type(), + resultArr, false); + + mlir::Value asExpr = builder.create( + loc, resultArr, builder.createBool(loc, false)); + + // Check all 
the users - the destroy is no longer required, and any assign + // can use resultArr directly so that VariableAssignBufferization in this + // pass can optimize the results. Other operations are replaced with an + // AsExpr for the temporary resultArr. + llvm::SmallVector destroys; + llvm::SmallVector assigns; + for (auto user : minloc->getUsers()) { + if (auto destroy = mlir::dyn_cast(user)) + destroys.push_back(destroy); + else if (auto assign = mlir::dyn_cast(user)) + assigns.push_back(assign); + } + + // Check if the minloc was the only user of the elemental (apart from a + // destroy), and remove it if so. + mlir::Operation::user_range elemUsers = elemental->getUsers(); + hlfir::DestroyOp elemDestroy; + if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) { + elemDestroy = mlir::dyn_cast(*elemUsers.begin()); + if (!elemDestroy) + elemDestroy = mlir::dyn_cast(*++elemUsers.begin()); + } + + for (auto d : destroys) + rewriter.eraseOp(d); + for (auto a : assigns) + a.setOperand(0, resultArr); + rewriter.replaceOp(minloc, asExpr); + if (elemDestroy) { + rewriter.eraseOp(elemDestroy); + rewriter.eraseOp(elemental); + } + return mlir::success(); + } +}; + class OptimizedBufferizationPass : public hlfir::impl::OptimizedBufferizationBase< OptimizedBufferizationPass> { @@ -832,6 +1030,7 @@ class OptimizedBufferizationPass patterns.insert>(context); patterns.insert>(context); patterns.insert>(context); + patterns.insert(context); if (mlir::failed(mlir::applyPatternsAndFoldGreedily( func, std::move(patterns), config))) { diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp index c89ee6d5e2039..2301e7146f141 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -31,6 +31,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" 
+#include "flang/Optimizer/Support/Utils.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -243,8 +244,6 @@ static std::optional getArgElementType(mlir::Value val) { using BodyOpGeneratorTy = llvm::function_ref; -using InitValGeneratorTy = llvm::function_ref; using ContinueLoopGenTy = llvm::function_ref( fir::FirOpBuilder &, mlir::Location, mlir::Value)>; @@ -266,7 +265,7 @@ using ContinueLoopGenTy = llvm::function_ref( template static void genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, - InitValGeneratorTy initVal, ContinueLoopGenTy loopCond, + fir::InitValGeneratorTy initVal, ContinueLoopGenTy loopCond, T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody, unsigned rank, mlir::Type elementType, mlir::Location loc) { @@ -353,28 +352,22 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, // Return the reduction value from the function. builder.create(loc, results[resultIndex]); } -using MinMaxlocBodyOpGeneratorTy = llvm::function_ref &)>; - -static void genMinMaxlocReductionLoop( - fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, - InitValGeneratorTy initVal, MinMaxlocBodyOpGeneratorTy genBody, - unsigned rank, mlir::Type elementType, mlir::Location loc, bool hasMask, - mlir::Type maskElemType, mlir::Value resultArr) { +void fir::genMinMaxlocReductionLoop( + fir::FirOpBuilder &builder, mlir::Value array, + fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody, + fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType, + mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr, + bool maskMayBeLogicalScalar) { mlir::IndexType idxTy = builder.getIndexType(); - mlir::Block::BlockArgListType args = funcOp.front().getArguments(); - mlir::Value arg = args[1]; - mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); fir::SequenceType::Shape flatShape(rank, 
fir::SequenceType::getUnknownExtent()); mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); mlir::Type boxArrTy = fir::BoxType::get(arrTy); - mlir::Value array = builder.create(loc, boxArrTy, arg); + array = builder.create(loc, boxArrTy, array); mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType()); mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1); @@ -382,13 +375,6 @@ static void genMinMaxlocReductionLoop( mlir::Value flagRef = builder.createTemporary(loc, resultElemType); builder.create(loc, zero, flagRef); - mlir::Value mask; - if (hasMask) { - mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType); - mlir::Type boxMaskTy = fir::BoxType::get(maskTy); - mask = builder.create(loc, boxMaskTy, args[2]); - } - mlir::Value init = initVal(builder, loc, elementType); llvm::SmallVector bounds; @@ -431,44 +417,8 @@ static void genMinMaxlocReductionLoop( // Reverse the indices such that they are ordered as: // std::reverse(indices.begin(), indices.end()); - // We are in the innermost loop: generate the reduction body. - if (hasMask) { - mlir::Type logicalRef = builder.getRefType(maskElemType); - mlir::Value maskAddr = - builder.create(loc, logicalRef, mask, indices); - mlir::Value maskElem = builder.create(loc, maskAddr); - - // fir::IfOp requires argument to be I1 - won't accept logical or any other - // Integer. 
- mlir::Type ifCompatType = builder.getI1Type(); - mlir::Value ifCompatElem = - builder.create(loc, ifCompatType, maskElem); - - llvm::SmallVector resultsTy = {elementType, elementType}; - fir::IfOp ifOp = builder.create(loc, elementType, ifCompatElem, - /*withElseRegion=*/true); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - } - - // Set flag that mask was true at some point - builder.create(loc, flagSet, flagRef); - mlir::Type eleRefTy = builder.getRefType(elementType); - mlir::Value addr = - builder.create(loc, eleRefTy, array, indices); - mlir::Value elem = builder.create(loc, addr); - mlir::Value reductionVal = - genBody(builder, loc, elementType, elem, init, indices); - - if (hasMask) { - fir::IfOp ifOp = - mlir::dyn_cast(builder.getBlock()->getParentOp()); - builder.create(loc, reductionVal); - builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, init); - reductionVal = ifOp.getResult(0); - builder.setInsertionPointAfter(ifOp); - } + genBody(builder, loc, elementType, array, flagRef, init, indices); // Unwind the loop nest and insert ResultOp on each level // to return the updated value of the reduction to the enclosing @@ -483,13 +433,15 @@ static void genMinMaxlocReductionLoop( builder.setInsertionPointAfter(loop.getOperation()); } // End of loop nest. The insertion point is after the outermost loop. 
- if (fir::IfOp ifOp = - mlir::dyn_cast(builder.getBlock()->getParentOp())) { - builder.create(loc, reductionVal); - builder.setInsertionPointAfter(ifOp); - // Redefine flagSet to escape scope of ifOp - flagSet = builder.createIntegerConstant(loc, resultElemType, 1); - reductionVal = ifOp.getResult(0); + if (maskMayBeLogicalScalar) { + if (fir::IfOp ifOp = + mlir::dyn_cast(builder.getBlock()->getParentOp())) { + builder.create(loc, reductionVal); + builder.setInsertionPointAfter(ifOp); + // Redefine flagSet to escape scope of ifOp + flagSet = builder.createIntegerConstant(loc, resultElemType, 1); + reductionVal = ifOp.getResult(0); + } } // Check for case where array was full of max values. @@ -521,28 +473,12 @@ static void genMinMaxlocReductionLoop( // Load output array with 1s instead of 0s for (unsigned int i = 0; i < rank; ++i) { - mlir::Type resultRefTy = builder.getRefType(resultElemType); - // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1); mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); mlir::Value resultElemAddr = - builder.create(loc, resultRefTy, resultArr, index); + getAddrFn(builder, loc, resultElemType, resultArr, index); builder.create(loc, flagSet, resultElemAddr); } builder.setInsertionPointAfter(ifMaskTrueOp); - // Store newly created output array to the reference passed in - fir::SequenceType::Shape resultShape(1, rank); - mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType); - mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy); - mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy); - mlir::Type outputRefTy = builder.getRefType(outputBoxTy); - - mlir::Value outputArrNone = args[0]; - mlir::Value outputArr = - builder.create(loc, outputRefTy, outputArrNone); - - // Store nearly created array to output array - builder.create(loc, resultArr, outputArr); - builder.create(loc); } static llvm::SmallVector nopLoopCond(fir::FirOpBuilder &builder, @@ -791,6 +727,14 @@ static 
void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder, mlir::Type resultRefTy = builder.getRefType(resultElemTy); + if (maskRank > 0) { + fir::SequenceType::Shape flatShape(rank, + fir::SequenceType::getUnknownExtent()); + mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType); + mlir::Type boxMaskTy = fir::BoxType::get(maskTy); + mask = builder.create(loc, boxMaskTy, mask); + } + for (unsigned int i = 0; i < rank; ++i) { mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); mlir::Value resultElemAddr = @@ -799,24 +743,51 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder, } auto genBodyOp = - [&rank, &resultArr, - isMax](fir::FirOpBuilder builder, mlir::Location loc, - mlir::Type elementType, mlir::Value elem1, mlir::Value elem2, - llvm::SmallVector indices) - -> mlir::Value { + [&rank, &resultArr, isMax, &mask, &maskElemType, &maskRank]( + fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType, + mlir::Value array, mlir::Value flagRef, mlir::Value reduction, + const llvm::SmallVectorImpl &indices) -> mlir::Value { + // We are in the innermost loop: generate the reduction body. + if (maskRank > 0) { + mlir::Type logicalRef = builder.getRefType(maskElemType); + mlir::Value maskAddr = + builder.create(loc, logicalRef, mask, indices); + mlir::Value maskElem = builder.create(loc, maskAddr); + + // fir::IfOp requires argument to be I1 - won't accept logical or any + // other Integer. 
+ mlir::Type ifCompatType = builder.getI1Type(); + mlir::Value ifCompatElem = + builder.create(loc, ifCompatType, maskElem); + + llvm::SmallVector resultsTy = {elementType, elementType}; + fir::IfOp ifOp = builder.create(loc, elementType, ifCompatElem, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + + // Set flag that mask was true at some point + mlir::Value flagSet = builder.createIntegerConstant( + loc, mlir::cast(flagRef.getType()).getEleTy(), 1); + builder.create(loc, flagSet, flagRef); + mlir::Type eleRefTy = builder.getRefType(elementType); + mlir::Value addr = + builder.create(loc, eleRefTy, array, indices); + mlir::Value elem = builder.create(loc, addr); + mlir::Value cmp; if (elementType.isa()) { cmp = builder.create( loc, isMax ? mlir::arith::CmpFPredicate::OGT : mlir::arith::CmpFPredicate::OLT, - elem1, elem2); + elem, reduction); } else if (elementType.isa()) { cmp = builder.create( loc, isMax ? mlir::arith::CmpIPredicate::sgt : mlir::arith::CmpIPredicate::slt, - elem1, elem2); + elem, reduction); } else { llvm_unreachable("unsupported type"); } @@ -841,11 +812,24 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder, builder.create(loc, convert, one); builder.create(loc, fortranIndex, resultElemAddr); } - builder.create(loc, elem1); + builder.create(loc, elem); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, elem2); + builder.create(loc, reduction); builder.setInsertionPointAfter(ifOp); - return ifOp.getResult(0); + mlir::Value reductionVal = ifOp.getResult(0); + + // Close the mask if needed + if (maskRank > 0) { + fir::IfOp ifOp = + mlir::dyn_cast(builder.getBlock()->getParentOp()); + builder.create(loc, reductionVal); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, reduction); + reductionVal = ifOp.getResult(0); + builder.setInsertionPointAfter(ifOp); + } + + return reductionVal; }; // if mask is a logical 
scalar, we can check its value before the main loop @@ -879,12 +863,30 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder, builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); } + auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc, + const mlir::Type &resultElemType, mlir::Value resultArr, + mlir::Value index) { + mlir::Type resultRefTy = builder.getRefType(resultElemType); + return builder.create(loc, resultRefTy, resultArr, + index); + }; + + genMinMaxlocReductionLoop(builder, funcOp.front().getArgument(1), init, + genBodyOp, getAddrFn, rank, elementType, loc, + maskElemType, resultArr, maskRank == 0); + + // Store newly created output array to the reference passed in + fir::SequenceType::Shape resultShape(1, rank); + mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemTy); + mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy); + mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy); + mlir::Type outputRefTy = builder.getRefType(outputBoxTy); + mlir::Value outputArr = builder.create( + loc, outputRefTy, funcOp.front().getArgument(0)); - // bit of a hack - maskRank is set to -1 for absent mask arg, so don't - // generate high level mask or element by element mask. 
- bool hasMask = maskRank > 0; - genMinMaxlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType, - loc, hasMask, maskElemType, resultArr); + // Store newly created array to output array + builder.create(loc, resultArr, outputArr); + builder.create(loc); } /// Generate function type for the simplified version of RTNAME(DotProduct) diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir new file mode 100644 index 0000000000000..cb483d51e05d1 --- /dev/null +++ b/flang/test/HLFIR/minloc-elemental.fir @@ -0,0 +1,426 @@ +// RUN: fir-opt %s -opt-bufferization | FileCheck %s + +func.func @_QPtest(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { + %c0 = arith.constant 0 : index + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %3 = fir.load %2#0 : !fir.ref + %4:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg3: index): + %8 = hlfir.designate %0#0 (%arg3) : (!fir.box>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = arith.cmpi sge, %9, %3 : i32 + %11 = fir.convert %10 : (i1) -> !fir.logical<4> + hlfir.yield_element %11 : !fir.logical<4> + } + %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath} : (!fir.box>, !hlfir.expr>) -> !hlfir.expr<1xi32> + hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box> + hlfir.destroy %7 : !hlfir.expr<1xi32> + hlfir.destroy %6 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = 
"m"}) { +// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32 +// CHECK-NEXT: %c1_i32 = arith.constant 1 : i32 +// CHECK-NEXT: %c0 = arith.constant 0 : index +// CHECK-NEXT: %c1 = arith.constant 1 : index +// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32 +// CHECK-NEXT: %[[V0:.*]] = fir.alloca i32 +// CHECK-NEXT: %[[RES:.*]] = fir.alloca !fir.array<1xi32> +// CHECK-NEXT: %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK-NEXT: %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref +// CHECK-NEXT: %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c0_i32 to %[[V8]] : !fir.ref +// CHECK-NEXT: fir.store %c0_i32 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index +// CHECK-NEXT: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) { +// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref +// CHECK-NEXT: %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32 +// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (i32) { +// CHECK-NEXT: fir.store %c1_i32 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index +// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : 
(!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref +// CHECK-NEXT: %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32 +// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (i32) { +// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32 +// CHECK-NEXT: fir.store %[[V24]] to %[[V23]] : !fir.ref +// CHECK-NEXT: fir.result %[[V20]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V22]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V18]] : i32 +// CHECK-NEXT: } +// CHECK-NEXT: %[[V12:.*]] = fir.load %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i32 : i32 +// CHECK-NEXT: fir.if %[[V13]] { +// CHECK-NEXT: %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32 +// CHECK-NEXT: fir.if %[[V14]] { +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c1_i32 to %[[V15]] : !fir.ref +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered { +// CHECK-NEXT: %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V14:.*]] = fir.load %[[V13]] : !fir.ref +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V15]] : i32, !fir.ref +// CHECK-NEXT: } +// CHECK-NEXT: return +// CHECK-NEXT: } + + +func.func @_QPtest_kind2(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { + %c0 = arith.constant 0 : index + %0:2 = hlfir.declare %arg0 {uniq_name = 
"_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %3 = fir.load %2#0 : !fir.ref + %4:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg3: index): + %8 = hlfir.designate %0#0 (%arg3) : (!fir.box>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = arith.cmpi sge, %9, %3 : i32 + %11 = fir.convert %10 : (i1) -> !fir.logical<4> + hlfir.yield_element %11 : !fir.logical<4> + } + %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath} : (!fir.box>, !hlfir.expr>) -> !hlfir.expr<1xi16> + hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box> + hlfir.destroy %7 : !hlfir.expr<1xi16> + hlfir.destroy %6 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPtest_kind2(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { +// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32 +// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16 +// CHECK-NEXT: %c0 = arith.constant 0 : index +// CHECK-NEXT: %c1 = arith.constant 1 : index +// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16 +// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16 +// CHECK-NEXT: %[[RES:.*]] = fir.alloca !fir.array<1xi16> +// CHECK-NEXT: %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK-NEXT: %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref +// CHECK-NEXT: %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : 
(!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c0_i16 to %[[V8]] : !fir.ref +// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index +// CHECK-NEXT: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) { +// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref +// CHECK-NEXT: %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32 +// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (i32) { +// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index +// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref +// CHECK-NEXT: %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32 +// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (i32) { +// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i16 +// CHECK-NEXT: fir.store %[[V24]] to %[[V23]] : !fir.ref +// CHECK-NEXT: fir.result %[[V20]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V22]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V18]] : i32 +// CHECK-NEXT: } +// CHECK-NEXT: %[[V12:.*]] = fir.load %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V13:.*]] = arith.cmpi eq, 
%[[V12]], %c1_i16 : i16 +// CHECK-NEXT: fir.if %[[V13]] { +// CHECK-NEXT: %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32 +// CHECK-NEXT: fir.if %[[V14]] { +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c1_i16 to %[[V15]] : !fir.ref +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered { +// CHECK-NEXT: %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V14:.*]] = fir.load %[[V13]] : !fir.ref +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V15]] : i16, !fir.ref +// CHECK-NEXT: } +// CHECK-NEXT: return + + +func.func @_QPtest_kind2_convert(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %3 = fir.load %2#0 : !fir.ref + %4:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg3: index): + %10 = hlfir.designate %0#0 (%arg3) : (!fir.box>, index) -> !fir.ref + %11 = fir.load %10 : !fir.ref + %12 = arith.cmpi sge, %11, %3 : i32 + %13 = fir.convert %12 : (i1) -> !fir.logical<4> + hlfir.yield_element %13 : !fir.logical<4> + } + %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath} : (!fir.box>, !hlfir.expr>) 
-> !hlfir.expr<1xi16> + %8 = fir.shape %c1 : (index) -> !fir.shape<1> + %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16 + %11 = fir.convert %10 : (i16) -> i32 + hlfir.yield_element %11 : i32 + } + hlfir.assign %9 to %1#0 : !hlfir.expr, !fir.box> + hlfir.destroy %9 : !hlfir.expr + hlfir.destroy %7 : !hlfir.expr<1xi16> + hlfir.destroy %6 : !hlfir.expr> + return +} +// The minloc has other uses, not an assign that gets optimized out. +// CHECK-LABEL: _QPtest_kind2_convert +// CHECK-SAME: (%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { +// CHECK-NEXT: %false = arith.constant false +// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32 +// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16 +// CHECK-NEXT: %c0 = arith.constant 0 : index +// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16 +// CHECK-NEXT: %c1 = arith.constant 1 : index +// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16 +// CHECK-NEXT: %[[V1:.*]] = fir.alloca !fir.array<1xi16> +// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK-NEXT: %[[V4:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK-NEXT: %[[V5:.*]] = fir.load %[[V4]]#0 : !fir.ref +// CHECK-NEXT: %[[V6:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c0_i16 to %[[V6]] : !fir.ref +// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V7:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[V8:.*]] = arith.subi %[[V7]]#1, %c1 : index +// CHECK-NEXT: %[[V9:.*]] = fir.do_loop %arg3 = %c0 to %[[V8]] step %c1 
iter_args(%arg4 = %c2147483647_i32) -> (i32) { +// CHECK-NEXT: %[[V15:.*]] = arith.addi %arg3, %c1 : index +// CHECK-NEXT: %[[V16:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V17:.*]] = fir.load %[[V16]] : !fir.ref +// CHECK-NEXT: %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32 +// CHECK-NEXT: %[[V19:.*]] = fir.if %[[V18]] -> (i32) { +// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V20:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[V21:.*]] = arith.subi %[[V20]]#0, %c1 : index +// CHECK-NEXT: %[[V22:.*]] = arith.addi %[[V15]], %[[V21]] : index +// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V22]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V24:.*]] = fir.load %[[V23]] : !fir.ref +// CHECK-NEXT: %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32 +// CHECK-NEXT: %[[V26:.*]] = fir.if %[[V25]] -> (i32) { +// CHECK-NEXT: %[[V27:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16 +// CHECK-NEXT: fir.store %[[V28]] to %[[V27]] : !fir.ref +// CHECK-NEXT: fir.result %[[V24]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V26]] : i32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : i32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V19]] : i32 +// CHECK-NEXT: } +// CHECK-NEXT: %[[V10:.*]] = fir.load %[[V0]] : !fir.ref +// CHECK-NEXT: %[[V11:.*]] = arith.cmpi eq, %[[V10]], %c1_i16 : i16 +// CHECK-NEXT: fir.if %[[V11]] { +// CHECK-NEXT: %[[V15]] = arith.cmpi eq, %[[V9]], %c2147483647_i32 : i32 +// CHECK-NEXT: fir.if %[[V15]] { +// CHECK-NEXT: %[[V16]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: fir.store %c1_i16 to %[[V16]] : !fir.ref +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: %[[V12:.*]] = hlfir.as_expr 
%[[V1]] move %false : (!fir.ref>, i1) -> !hlfir.expr<1xi16> +// CHECK-NEXT: %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1> +// CHECK-NEXT: %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK-NEXT: ^bb0(%arg3: index): +// CHECK-NEXT: %[[V15:.*]] = hlfir.apply %[[V12]], %arg3 : (!hlfir.expr<1xi16>, index) -> i16 +// CHECK-NEXT: %[[V16:.*]] = fir.convert %[[V15]] : (i16) -> i32 +// CHECK-NEXT: hlfir.yield_element %[[V16]] : i32 +// CHECK-NEXT: } +// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V3]]#0 : !hlfir.expr, !fir.box> +// CHECK-NEXT: hlfir.destroy %[[V14]] : !hlfir.expr +// CHECK-NEXT: return + + + +func.func @_QPtest_float(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { + %c0 = arith.constant 0 : index + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %3 = fir.load %2#0 : !fir.ref + %4:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg3: index): + %8 = hlfir.designate %0#0 (%arg3) : (!fir.box>, index) -> !fir.ref + %9 = fir.load %8 : !fir.ref + %10 = arith.cmpf oge, %9, %3 : f32 + %11 = fir.convert %10 : (i1) -> !fir.logical<4> + hlfir.yield_element %11 : !fir.logical<4> + } + %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath} : (!fir.box>, !hlfir.expr>) -> !hlfir.expr<1xi32> + hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box> + hlfir.destroy %7 : !hlfir.expr<1xi32> + hlfir.destroy %6 : !hlfir.expr> + return +} +// CHECK-LABEL: _QPtest_float +// CHECK: %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = 
%cst) -> (f32) { +// CHECK-NEXT: %[[V14:.*]] = arith.addi %arg3, %c1 : index +// CHECK-NEXT: %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V16:.*]] = fir.load %[[V15]] : !fir.ref +// CHECK-NEXT: %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32 +// CHECK-NEXT: %[[V18:.*]] = fir.if %[[V17]] -> (f32) { +// CHECK-NEXT: fir.store %c1_i32 to %[[V0:.*]] : !fir.ref +// CHECK-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %2#0, %c0 : (!fir.box>, index) -> (index, index, index) +// CHECK-NEXT: %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index +// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index +// CHECK-NEXT: %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box>, index) -> !fir.ref +// CHECK-NEXT: %[[V20:.*]] = fir.load %[[V19]] : !fir.ref +// CHECK-NEXT: %[[V21:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath : f32 +// CHECK-NEXT: %[[V22:.*]] = fir.if %[[V21]] -> (f32) { +// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref>, index) -> !fir.ref +// CHECK-NEXT: %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32 +// CHECK-NEXT: fir.store %[[V24]] to %[[V23]] : !fir.ref +// CHECK-NEXT: fir.result %[[V20]] : f32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : f32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V22]] : f32 +// CHECK-NEXT: } else { +// CHECK-NEXT: fir.result %arg4 : f32 +// CHECK-NEXT: } +// CHECK-NEXT: fir.result %[[V18]] : f32 +// CHECK-NEXT: } + + +func.func @_QPtest_assignshape(%arg0: !fir.ref> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.ref> {fir.bindc_name = "m"}) { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %c3 = arith.constant 3 : index + %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2> + %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) + %2 = fir.shape %c3 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare 
%arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %5 = fir.load %4#0 : !fir.ref + %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> { + ^bb0(%arg3: index, %arg4: index): + %10 = hlfir.designate %1#0 (%arg3, %arg4) : (!fir.ref>, index, index) -> !fir.ref + %11 = fir.load %10 : !fir.ref + %12 = arith.cmpf oge, %11, %5 : f32 + %13 = fir.convert %12 : (i1) -> !fir.logical<4> + hlfir.yield_element %13 : !fir.logical<4> + } + %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath} : (!fir.ref>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32> + %8 = fir.shape %c2 : (index) -> !fir.shape<1> + %9 = hlfir.designate %3#0 (%c1:%c2:%c1) shape %8 : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> + hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref> + hlfir.destroy %7 : !hlfir.expr<2xi32> + hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>> + return +} +// Not supported as the input is not a box +// CHECK-LABEL: _QPtest_assignshape +// CHECK: hlfir.minloc + + +func.func @_QFPtest_character(%arg0: !fir.box>> {fir.bindc_name = "b"}, %arg1: !fir.box> {fir.bindc_name = "c"}, %arg2: !fir.ref {fir.bindc_name = "val"}) -> i32 { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box>>, index) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"} + %3 = fir.shape %c1 : (index) -> !fir.shape<1> + %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"} + %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref) -> 
(!fir.ref, !fir.ref) + %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %8 = fir.load %7#0 : !fir.ref + %9:3 = fir.box_dims %1#0, %c0 : (!fir.box>, index) -> (index, index, index) + %10 = fir.shape %9#1 : (index) -> !fir.shape<1> + %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg3: index): + %16 = hlfir.designate %1#0 (%arg3) : (!fir.box>, index) -> !fir.ref + %17 = fir.load %16 : !fir.ref + %18 = arith.cmpi eq, %17, %8 : i32 + %19 = fir.convert %18 : (i1) -> !fir.logical<4> + hlfir.yield_element %19 : !fir.logical<4> + } + %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath} : (!fir.box>>, !hlfir.expr>) -> !hlfir.expr<1xi32> + hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref> + hlfir.destroy %12 : !hlfir.expr<1xi32> + hlfir.destroy %11 : !hlfir.expr> + %13 = hlfir.designate %4#0 (%c1) : (!fir.ref>, index) -> !fir.ref + %14 = fir.load %13 : !fir.ref + hlfir.assign %14 to %6#0 : i32, !fir.ref + %15 = fir.load %6#1 : !fir.ref + return %15 : i32 +} +// Characters are not supported at the moment +// CHECK-LABEL: _QFPtest_character +// CHECK: hlfir.minloc + + +func.func @_QPtest_parts(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.box> {fir.bindc_name = "mask"}) -> f32 { + %c1 = arith.constant 1 : index + %c5 = arith.constant 5 : index + %c0 = arith.constant 0 : index + %c5_i32 = arith.constant 5 : i32 + %0:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEmask"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1 = fir.alloca f32 {bindc_name = "test", uniq_name = "_QFtestEtest"} + %2:2 = hlfir.declare %1 {uniq_name = "_QFtestEtest"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %4:3 = fir.box_dims %0#0, %c0 : (!fir.box>, index) -> (index, index, index) + %5 = fir.shape %4#1 : (index) -> !fir.shape<1> + %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr> { + ^bb0(%arg2: index): 
+ %11 = hlfir.designate %0#0 (%arg2) : (!fir.box>, index) -> !fir.ref + %12 = fir.load %11 : !fir.ref + %13 = arith.cmpi sge, %12, %c5_i32 : i32 + %14 = fir.convert %13 : (i1) -> !fir.logical<4> + hlfir.yield_element %14 : !fir.logical<4> + } + %7 = hlfir.minloc %3#0 mask %6 {fastmath = #arith.fastmath} : (!fir.box>, !hlfir.expr>) -> !hlfir.expr<1xi32> + %8 = fir.shape %c1 : (index) -> !fir.shape<1> + %9 = hlfir.designate %3#0 (%c5:%c5:%c1) shape %8 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %7 to %9 : !hlfir.expr<1xi32>, !fir.box> + hlfir.destroy %7 : !hlfir.expr<1xi32> + hlfir.destroy %6 : !hlfir.expr> + %10 = fir.load %2#1 : !fir.ref + return %10 : f32 +} +// The result is assigned to a section of the input array; the minloc is still converted to a loop +// CHECK-LABEL: _QPtest_parts +// CHECK: fir.do_loop %{{.*}} = %c0 to %{{.*}} step %c1 iter_args(%{{.*}} = %c2147483647_i32) -> (i32) { + diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir index 0bd6ac7c436ff..cd059cc797a3f 100644 --- a/flang/test/Transforms/simplifyintrinsics.fir +++ b/flang/test/Transforms/simplifyintrinsics.fir @@ -1760,6 +1760,7 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32> // CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1> // CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box>> // CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index // CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref // CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref @@ -1768,7 +1769,6 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32 // CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0
: i32 // CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref -// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box>> // CHECK: %[[MAX:.*]] = arith.constant 2147483647 : i32 // CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index // CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index @@ -1779,7 +1779,8 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref> // CHECK: %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1 // CHECK: %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) { -// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[FLAG_SET2:.*]] = arith.constant 1 : i32 +// CHECK: fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref // CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref // CHECK: %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32 @@ -2423,6 +2424,7 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32> // CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1> // CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box>> // CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index // CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref // CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref @@ -2431,7 +2433,6 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32 // CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32 // CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : 
!fir.ref -// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box>> // CHECK: %[[MAX:.*]] = arith.constant -2147483648 : i32 // CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index // CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index @@ -2442,7 +2443,8 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref> {fir.bindc_ // CHECK: %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref> // CHECK: %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1 // CHECK: %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) { -// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[FLAG_SET2:.*]] = arith.constant 1 : i32 +// CHECK: fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref // CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref // CHECK: %[[NEW_MIN:.*]] = arith.cmpi sgt, %[[INARR_ITEMVAL]], %[[MIN]] : i32