From f0bc575e73a634f3058d6317ecca0ab6e0c4ebad Mon Sep 17 00:00:00 2001 From: Vijay Kandiah Date: Tue, 9 Dec 2025 10:55:29 -0800 Subject: [PATCH 1/6] [mlir][acc] Add loop tiling utilities for OpenACC Add utilities in OpenACCUtilsTiling.h/.cpp to support tiling transformations on acc.loop operations: - uncollapseLoops: Expand collapsed loops with multiple IVs into nested loop structures when tile count exceeds collapse count - tileACCLoops: Transform loop nests into tile and element loops based on provided tile sizes, with automatic resolution of unknown tile sizes (tile(*) represented as -1) These utilities prepare for the ACCLoopTiling pass which handles the OpenACC loop tile directive. --- .../mlir/Dialect/OpenACC/OpenACCUtilsTiling.h | 83 +++++ mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt | 3 + .../OpenACC/Utils/OpenACCUtilsTiling.cpp | 313 ++++++++++++++++ mlir/unittests/Dialect/OpenACC/CMakeLists.txt | 1 + .../OpenACC/OpenACCUtilsTilingTest.cpp | 349 ++++++++++++++++++ 5 files changed, 749 insertions(+) create mode 100644 mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h create mode 100644 mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp create mode 100644 mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h new file mode 100644 index 0000000000000..3152526cc0582 --- /dev/null +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h @@ -0,0 +1,83 @@ +//===- OpenACCUtilsTiling.h - OpenACC Loop Tiling Utilities -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for tiling OpenACC loops. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_ +#define MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_ + +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/SmallVector.h" + +namespace mlir { +namespace acc { + +/// Uncollapse tile loops with multiple IVs and collapseCount < tileCount. +/// This is used to prepare loops for tiling when the collapse count is less +/// than the tile count. +/// +/// \param origLoop The original loop operation to uncollapse. +/// \param tileCount The number of tile dimensions. +/// \param collapseCount The collapse count from the original loop. +/// \param rewriter The rewriter to use for modifications. +/// \return A vector of uncollapsed loop operations. +llvm::SmallVector +uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount, + unsigned collapseCount, mlir::RewriterBase &rewriter); + +/// Tile ACC loops according to the given tile sizes. +/// +/// Tiling a 2-level nested loop will create two 'tile' loops containing two +/// 'element' loops. The transformation looks like: +/// +/// Before Tiling: +/// \code +/// #pragma acc loop tile(tile_size1, tile_size2) +/// for (i = lb1; i < ub1; i += step1) { // original loop +/// for (j = lb2; j < ub2; j += step2) { +/// a[i,j] = i + j; +/// } +/// } +/// \endcode +/// +/// After Tiling: +/// \code +/// for (i = lb1; i < ub1; i += (step1 * tile_size1)) { // tile loop 1 +/// for (j = lb2; j < ub2; j += (step2 * tile_size2)) { // tile loop 2 +/// for (ii = i; ii < min(ub1, (step1 * tile_size1) + i); ii += step1) { +/// // element loop 1 +/// for (jj = j; jj < min(ub2, (step2 * tile_size2) + j); jj += step2) +/// { // element loop 2 +/// a[ii,jj] = i + j; +/// } +/// } +/// } +/// } +/// \endcode +/// +/// Unknown tile sizes (represented as -1 in OpenACC for `tile(*)`) are +/// resolved to the provided default tile size. +/// +/// \param tileLoops The loops to tile (outermost first). +/// \param tileSizes The tile sizes for each dimension. Values of -1 are +/// treated as unknown and resolved to defaultTileSize. +/// \param defaultTileSize The default tile size to use for unknown (*) tiles. +/// \param rewriter The rewriter to use for modifications. +/// \return The outermost loop after tiling. +mlir::acc::LoopOp tileACCLoops(llvm::SmallVector &tileLoops, + const llvm::SmallVector &tileSizes, + int32_t defaultTileSize, + mlir::RewriterBase &rewriter); + +} // namespace acc +} // namespace mlir + +#endif // MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_ diff --git a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt index 68e124625921f..c3de4f7e3e282 100644 --- a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIROpenACCUtils + OpenACCUtilsTiling.cpp OpenACCUtils.cpp ADDITIONAL_HEADER_DIRS @@ -14,7 +15,9 @@ add_mlir_dialect_library(MLIROpenACCUtils MLIROpenACCTypeInterfacesIncGen LINK_LIBS PUBLIC + MLIRArithDialect MLIROpenACCDialect MLIRIR MLIRSupport + MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp new file mode 100644 index 0000000000000..f939ec1c58cfd --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp @@ -0,0 +1,313 @@ +//===- OpenACCUtilsTiling.cpp - OpenACC Loop Tiling Utilities -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for tiling OpenACC loops. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/OpenACCUtilsTiling.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" +#include "mlir/Transforms/RegionUtils.h" + +// Resolve unknown tile sizes (represented as -1 for tile(*)) to the default. +static mlir::Value resolveUnknownTileSize(mlir::Value tileSize, + int32_t defaultTileSize, + mlir::RewriterBase &rewriter, + mlir::Location loc) { + auto constVal = mlir::getConstantIntValue(tileSize); + if (constVal && *constVal < 0) { + return mlir::arith::ConstantOp::create( + rewriter, loc, rewriter.getI32Type(), + rewriter.getI32IntegerAttr(defaultTileSize)); + } + return tileSize; +} + +// Remove vector/worker attributes from loop +static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop) { + if (loop.hasVector() || loop.getVectorValue()) { + loop.removeVectorAttr(); + loop.removeVectorOperandsDeviceTypeAttr(); + } else if (loop.hasWorker() || loop.getWorkerValue()) { + loop.removeWorkerAttr(); + loop.removeWorkerNumOperandsDeviceTypeAttr(); + } +} + +// Create a new ACC loop with new steps, lb, ub from original loop +static mlir::acc::LoopOp +createACCLoopFromOriginal(mlir::acc::LoopOp origLoop, + mlir::RewriterBase &rewriter, mlir::ValueRange lb, + mlir::ValueRange ub, mlir::ValueRange step, + mlir::DenseBoolArrayAttr inclusiveUBAttr, + mlir::acc::CombinedConstructsTypeAttr combinedAttr, + mlir::Location loc, bool preserveCollapse) { + mlir::ArrayAttr collapseAttr = mlir::ArrayAttr{}; + mlir::ArrayAttr collapseDeviceTypeAttr = mlir::ArrayAttr{}; + if (preserveCollapse) { + collapseAttr = origLoop.getCollapseAttr(); + collapseDeviceTypeAttr = origLoop.getCollapseDeviceTypeAttr(); + } + auto newLoop = mlir::acc::LoopOp::create( + rewriter, loc, origLoop->getResultTypes(), lb, ub, step, inclusiveUBAttr, + collapseAttr, collapseDeviceTypeAttr, origLoop.getGangOperands(), + origLoop.getGangOperandsArgTypeAttr(), + origLoop.getGangOperandsSegmentsAttr(), + origLoop.getGangOperandsDeviceTypeAttr(), origLoop.getWorkerNumOperands(), + origLoop.getWorkerNumOperandsDeviceTypeAttr(), + origLoop.getVectorOperands(), origLoop.getVectorOperandsDeviceTypeAttr(), + origLoop.getSeqAttr(), origLoop.getIndependentAttr(), + origLoop.getAuto_Attr(), origLoop.getGangAttr(), origLoop.getWorkerAttr(), + origLoop.getVectorAttr(), mlir::ValueRange{}, mlir::DenseI32ArrayAttr{}, + mlir::ArrayAttr{}, origLoop.getCacheOperands(), + origLoop.getPrivateOperands(), origLoop.getFirstprivateOperands(), + origLoop.getReductionOperands(), combinedAttr); + return newLoop; +} + +// Create inner loop inside input loop +static mlir::acc::LoopOp +createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter, + mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step, + mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc) { + mlir::acc::LoopOp elementLoop = createACCLoopFromOriginal( + inputLoop, rewriter, lb, ub, step, inclusiveUBAttr, + mlir::acc::CombinedConstructsTypeAttr{}, loc, /*preserveCollapse*/ false); + + // Remove gang/worker attributes from inner loops + rewriter.startOpModification(elementLoop); + if (inputLoop.hasGang() || + inputLoop.getGangValue(mlir::acc::GangArgType::Num) || + inputLoop.getGangValue(mlir::acc::GangArgType::Dim) || + inputLoop.getGangValue(mlir::acc::GangArgType::Static)) { + elementLoop.removeGangAttr(); + elementLoop.removeGangOperandsArgTypeAttr(); + elementLoop.removeGangOperandsSegmentsAttr(); + elementLoop.removeGangOperandsDeviceTypeAttr(); + } + if (inputLoop.hasVector() || inputLoop.getVectorValue()) { + elementLoop.removeWorkerAttr(); + elementLoop.removeWorkerNumOperandsDeviceTypeAttr(); + } + rewriter.finalizeOpModification(elementLoop); + + // Create empty block in elementLoop and add IV argument + mlir::Block *blk = rewriter.createBlock(&elementLoop.getRegion(), + elementLoop.getRegion().begin()); + rewriter.setInsertionPointToEnd(blk); + mlir::acc::YieldOp::create(rewriter, loc); + elementLoop.getBody().addArgument( + inputLoop.getBody().getArgument(0).getType(), loc); + + return elementLoop; +} + +// Move ops from source to target Loop and replace uses of IVs +static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop, + mlir::acc::LoopOp targetLoop, + llvm::ArrayRef newIVs, + llvm::ArrayRef origIVs, + size_t nOps, mlir::RewriterBase &rewriter) { + // Move ops from source to target loop [begin, begin + nOps - 1) + mlir::Block::iterator begin = sourceLoop.getBody().begin(); + targetLoop.getBody().getOperations().splice( + targetLoop.getBody().getOperations().begin(), + sourceLoop.getBody().getOperations(), begin, std::next(begin, nOps - 1)); + + // Replace uses of origIV with newIV + for (auto [i, newIV] : llvm::enumerate(newIVs)) + mlir::replaceAllUsesInRegionWith(origIVs[i], newIV, targetLoop.getRegion()); +} + +mlir::acc::LoopOp +mlir::acc::tileACCLoops(llvm::SmallVector &tileLoops, + const llvm::SmallVector &tileSizes, + int32_t defaultTileSize, mlir::RewriterBase &rewriter) { + // Tile collapsed and/or nested loops + mlir::acc::LoopOp outerLoop = tileLoops[0]; + const mlir::Location loc = outerLoop.getLoc(); + + // Resolve unknown tile sizes (tile(*) represented as -1) + llvm::SmallVector resolvedTileSizes; + rewriter.setInsertionPoint(outerLoop); + for (mlir::Value tileSize : tileSizes) { + resolvedTileSizes.push_back( + resolveUnknownTileSize(tileSize, defaultTileSize, rewriter, loc)); + } + + mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1]; + llvm::SmallVector origIVs; + llvm::SmallVector origSteps; + llvm::SmallVector origUBs; + llvm::SmallVector newSteps; + llvm::SmallVector newUBs; + llvm::SmallVector newIVs; + size_t nOps = innerLoop.getBody().getOperations().size(); + + // Extract original inclusiveUBs + llvm::SmallVector inclusiveUBs; + for (auto tileLoop : tileLoops) { + for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) { + // inclusiveUBs are present on the IR from Fortran frontend for DO loops + // but might not be present from other frontends (python) + // So check if it exists + if (tileLoop.getInclusiveUpperboundAttr()) + inclusiveUBs.push_back( + tileLoop.getInclusiveUpperboundAttr().asArrayRef()[j]); + else + inclusiveUBs.push_back(false); + } + } + + // Extract original ivs, UBs, steps, and calculate new steps + rewriter.setInsertionPoint(outerLoop); + for (auto [i, tileLoop] : llvm::enumerate(tileLoops)) { + for (auto arg : tileLoop.getBody().getArguments()) + origIVs.push_back(arg); + for (auto ub : tileLoop.getUpperbound()) + origUBs.push_back(ub); + + llvm::SmallVector currentLoopSteps; + for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) { + origSteps.push_back(step); + if (i + j >= resolvedTileSizes.size()) { + currentLoopSteps.push_back(step); + } else { + mlir::Value tileSize = resolvedTileSizes[i + j]; + auto newLoopStep = + mlir::arith::MulIOp::create(rewriter, loc, step, tileSize); + currentLoopSteps.push_back(newLoopStep); + newSteps.push_back(newLoopStep); + } + } + + rewriter.startOpModification(tileLoop); + tileLoop.getStepMutable().clear(); + tileLoop.getStepMutable().append(currentLoopSteps); + rewriter.finalizeOpModification(tileLoop); + } + + // Calculate new upper bounds for element loops + for (size_t i = 0; i < newSteps.size(); i++) { + rewriter.setInsertionPoint(innerLoop.getBody().getTerminator()); + // UpperBound: min(origUB, origIV+(originalStep*tile_size)) + auto stepped = + mlir::arith::AddIOp::create(rewriter, loc, origIVs[i], newSteps[i]); + mlir::Value newUB = stepped; + if (inclusiveUBs[i]) { + // Handle InclusiveUB + // UpperBound: min(origUB, origIV+(originalStep*tile_size - 1)) + auto c1 = mlir::arith::ConstantOp::create( + rewriter, loc, newSteps[i].getType(), + rewriter.getIntegerAttr(newSteps[i].getType(), 1)); + newUB = mlir::arith::SubIOp::create(rewriter, loc, stepped, c1); + } + newUBs.push_back( + mlir::arith::MinSIOp::create(rewriter, loc, origUBs[i], newUB)); + } + + // Create and insert nested elementLoopOps before terminator of outer loopOp + mlir::acc::LoopOp currentLoop = innerLoop; + for (size_t i = 0; i < resolvedTileSizes.size(); i++) { + rewriter.setInsertionPoint(currentLoop.getBody().getTerminator()); + mlir::DenseBoolArrayAttr inclusiveUBAttr = mlir::DenseBoolArrayAttr{}; + if (inclusiveUBs[i]) + inclusiveUBAttr = rewriter.getDenseBoolArrayAttr({true}); + + mlir::acc::LoopOp elementLoop = + createInnerLoop(innerLoop, rewriter, mlir::ValueRange{origIVs[i]}, + mlir::ValueRange{newUBs[i]}, + mlir::ValueRange{origSteps[i]}, inclusiveUBAttr, loc); + + // Remove vector/worker attributes from inner element loops except + // outermost element loop + if (i > 0) { + rewriter.startOpModification(elementLoop); + removeWorkerVectorFromLoop(elementLoop); + rewriter.finalizeOpModification(elementLoop); + } + newIVs.push_back(elementLoop.getBody().getArgument(0)); + currentLoop = elementLoop; + } + + // Remove vector/worker attributes from outer tile loops + for (auto tileLoop : tileLoops) { + rewriter.startOpModification(tileLoop); + removeWorkerVectorFromLoop(tileLoop); + rewriter.finalizeOpModification(tileLoop); + } + + // Move ops from inner tile loop to inner element loop and replace IV uses + moveOpsAndReplaceIVs(innerLoop, currentLoop, newIVs, origIVs, nOps, rewriter); + + return outerLoop; +} + +llvm::SmallVector +mlir::acc::uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount, + unsigned collapseCount, + mlir::RewriterBase &rewriter) { + llvm::SmallVector newLoops; + llvm::SmallVector newIVs; + mlir::Location loc = origLoop.getLoc(); + llvm::SmallVector newInclusiveUBs; + llvm::SmallVector lbs, ubs, steps; + for (unsigned i = 0; i < collapseCount; i++) { + // inclusiveUpperbound attribute might not be set, default to false + bool inclusiveUB = false; + if (origLoop.getInclusiveUpperboundAttr()) + inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i]; + newInclusiveUBs.push_back(inclusiveUB); + lbs.push_back(origLoop.getLowerbound()[i]); + ubs.push_back(origLoop.getUpperbound()[i]); + steps.push_back(origLoop.getStep()[i]); + } + mlir::acc::LoopOp outerLoop = createACCLoopFromOriginal( + origLoop, rewriter, lbs, ubs, steps, + rewriter.getDenseBoolArrayAttr(newInclusiveUBs), + origLoop.getCombinedAttr(), loc, /*preserveCollapse*/ true); + mlir::Block *blk = rewriter.createBlock(&outerLoop.getRegion(), + outerLoop.getRegion().begin()); + rewriter.setInsertionPointToEnd(blk); + mlir::acc::YieldOp::create(rewriter, loc); + for (unsigned i = 0; i < collapseCount; i++) { + outerLoop.getBody().addArgument(origLoop.getBody().getArgument(i).getType(), + loc); + newIVs.push_back(outerLoop.getBody().getArgument(i)); + } + newLoops.push_back(outerLoop); + + mlir::acc::LoopOp currentLoopOp = outerLoop; + for (unsigned i = collapseCount; i < tileCount; i++) { + rewriter.setInsertionPoint(currentLoopOp.getBody().getTerminator()); + bool inclusiveUB = false; + if (origLoop.getInclusiveUpperboundAttr()) + inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i]; + mlir::DenseBoolArrayAttr inclusiveUBAttr = + rewriter.getDenseBoolArrayAttr({inclusiveUB}); + mlir::acc::LoopOp innerLoop = createInnerLoop( + origLoop, rewriter, mlir::ValueRange{origLoop.getLowerbound()[i]}, + mlir::ValueRange{origLoop.getUpperbound()[i]}, + mlir::ValueRange{origLoop.getStep()[i]}, inclusiveUBAttr, loc); + newIVs.push_back(innerLoop.getBody().getArgument(0)); + newLoops.push_back(innerLoop); + currentLoopOp = innerLoop; + } + // Move ops from origLoop to innermost loop and replace uses of IVs + size_t nOps = origLoop.getBody().getOperations().size(); + llvm::SmallVector origIVs; + for (auto arg : origLoop.getBody().getArguments()) + origIVs.push_back(arg); + moveOpsAndReplaceIVs(origLoop, currentLoopOp, newIVs, origIVs, nOps, + rewriter); + + return newLoops; +} diff --git a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt index c8c2bb96b0539..060c8b8d2679d 100644 --- a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt @@ -2,6 +2,7 @@ add_mlir_unittest(MLIROpenACCTests OpenACCOpsTest.cpp OpenACCOpsInterfacesTest.cpp OpenACCUtilsTest.cpp + OpenACCUtilsTilingTest.cpp ) mlir_target_link_libraries(MLIROpenACCTests PRIVATE diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp new file mode 100644 index 0000000000000..287af9fafd5b7 --- /dev/null +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp @@ -0,0 +1,349 @@ +//===- OpenACCUtilsTilingTest.cpp - Unit tests for loop tiling utilities --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/OpenACCUtilsTiling.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OwningOpRef.h" +#include "gtest/gtest.h" + +using namespace mlir; +using namespace mlir::acc; + +//===----------------------------------------------------------------------===// +// Test Fixture +//===----------------------------------------------------------------------===// + +class OpenACCUtilsTilingTest : public ::testing::Test { +protected: + OpenACCUtilsTilingTest() : b(&context), loc(UnknownLoc::get(&context)) { + context.loadDialect(); + } + + // Create a simple LoopOp with specified bounds using the simple builder + acc::LoopOp createLoopOp(OpBuilder &builder, ValueRange lbs, ValueRange ubs, + ValueRange steps) { + auto loopOp = acc::LoopOp::create(builder, loc, lbs, ubs, steps, + acc::LoopParMode::loop_independent); + + // Add body block with IV arguments and yield + Region ®ion = loopOp.getRegion(); + Block *block = builder.createBlock(®ion, region.begin()); + for (Value lb : lbs) + block->addArgument(lb.getType(), loc); + builder.setInsertionPointToEnd(block); + acc::YieldOp::create(builder, loc); + + return loopOp; + } + + // Helper to count nested acc.loop ops within a loop + unsigned countNestedLoops(acc::LoopOp loop) { + unsigned count = 0; + loop.getBody().walk([&](acc::LoopOp) { ++count; }); + return count; + } + + // Helper to collect all nested acc.loop ops in order + SmallVector collectNestedLoops(acc::LoopOp loop) { + SmallVector loops; + loop.getBody().walk([&](acc::LoopOp nestedLoop) { + loops.push_back(nestedLoop); + }); + return loops; + } + + MLIRContext context; + OpBuilder b; + Location loc; +}; + +//===----------------------------------------------------------------------===// +// tileACCLoops Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a function + auto funcType = b.getFunctionType({}, {}); + OwningOpRef funcOp = + func::FuncOp::create(b, loc, "test_func", funcType); + Block *funcBlock = funcOp->addEntryBlock(); + + b.setInsertionPointToStart(funcBlock); + + // Create loop bounds + Value lb = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(100)); + Value step = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + Value tileSize = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(4)); + + // Create the loop + acc::LoopOp loopOp = createLoopOp(b, {lb}, {ub}, {step}); + + // Tile the loop using IRRewriter + IRRewriter rewriter(&context); + rewriter.setInsertionPoint(loopOp); + + SmallVector loopsToTile = {loopOp}; + SmallVector tileSizes = {tileSize}; + + acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); + + // Verify the tiled loop was created + EXPECT_TRUE(tiledLoop != nullptr); + EXPECT_FALSE(tiledLoop.getBody().empty()); + + // After tiling a single loop with tile(4), we should have: + // - 1 tile loop (the outer loop) + // - 1 element loop nested inside + // Total: 1 nested loop inside the tile loop + EXPECT_EQ(countNestedLoops(tiledLoop), 1u); + + // The tile loop (outer) should have 1 IV + EXPECT_EQ(tiledLoop.getBody().getNumArguments(), 1u); + + // Collect nested loops and verify + auto nestedLoops = collectNestedLoops(tiledLoop); + EXPECT_EQ(nestedLoops.size(), 1u); + if (!nestedLoops.empty()) { + // The element loop should have 1 IV + EXPECT_EQ(nestedLoops[0].getBody().getNumArguments(), 1u); + } +} + +TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a function + auto funcType = b.getFunctionType({}, {}); + OwningOpRef funcOp = + func::FuncOp::create(b, loc, "test_func", funcType); + Block *funcBlock = funcOp->addEntryBlock(); + + b.setInsertionPointToStart(funcBlock); + + // Create loop bounds for outer loop + Value lb1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(100)); + Value step1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + + // Create loop bounds for inner loop + Value lb2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(50)); + Value step2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + + // Tile sizes + Value tileSize1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(4)); + Value tileSize2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(8)); + + // Create outer loop + acc::LoopOp outerLoop = createLoopOp(b, {lb1}, {ub1}, {step1}); + + // Create inner loop inside outer loop + b.setInsertionPoint(outerLoop.getBody().getTerminator()); + acc::LoopOp innerLoop = createLoopOp(b, {lb2}, {ub2}, {step2}); + + // Tile the loops + IRRewriter rewriter(&context); + rewriter.setInsertionPoint(outerLoop); + + SmallVector loopsToTile = {outerLoop, innerLoop}; + SmallVector tileSizes = {tileSize1, tileSize2}; + + acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); + + // Verify the tiled loop nest was created + EXPECT_TRUE(tiledLoop != nullptr); + EXPECT_FALSE(tiledLoop.getBody().empty()); + + // After tiling a 2-level nested loop with tile(4,8), we should have: + // tile_loop_1 -> tile_loop_2 -> element_loop_1 -> element_loop_2 + // Total: 3 nested loops inside the outermost tile loop + unsigned nestedCount = countNestedLoops(tiledLoop); + EXPECT_EQ(nestedCount, 3u); + + // The outermost tile loop should have 1 IV + EXPECT_EQ(tiledLoop.getBody().getNumArguments(), 1u); + + // Collect all nested loops and verify each has 1 IV + auto nestedLoops = collectNestedLoops(tiledLoop); + EXPECT_EQ(nestedLoops.size(), 3u); + for (auto loop : nestedLoops) { + EXPECT_EQ(loop.getBody().getNumArguments(), 1u); + } +} + +//===----------------------------------------------------------------------===// +// uncollapseLoops Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsBasic) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a function + auto funcType = b.getFunctionType({}, {}); + OwningOpRef funcOp = + func::FuncOp::create(b, loc, "test_func", funcType); + Block *funcBlock = funcOp->addEntryBlock(); + + b.setInsertionPointToStart(funcBlock); + + // Create loop bounds for a collapsed 2-level loop + Value lb1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(10)); + Value step1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + Value lb2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(20)); + Value step2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + + // Create a collapsed loop with 2 IVs + acc::LoopOp collapsedLoop = + createLoopOp(b, {lb1, lb2}, {ub1, ub2}, {step1, step2}); + + // Set the collapse attribute + collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None}, + llvm::APInt(64, 1)); + + // Uncollapse the loop: tileCount=2, collapseCount=1 + IRRewriter rewriter(&context); + rewriter.setInsertionPoint(collapsedLoop); + + SmallVector uncollapsedLoops = uncollapseLoops( + collapsedLoop, /*tileCount=*/2, /*collapseCount=*/1, rewriter); + + // Should produce 2 loops (one outer with collapse=1, one inner) + EXPECT_EQ(uncollapsedLoops.size(), 2u); + + if (uncollapsedLoops.size() >= 2) { + // Verify the outer loop has 1 IV (collapseCount=1) + acc::LoopOp outerLoop = uncollapsedLoops[0]; + EXPECT_EQ(outerLoop.getBody().getNumArguments(), 1u); + EXPECT_EQ(outerLoop.getLowerbound().size(), 1u); + EXPECT_EQ(outerLoop.getUpperbound().size(), 1u); + EXPECT_EQ(outerLoop.getStep().size(), 1u); + + // Verify the inner loop has 1 IV + acc::LoopOp innerLoop = uncollapsedLoops[1]; + EXPECT_EQ(innerLoop.getBody().getNumArguments(), 1u); + EXPECT_EQ(innerLoop.getLowerbound().size(), 1u); + EXPECT_EQ(innerLoop.getUpperbound().size(), 1u); + EXPECT_EQ(innerLoop.getStep().size(), 1u); + + // Verify nesting: inner loop should be inside outer loop + unsigned nestedCount = countNestedLoops(outerLoop); + EXPECT_EQ(nestedCount, 1u); + } +} + +TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsThreeLevels) { + // Test uncollapsing with 3 levels: collapse(2) with tile(3) + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + auto funcType = b.getFunctionType({}, {}); + OwningOpRef funcOp = + func::FuncOp::create(b, loc, "test_func", funcType); + Block *funcBlock = funcOp->addEntryBlock(); + + b.setInsertionPointToStart(funcBlock); + + // Create 3 sets of bounds + Value lb1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(10)); + Value step1 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + Value lb2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(20)); + Value step2 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + Value lb3 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0)); + Value ub3 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(30)); + Value step3 = + arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); + + // Create a collapsed loop with 3 IVs + acc::LoopOp collapsedLoop = createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3}, + {step1, step2, step3}); + + // Set collapse(2) + collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None}, + llvm::APInt(64, 2)); + + // Uncollapse: tileCount=3, collapseCount=2 + // This should create: outer loop with 2 IVs, then 1 inner loop + IRRewriter rewriter(&context); + rewriter.setInsertionPoint(collapsedLoop); + + SmallVector uncollapsedLoops = uncollapseLoops( + collapsedLoop, /*tileCount=*/3, /*collapseCount=*/2, rewriter); + + // Should produce 2 loops + EXPECT_EQ(uncollapsedLoops.size(), 2u); + + if (uncollapsedLoops.size() >= 2) { + // Outer loop should have 2 IVs (from collapse=2) + acc::LoopOp outerLoop = uncollapsedLoops[0]; + EXPECT_EQ(outerLoop.getBody().getNumArguments(), 2u); + EXPECT_EQ(outerLoop.getLowerbound().size(), 2u); + + // Inner loop should have 1 IV (the 3rd dimension) + acc::LoopOp innerLoop = uncollapsedLoops[1]; + EXPECT_EQ(innerLoop.getBody().getNumArguments(), 1u); + EXPECT_EQ(innerLoop.getLowerbound().size(), 1u); + } +} From ec1c2946c4bc73c7ff84120342dc25f4010760ef Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 9 Dec 2025 11:01:44 -0800 Subject: [PATCH 2/6] Fix braces --- mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp index f939ec1c58cfd..afd1a32e5087b 100644 --- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp @@ -23,11 +23,10 @@ static mlir::Value resolveUnknownTileSize(mlir::Value tileSize, mlir::RewriterBase &rewriter, mlir::Location loc) { auto constVal = mlir::getConstantIntValue(tileSize); - if (constVal && *constVal < 0) { + if (constVal && *constVal < 0) return mlir::arith::ConstantOp::create( rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(defaultTileSize)); - } return tileSize; } From 71f5758d6ce0901726b4d60dbace77455f3360aa Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 9 Dec 2025 11:02:07 -0800 Subject: [PATCH 3/6] Fix API comment --- mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h index 3152526cc0582..6fcb706aa3488 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h @@ -63,7 +63,7 @@ uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount, /// } /// \endcode /// -/// Unknown tile sizes (represented as -1 in OpenACC for `tile(*)`) are +/// Unknown tile sizes (represented as -1 in acc dialect for `tile(*)`) are /// resolved to the provided default tile size. /// /// \param tileLoops The loops to tile (outermost first). From 32c12cb27f38b7013bdc02b13a01906e87746d03 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 9 Dec 2025 11:02:59 -0800 Subject: [PATCH 4/6] Fix formatting of test --- .../Dialect/OpenACC/OpenACCUtilsTilingTest.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp index 287af9fafd5b7..07f2ca67d43bc 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp @@ -58,9 +58,8 @@ class OpenACCUtilsTilingTest : public ::testing::Test { // Helper to collect all nested acc.loop ops in order SmallVector collectNestedLoops(acc::LoopOp loop) { SmallVector loops; - loop.getBody().walk([&](acc::LoopOp nestedLoop) { - loops.push_back(nestedLoop); - }); + loop.getBody().walk( + [&](acc::LoopOp nestedLoop) { loops.push_back(nestedLoop); }); return loops; } @@ -109,7 +108,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) { SmallVector loopsToTile = {loopOp}; SmallVector tileSizes = {tileSize}; - acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); + acc::LoopOp tiledLoop = + tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); // Verify the tiled loop was created EXPECT_TRUE(tiledLoop != nullptr); @@ -185,7 +185,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) { SmallVector loopsToTile = {outerLoop, innerLoop}; SmallVector tileSizes = {tileSize1, tileSize2}; - acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); + acc::LoopOp tiledLoop = + tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter); // Verify the tiled loop nest was created EXPECT_TRUE(tiledLoop != nullptr); @@ -317,8 +318,8 @@ TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsThreeLevels) { arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1)); // Create a collapsed loop with 3 IVs - acc::LoopOp collapsedLoop = createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3}, - {step1, step2, step3}); + acc::LoopOp collapsedLoop = + createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3}, {step1, step2, step3}); // Set collapse(2) collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None}, From a408794bdaaa2cb7cfd657040338d956325c3252 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 9 Dec 2025 11:04:36 -0800 Subject: [PATCH 5/6] Fix brace issues in test --- mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp index 07f2ca67d43bc..95bc1eab7d3fe 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp @@ -127,10 +127,9 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) { // Collect nested loops and verify auto nestedLoops = collectNestedLoops(tiledLoop); EXPECT_EQ(nestedLoops.size(), 1u); - if (!nestedLoops.empty()) { - // The element loop should have 1 IV + // The element loop should have 1 IV + if (!nestedLoops.empty()) EXPECT_EQ(nestedLoops[0].getBody().getNumArguments(), 1u); - } } TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) { @@ -204,9 +203,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) { // Collect all nested loops and verify each has 1 IV auto nestedLoops = collectNestedLoops(tiledLoop); EXPECT_EQ(nestedLoops.size(), 3u); - for (auto loop : nestedLoops) { + for (auto loop : nestedLoops) EXPECT_EQ(loop.getBody().getNumArguments(), 1u); - } } //===----------------------------------------------------------------------===// From 97839355253307f5a60a6cdead2242c03e3e722d Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 9 Dec 2025 11:05:58 -0800 Subject: [PATCH 6/6] One more braces issue --- mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp index afd1a32e5087b..bf82d247028b9 100644 --- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp @@ -137,10 +137,9 @@ mlir::acc::tileACCLoops(llvm::SmallVector &tileLoops, // Resolve unknown tile sizes (tile(*) represented as -1) llvm::SmallVector resolvedTileSizes; rewriter.setInsertionPoint(outerLoop); - for (mlir::Value tileSize : tileSizes) { + for (mlir::Value tileSize : tileSizes) resolvedTileSizes.push_back( resolveUnknownTileSize(tileSize, defaultTileSize, rewriter, loc)); - } mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1]; llvm::SmallVector origIVs;