diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
index fe806186ce25d..8b50ad8e583c5 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp
@@ -502,10 +502,19 @@ ComputeRegionOp::wireHoistedValueThroughIns(Value value) {

 bool ComputeRegionOp::isEffectivelySerial() {
   auto *ctx = getContext();
+  // With no launch arguments the region's parallelism has not been planned
+  // yet, so it must not be reported as effectively serial.
+  if (getLaunchArgs().empty())
+    return false;
+
   if (getLaunchArg(GPUParallelDimAttr::seqDim(ctx)))
     return true;

   auto checkDim = [&](GPUParallelDimAttr dim) -> bool {
+    // A dimension with no explicit `acc.par_width` has no parallelism
+    // assigned to it and therefore defaults to a width of 1.
+    if (!getLaunchArg(dim))
+      return true;
     auto val = getKnownConstantLaunchArg(dim);
     return val && *val == 1;
   };
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCCGOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCCGOpsTest.cpp
index 44283c9091b4d..382e42f64fc40 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCCGOpsTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCCGOpsTest.cpp
@@ -93,11 +93,79 @@ class OpenACCCGOpsTest : public ::testing::Test {
     YieldOp::create(regionBuilder, loc);
   }

+  /// Build a single `acc.compute_region` with the given launch arguments.
+  ComputeRegionOp makeComputeRegion(HostContext &host, ValueRange launchArgs) {
+    Region sourceRegion;
+    populateSourceRegionSingleBlock(sourceRegion, context, loc, std::nullopt,
+                                    false);
+    IRMapping mapping;
+    // The origin is left empty because it is not relevant to these tests.
+    ComputeRegionOp cr = buildComputeRegion(
+        loc, launchArgs, {}, "", sourceRegion, host.rewriter, mapping);
+    return cr;
+  }
+
   MLIRContext context;
   OpBuilder b;
   Location loc;
 };

+//===----------------------------------------------------------------------===//
+// ComputeRegionOp::isEffectivelySerial
+//===----------------------------------------------------------------------===//
+
+TEST_F(OpenACCCGOpsTest, IsEffectivelySerialFalseWhenNoLaunchArgs) {
+  HostContext host(context, loc, b);
+  ComputeRegionOp cr = makeComputeRegion(host, {});
+  // No launch configuration yet, so the region is not effectively serial.
+  EXPECT_FALSE(cr.isEffectivelySerial());
+  EXPECT_TRUE(succeeded(host.module->verify()));
+}
+
+TEST_F(OpenACCCGOpsTest, IsEffectivelySerialSparseDimsConstantOne) {
+  HostContext host(context, loc, b);
+  Value c1 = arith::ConstantIndexOp::create(host.rewriter, loc, 1);
+  Value pwBx = ParWidthOp::create(host.rewriter, loc, c1,
+                                  GPUParallelDimAttr::blockXDim(&context));
+  Value pwTy = ParWidthOp::create(host.rewriter, loc, c1,
+                                  GPUParallelDimAttr::threadYDim(&context));
+  Value pwTx = ParWidthOp::create(host.rewriter, loc, c1,
+                                  GPUParallelDimAttr::threadXDim(&context));
+  ComputeRegionOp cr = makeComputeRegion(host, {pwBx, pwTy, pwTx});
+  EXPECT_TRUE(cr.isEffectivelySerial());
+  EXPECT_TRUE(succeeded(host.module->verify()));
+}
+
+TEST_F(OpenACCCGOpsTest,
+       IsEffectivelySerialFalseWhenThreadXWidthGreaterThanOne) {
+  HostContext host(context, loc, b);
+  Value c2 = arith::ConstantIndexOp::create(host.rewriter, loc, 2);
+  Value pwTx = ParWidthOp::create(host.rewriter, loc, c2,
+                                  GPUParallelDimAttr::threadXDim(&context));
+  ComputeRegionOp cr = makeComputeRegion(host, pwTx);
+  EXPECT_FALSE(cr.isEffectivelySerial());
+  EXPECT_TRUE(succeeded(host.module->verify()));
+}
+
+TEST_F(OpenACCCGOpsTest, IsEffectivelySerialFalseWhenLaunchWidthUnknown) {
+  HostContext host(context, loc, b);
+  Value pwTx = ParWidthOp::create(host.rewriter, loc, Value(),
+                                  GPUParallelDimAttr::threadXDim(&context));
+  ComputeRegionOp cr = makeComputeRegion(host, pwTx);
+  EXPECT_FALSE(cr.isEffectivelySerial());
+  EXPECT_TRUE(succeeded(host.module->verify()));
+}
+
+TEST_F(OpenACCCGOpsTest, IsEffectivelySerialTrueWhenSeqLaunchPresent) {
+  HostContext host(context, loc, b);
+  Value c7 = arith::ConstantIndexOp::create(host.rewriter, loc, 7);
+  Value pwSeq = ParWidthOp::create(host.rewriter, loc, c7,
+                                   GPUParallelDimAttr::seqDim(&context));
+  ComputeRegionOp cr = makeComputeRegion(host, pwSeq);
+  EXPECT_TRUE(cr.isEffectivelySerial());
+  EXPECT_TRUE(succeeded(host.module->verify()));
+}
+
 //===----------------------------------------------------------------------===//
 // ComputeRegionOp::wireHoistedValueThroughIns
 //===----------------------------------------------------------------------===//