Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2727,4 +2727,44 @@ def GPU_SetCsrPointersOp : GPU_Op<"set_csr_pointers", [GPU_AsyncOpInterface]> {
}];
}

def GPU_ConditionalExecutionOp : GPU_Op<"conditional_execution", [
    DeclareOpInterfaceMethods<RegionBranchOpInterface>
  ]> {
  let summary = "Executes a region of code based on the surrounding context.";
  let description = [{
    The `conditional_execution` operation executes a region of host or device
    code depending on the surrounding execution context of the operation. If
    the operation is inside a GPU module or launch operation, it executes the
    device region; otherwise, it runs the host region.

    This operation can yield a variadic set of results. If the operation yields
    results, then both regions have to be present. However, if there are no
    results, then it's valid to implement only one of the regions.

    Examples:
    ```mlir
    // Conditional execution with results.
    %res = gpu.conditional_execution device {
      ...
      gpu.yield %val : i32
    } host {
      ...
      gpu.yield %val : i32
    } -> i32
    // Conditional execution with no results and only the host region.
    gpu.conditional_execution host {
      ...
      gpu.yield
    }
    ```
  }];
  let results = (outs Variadic<AnyType>:$results);
  let regions = (region AnyRegion:$hostRegion, AnyRegion:$deviceRegion);
  let assemblyFormat = [{
    (`device` $deviceRegion^)? (`host` $hostRegion^)? attr-dict
    (`->` type($results)^)?
  }];
  let hasVerifier = 1;
}

#endif // GPU_OPS
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ void populateGpuShufflePatterns(RewritePatternSet &patterns);
/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuAllReducePatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to rewrite conditional-execution ops within the
/// GPU dialect.
void populateGpuConditionalExecutionPatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to break down subgroup_reduce ops into smaller
/// ones supported by the target of `size <= maxShuffleBitwidth`, where `size`
/// is the subgroup_reduce value bitwidth.
Expand Down
31 changes: 31 additions & 0 deletions mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,35 @@ def GpuSPIRVAttachTarget: Pass<"spirv-attach-target", ""> {
];
}

// Pass definition for `-gpu-resolve-conditional-execution`: inlines the
// region of each `gpu.conditional_execution` op that matches its surrounding
// execution context (device vs. host) as an `scf.execute_region`.
def GpuResolveConditionalExecutionPass :
    Pass<"gpu-resolve-conditional-execution", ""> {
  let summary = "Resolve all conditional execution operations";
  let description = [{
    This pass searches for all `gpu.conditional_execution` operations and
    inlines the appropriate region depending on the execution context. If the
    operation is inside any of the [`gpu.module`, `gpu.func`, `gpu.launch`]
    operations, then the pass inlines the device region; otherwise, it
    inlines the host region.
    Example:
    ```
    func.func @conditional_execution(%dev: index, %host: index) {
      %0 = gpu.conditional_execution device {
        gpu.yield %dev : index
      } host {
        gpu.yield %host : index
      } -> index
      return
    }
    // mlir-opt --gpu-resolve-conditional-execution
    func.func @conditional_execution(%dev: index, %host: index) {
      %0 = scf.execute_region -> index {
        scf.yield %host : index
      }
      return
    }
    ```
  }];
  // The pass materializes `scf.execute_region`/`scf.yield` ops.
  let dependentDialects = ["scf::SCFDialect"];
}

#endif // MLIR_DIALECT_GPU_PASSES
1 change: 1 addition & 0 deletions mlir/lib/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/ModuleToBinary.cpp
Transforms/NVVMAttachTarget.cpp
Transforms/ParallelLoopMapper.cpp
Transforms/ResolveConditionalExecution.cpp
Transforms/ROCDLAttachTarget.cpp
Transforms/SerializeToBlob.cpp
Transforms/SerializeToCubin.cpp
Expand Down
40 changes: 40 additions & 0 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2204,6 +2204,46 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() {
return success();
}

//===----------------------------------------------------------------------===//
// ConditionalExecutionOp
//===----------------------------------------------------------------------===//

LogicalResult ConditionalExecutionOp::verify() {
  Region &devRegion = getDeviceRegion();
  Region &hostRegion = getHostRegion();
  // At least one of the two regions must be implemented.
  if (devRegion.empty() && hostRegion.empty())
    return emitError("both regions can't be empty");
  // Results require both regions: which one runs is unknown until the
  // execution context is resolved, and either must be able to produce them.
  if (getNumResults() > 0 && (devRegion.empty() || hostRegion.empty()))
    return emitError(
        "when there are results both regions have to be specified");
  // Every implemented region must end in `gpu.yield` so results (if any) can
  // be forwarded to the parent op.
  for (Region *region : {&devRegion, &hostRegion})
    if (!region->empty() && !isa<YieldOp>(region->back().getTerminator()))
      return emitError(
          "conditional execution regions must terminate with gpu.yield");
  return success();
}

void ConditionalExecutionOp::getSuccessorRegions(
    RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
  // Control leaving either sub-region always returns to the parent op,
  // forwarding the yielded values as the op's results.
  if (!point.isParent()) {
    regions.push_back(RegionSuccessor(getResults()));
    return;
  }

  // From the parent, control may enter whichever regions are implemented. An
  // empty region is reported as a parent successor (no region), i.e. control
  // may flow straight through the op.
  for (Region *region : {&getDeviceRegion(), &getHostRegion()})
    regions.push_back(region->empty() ? RegionSuccessor()
                                      : RegionSuccessor(region));
}

//===----------------------------------------------------------------------===//
// GPU target options
//===----------------------------------------------------------------------===//
Expand Down
100 changes: 100 additions & 0 deletions mlir/lib/Dialect/GPU/Transforms/ResolveConditionalExecution.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
//===- ResolveConditionalExecution.cpp - Resolve conditional exec ops ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the `gpu-resolve-conditional-execution` pass.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;
using namespace mlir::gpu;

namespace mlir {
#define GEN_PASS_DEF_GPURESOLVECONDITIONALEXECUTIONPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

namespace {
/// Pass that resolves every `gpu.conditional_execution` op in the payload by
/// inlining the region matching the surrounding execution context.
class GpuResolveConditionalExecutionPass
    : public impl::GpuResolveConditionalExecutionPassBase<
          GpuResolveConditionalExecutionPass> {
public:
  using Base::Base;
  void runOnOperation() final {
    RewritePatternSet patterns(&getContext());
    populateGpuConditionalExecutionPatterns(patterns);
    if (failed(
            applyPatternsAndFoldGreedily(getOperation(), std::move(patterns))))
      signalPassFailure();
  }
};
} // namespace

namespace {
/// Rewrites a `gpu.conditional_execution` op into an `scf.execute_region`
/// holding the region that matches the surrounding execution context, or
/// erases the op when the matching region was not provided.
struct GpuConditionalExecutionOpRewriter
    : public OpRewritePattern<ConditionalExecutionOp> {
  using OpRewritePattern<ConditionalExecutionOp>::OpRewritePattern;

  /// Returns true if `op` is nested inside a device execution context, i.e.
  /// inside any of [`gpu.func`, `gpu.launch`, `gpu.module`].
  bool isDevice(Operation *op) const {
    while ((op = op->getParentOp()))
      if (isa<GPUFuncOp, LaunchOp, GPUModuleOp>(op))
        return true;
    return false;
  }

  LogicalResult matchAndRewrite(ConditionalExecutionOp op,
                                PatternRewriter &rewriter) const override {
    // Select the region matching the current execution context.
    Region &region =
        isDevice(op) ? op.getDeviceRegion() : op.getHostRegion();
    // If the matching region was not implemented, the op performs no work:
    // erase it. The verifier guarantees such an op has no results, so there
    // are no uses to replace.
    if (region.empty()) {
      rewriter.eraseOp(op);
      return success();
    }
    // Replace `ConditionalExecutionOp` with a `scf::ExecuteRegionOp` holding
    // the selected region.
    auto execRegionOp = rewriter.create<scf::ExecuteRegionOp>(
        op.getLoc(), op.getResults().getTypes());
    rewriter.inlineRegionBefore(region, execRegionOp.getRegion(),
                                execRegionOp.getRegion().begin());
    // Update the calling site.
    if (op.getResults().empty())
      rewriter.eraseOp(op);
    else
      rewriter.replaceOp(op, execRegionOp);

    // The verifier guarantees non-empty regions terminate with `gpu::YieldOp`,
    // so `cast` (which asserts) is the right tool here — the previous
    // `dyn_cast` result was dereferenced without a null check.
    auto yieldOp =
        cast<YieldOp>(execRegionOp.getRegion().back().getTerminator());
    rewriter.setInsertionPoint(yieldOp);
    rewriter.replaceOpWithNewOp<scf::YieldOp>(yieldOp, yieldOp.getValues());
    return success();
  }
};
} // namespace

/// Populates `patterns` with the rewrite that resolves
/// `gpu.conditional_execution` ops.
void mlir::populateGpuConditionalExecutionPatterns(
    RewritePatternSet &patterns) {
  MLIRContext *context = patterns.getContext();
  patterns.add<GpuConditionalExecutionOpRewriter>(context);
}
21 changes: 21 additions & 0 deletions mlir/test/Dialect/GPU/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -818,3 +818,24 @@ func.func @main(%arg0 : index) {
return
}

// -----

// Verifier rejects an op that yields results but implements only one region.
func.func @conditional_execution(%sz : index) {
  // expected-error@+1 {{when there are results both regions have to be specified}}
  %val = gpu.conditional_execution device {
    gpu.yield %sz: index
  } -> index
  return
}

// -----

// RegionBranchOpInterface rejects a region yielding fewer values than the
// op's results.
func.func @conditional_execution(%sz : index) {
  // expected-error@+1 {{'gpu.conditional_execution' op region control flow edge from Region #0 to parent results: source has 0 operands, but target successor needs 1}}
  %val = gpu.conditional_execution device {
    gpu.yield %sz: index
  } host {
    gpu.yield
  } -> index
  return
}
15 changes: 15 additions & 0 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,18 @@ gpu.module @module_with_two_target [#nvvm.target, #rocdl.target<chip = "gfx90a">
gpu.return
}
}

// Round-trip test for `gpu.conditional_execution`: with both regions and a
// result, with only the device region, and with only the host region
// (no results in the single-region forms).
func.func @conditional_execution(%sz : index) {
  %val = gpu.conditional_execution device {
    gpu.yield %sz: index
  } host {
    gpu.yield %sz: index
  } -> index
  gpu.conditional_execution device {
    gpu.yield
  }
  gpu.conditional_execution host {
    gpu.yield
  }
  return
}
Loading