Revert "[mlir] Create a gpu.module operation for the GPU Dialect."
This reverts commit 4624a1e, which was causing problems downstream.

(cherry picked from commit 0133cc6)
d0k authored and zmodem committed Jan 16, 2020
1 parent c4a134a commit ac44630
Showing 24 changed files with 140 additions and 235 deletions.
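For orientation: the reverted patch modeled GPU kernel modules with a dedicated `gpu.module` op, while this revert returns to the earlier convention of tagging a named builtin `module` with the `gpu.kernel_module` unit attribute, which the restored code queries via `GPUDialect::getKernelModuleAttrName()`. A minimal sketch of the two forms, with function syntax approximated from the removed op description further down:

```mlir
// Form removed by this revert: a dedicated op with its own terminator.
gpu.module @kernels {
  gpu.func @kernel() kernel {
    gpu.return
  }
  gpu.module_end
}

// Form restored by this revert: a named builtin module carrying a unit
// attribute; passes recognize kernel modules by attribute and name.
module @kernels attributes {gpu.kernel_module} {
  gpu.func @kernel() kernel {
    gpu.return
  }
}
```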
11 changes: 3 additions & 8 deletions mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h
@@ -19,17 +19,12 @@ namespace mlir {
class Location;
class ModuleOp;

template <typename T>
class OpPassBase;

namespace gpu {
class GPUModuleOp;
} // namespace gpu

namespace LLVM {
class LLVMDialect;
} // namespace LLVM

template <typename T> class OpPassBase;

using OwnedCubin = std::unique_ptr<std::vector<char>>;
using CubinGenerator =
std::function<OwnedCubin(const std::string &, Location, StringRef)>;
@@ -43,7 +38,7 @@ using CubinGenerator =
/// attached as a string attribute named 'nvvm.cubin' to the kernel function.
/// After the transformation, the body of the kernel function is removed (i.e.,
/// it is turned into a declaration).
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
std::unique_ptr<OpPassBase<ModuleOp>>
createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator);

/// Creates a pass to convert a gpu.launch_func operation into a sequence of
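The comment above says the CUBIN ends up as a string attribute named 'nvvm.cubin'; in the pass body later in this diff, the attribute is set on the kernel module itself (`module.setAttr(kCubinAnnotation, cubinAttr)`). A rough before/after sketch, with the binary blob replaced by a placeholder:

```mlir
// Before the pass: a named kernel module whose function still has a body.
module @kernels attributes {gpu.kernel_module} {
  func @kernel() attributes {gpu.kernel} {
    return
  }
}

// After the pass: the compiled blob is attached as a string attribute
// and the kernel body is erased, leaving a declaration.
module @kernels attributes {gpu.kernel_module, nvvm.cubin = "<cubin bytes>"} {
  func @kernel() attributes {gpu.kernel}
}
```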
10 changes: 3 additions & 7 deletions mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -14,19 +14,15 @@ namespace mlir {
class LLVMTypeConverter;
class OwningRewritePatternList;

template <typename OpT>
class OpPassBase;

namespace gpu {
class GPUModuleOp;
}
class ModuleOp;
template <typename OpT> class OpPassBase;

/// Collect a set of patterns to convert from the GPU dialect to NVVM.
void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
OwningRewritePatternList &patterns);

/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass();
std::unique_ptr<OpPassBase<ModuleOp>> createLowerGpuOpsToNVVMOpsPass();

} // namespace mlir

52 changes: 0 additions & 52 deletions mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -588,56 +588,4 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
let printer = [{ p << getOperationName(); }];
}

def GPU_GPUModuleOp : GPU_Op<"module", [
IsolatedFromAbove, SymbolTable, Symbol,
SingleBlockImplicitTerminator<"ModuleEndOp">
]> {
let summary = "A top level compilation unit containing code to be run on a GPU.";
let description = [{
GPU module contains code that is intended to be run on a GPU. A host device
can launch this code through a gpu.launch_func that creates a fully
qualified symbol through the gpu.module's symbol and a gpu.func symbol
contained in the gpu.module.

The module's top-level scope is modeled by a single region with a single
block. GPU modules are required to have a name that is used for symbol
resolution by the gpu.launch_func operation.

Using an op with a region to define a GPU module enables "embedding" GPU
modules with SIMT execution models in other dialects in a clean manner and
allows filtering of code regions to execute passes on only code intended to
or not intended to be run on the separate device.

```
gpu.module @symbol_name {
gpu.func {}
...
gpu.module_end
}

```
}];
let builders = [OpBuilder<"Builder *builder, OperationState &result, "
"StringRef name">];
let parser = [{ return ::parseGPUModuleOp(parser, result); }];
let printer = [{ return ::print(p, *this); }];
let regions = (region SizedRegion<1>:$body);

// We need to ensure the block inside the region is properly terminated;
// the auto-generated builders do not guarantee that.
let skipDefaultBuilders = 1;
}

def GPU_ModuleEndOp : GPU_Op<"module_end", [
Terminator, HasParent<"GPUModuleOp">
]> {
let summary = "A pseudo op that marks the end of a gpu.module.";
let description = [{
This op terminates the only block inside the only region of a `gpu.module`.
}];

let parser = [{ return success(); }];
let printer = [{ p << getOperationName(); }];
}

#endif // GPU_OPS
21 changes: 12 additions & 9 deletions mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
@@ -46,15 +46,18 @@ static constexpr const char *kCubinAnnotation = "nvvm.cubin";
/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
/// GPU binary code, which is then attached as an attribute to the function. The
/// function body is erased.
class GpuKernelToCubinPass
: public OperationPass<GpuKernelToCubinPass, gpu::GPUModuleOp> {
class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> {
public:
GpuKernelToCubinPass(
CubinGenerator cubinGenerator = compilePtxToCubinForTesting)
: cubinGenerator(cubinGenerator) {}

void runOnOperation() override {
gpu::GPUModuleOp module = getOperation();
void runOnModule() override {
ModuleOp module = getModule();
if (!module.getAttrOfType<UnitAttr>(
gpu::GPUDialect::getKernelModuleAttrName()) ||
!module.getName())
return;

// Make sure the NVPTX target is initialized.
LLVMInitializeNVPTXTarget();
@@ -68,8 +71,8 @@ class GpuKernelToCubinPass

// Translate the module to CUBIN and attach the result as attribute to the
// module.
if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
*llvmModule, module.getLoc(), module.getName()))
if (auto cubinAttr = translateGpuModuleToCubinAnnotation(
*llvmModule, module.getLoc(), *module.getName()))
module.setAttr(kCubinAnnotation, cubinAttr);
else
signalPassFailure();
@@ -89,7 +92,7 @@ class GpuKernelToCubinPass
StringRef name);

/// Translates llvmModule to cubin and returns the result as attribute.
StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
StringAttr translateGpuModuleToCubinAnnotation(llvm::Module &llvmModule,
Location loc, StringRef name);

CubinGenerator cubinGenerator;
@@ -146,15 +149,15 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
return cubinGenerator(ptx, loc, name);
}

StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
llvm::Module &llvmModule, Location loc, StringRef name) {
auto cubin = convertModuleToCubin(llvmModule, loc, name);
if (!cubin)
return {};
return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
}

std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
}
13 changes: 7 additions & 6 deletions mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
@@ -132,9 +132,9 @@ class GpuLaunchFuncToCudaCallsPass

// GPU kernel modules are no longer necessary since we have a global
// constant with the CUBIN data.
for (auto m :
llvm::make_early_inc_range(getModule().getOps<gpu::GPUModuleOp>()))
m.erase();
for (auto m : llvm::make_early_inc_range(getModule().getOps<ModuleOp>()))
if (m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
m.erase();
}

private:
@@ -343,8 +343,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
builder.getI32IntegerAttr(0));
// Create an LLVM global with CUBIN extracted from the kernel annotation and
// obtain a pointer to the first byte in it.
auto kernelModule = getModule().lookupSymbol<gpu::GPUModuleOp>(
launchOp.getKernelModuleName());
auto kernelModule =
getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
assert(kernelModule && "expected a kernel module");

auto cubinAttr = kernelModule.getAttrOfType<StringAttr>(kCubinAnnotation);
@@ -354,7 +354,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
return signalPassFailure();
}

SmallString<128> nameBuffer(kernelModule.getName());
assert(kernelModule.getName() && "expected a named module");
SmallString<128> nameBuffer(*kernelModule.getName());
nameBuffer.append(kCubinStorageSuffix);
Value data = LLVM::createGlobalString(
loc, builder, nameBuffer.str(), cubinAttr.getValue(),
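For the global created above: assuming the (elided) `kCubinStorageSuffix` is `_cubin`, the blob for a kernel module named `@kernels` would surface in the host module roughly as the hypothetical global below, whose first-byte address the generated code then hands to the CUDA runtime wrappers:

```mlir
// Hypothetical LLVM-dialect global produced by LLVM::createGlobalString;
// the real initializer is the raw CUBIN binary, not readable text.
llvm.mlir.global internal constant @kernels_cubin("<cubin bytes>")
```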
21 changes: 11 additions & 10 deletions mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -200,7 +200,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
auto type = operand.getType().cast<LLVM::LLVMType>();

// Create shared memory array to store the warp reduction.
auto module = operand.getDefiningOp()->getParentOfType<gpu::GPUModuleOp>();
auto module = operand.getDefiningOp()->getParentOfType<ModuleOp>();
assert(module && "op must belong to a module");
Value sharedMemPtr =
createSharedMemoryArray(loc, module, type, kWarpSize, rewriter);
@@ -391,10 +391,10 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
}

/// Creates a global array stored in shared memory.
Value createSharedMemoryArray(Location loc, gpu::GPUModuleOp module,
Value createSharedMemoryArray(Location loc, ModuleOp module,
LLVM::LLVMType elementType, int numElements,
ConversionPatternRewriter &rewriter) const {
OpBuilder builder(module.body());
OpBuilder builder(module.getBodyRegion());

auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements);
StringRef name = "reduce_buffer";
@@ -699,11 +699,13 @@ struct GPUReturnOpLowering : public LLVMOpLowering {
///
/// This pass only handles device code and is not meant to be run on GPU host
/// code.
class LowerGpuOpsToNVVMOpsPass
: public OperationPass<LowerGpuOpsToNVVMOpsPass, gpu::GPUModuleOp> {
class LowerGpuOpsToNVVMOpsPass : public ModulePass<LowerGpuOpsToNVVMOpsPass> {
public:
void runOnOperation() override {
gpu::GPUModuleOp m = getOperation();
void runOnModule() override {
ModuleOp m = getModule();
if (!m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
return;

OwningRewritePatternList patterns;
NVVMTypeConverter converter(m.getContext());
populateStdToLLVMConversionPatterns(converter, patterns);
@@ -716,7 +718,7 @@ class LowerGpuOpsToNVVMOpsPass
target.addLegalDialect<LLVM::LLVMDialect>();
target.addLegalDialect<NVVM::NVVMDialect>();
// TODO(csigg): Remove once we support replacing non-root ops.
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>();
target.addLegalOp<gpu::YieldOp>();
if (failed(applyPartialConversion(m, target, patterns, &converter)))
signalPassFailure();
}
Expand Down Expand Up @@ -748,8 +750,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_exp");
}

std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToNVVMOpsPass() {
std::unique_ptr<OpPassBase<ModuleOp>> mlir::createLowerGpuOpsToNVVMOpsPass() {
return std::make_unique<LowerGpuOpsToNVVMOpsPass>();
}

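With the revert, the NVVM lowering is a `ModulePass` again: it visits plain modules and exits early unless the kernel-module attribute is present. In a nested layout like the sketch below, only the inner annotated module is rewritten:

```mlir
module {
  // Host code: the pass returns immediately at this level (no attribute).
  func @host() {
    return
  }
  // Kernel module: GPU dialect ops in here are lowered to NVVM.
  module @kernels attributes {gpu.kernel_module} {
    func @kernel() attributes {gpu.kernel} {
      %tid = "gpu.thread_id"() {dimension = "x"} : () -> index
      return
    }
  }
}
```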
7 changes: 0 additions & 7 deletions mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt
@@ -1,15 +1,8 @@
set(LLVM_TARGET_DEFINITIONS GPUToSPIRV.td)
mlir_tablegen(GPUToSPIRV.cpp.inc -gen-rewriters)
add_public_tablegen_target(MLIRGPUToSPIRVIncGen)

add_llvm_library(MLIRGPUtoSPIRVTransforms
ConvertGPUToSPIRV.cpp
ConvertGPUToSPIRVPass.cpp
)

add_dependencies(MLIRGPUtoSPIRVTransforms
MLIRGPUToSPIRVIncGen)

target_link_libraries(MLIRGPUtoSPIRVTransforms
MLIRGPU
MLIRIR
55 changes: 40 additions & 15 deletions mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
@@ -63,13 +63,27 @@ class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
SmallVector<int32_t, 3> workGroupSizeAsInt32;
};

/// Pattern to convert a gpu.module to a spv.module.
class GPUModuleConversion final : public SPIRVOpLowering<gpu::GPUModuleOp> {
/// Pattern to convert a module with gpu.kernel_module attribute to a
/// spv.module.
class KernelModuleConversion final : public SPIRVOpLowering<ModuleOp> {
public:
using SPIRVOpLowering<gpu::GPUModuleOp>::SPIRVOpLowering;
using SPIRVOpLowering<ModuleOp>::SPIRVOpLowering;

PatternMatchResult
matchAndRewrite(gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
matchAndRewrite(ModuleOp moduleOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override;
};

/// Pattern to convert a module terminator op to a terminator of spv.module op.
// TODO: Move this into DRR, but that requires ModuleTerminatorOp to be defined
// in ODS.
class KernelModuleTerminatorConversion final
: public SPIRVOpLowering<ModuleTerminatorOp> {
public:
using SPIRVOpLowering<ModuleTerminatorOp>::SPIRVOpLowering;

PatternMatchResult
matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override;
};

Expand Down Expand Up @@ -270,12 +284,16 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
}

//===----------------------------------------------------------------------===//
// ModuleOp with gpu.module.
// ModuleOp with gpu.kernel_module.
//===----------------------------------------------------------------------===//

PatternMatchResult GPUModuleConversion::matchAndRewrite(
gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
PatternMatchResult KernelModuleConversion::matchAndRewrite(
ModuleOp moduleOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
if (!moduleOp.getAttrOfType<UnitAttr>(
gpu::GPUDialect::getKernelModuleAttrName())) {
return matchFailure();
}
// TODO : Generalize this to account for different extensions,
// capabilities, extended_instruction_sets, other addressing models
// and memory models.
@@ -284,8 +302,8 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
spirv::MemoryModel::GLSL450, spirv::Capability::Shader,
spirv::Extension::SPV_KHR_storage_buffer_storage_class);
// Move the region from the module op into the SPIR-V module.
Region &spvModuleRegion = spvModule.body();
rewriter.inlineRegionBefore(moduleOp.body(), spvModuleRegion,
Region &spvModuleRegion = spvModule.getOperation()->getRegion(0);
rewriter.inlineRegionBefore(moduleOp.getBodyRegion(), spvModuleRegion,
spvModuleRegion.begin());
// The spv.module build method adds a block with a terminator. Remove that
// block. The terminator of the module op in the remaining block will be
@@ -295,6 +313,17 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
return matchSuccess();
}

//===----------------------------------------------------------------------===//
// ModuleTerminatorOp for gpu.kernel_module.
//===----------------------------------------------------------------------===//

PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
rewriter.replaceOpWithNewOp<spirv::ModuleEndOp>(terminatorOp);
return matchSuccess();
}

//===----------------------------------------------------------------------===//
// GPU return inside kernel functions to SPIR-V return.
//===----------------------------------------------------------------------===//
@@ -313,18 +342,14 @@ PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
// GPU To SPIRV Patterns.
//===----------------------------------------------------------------------===//

namespace {
#include "GPUToSPIRV.cpp.inc"
}

void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
SPIRVTypeConverter &typeConverter,
OwningRewritePatternList &patterns,
ArrayRef<int64_t> workGroupSize) {
populateWithGenerated(context, &patterns);
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
GPUReturnOpConversion, ForOpConversion, GPUModuleConversion,
GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
KernelModuleTerminatorConversion,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
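Taken together, the two restored patterns rewrite an annotated kernel module into a `spv.module` using the GLSL450 memory model shown in the builder call above (the Logical addressing model is assumed here), and rewrite the old module terminator into the SPIR-V module terminator. Schematically, with the `spv.module` syntax of the period approximated:

```mlir
// Input: a kernel module recognized by its gpu.kernel_module attribute.
module @kernels attributes {gpu.kernel_module} {
  // gpu.func kernels, converted separately by KernelFnConversion
}

// Output: a SPIR-V module; the region contents are moved across and the
// old module terminator becomes the spv.module terminator.
spv.module "Logical" "GLSL450" {
  // converted kernel functions
}
```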
