Moving the GPUIndexIntrinsicOpLowering template to a common location
The GPUIndexIntrinsicOpLowering template is currently duplicated in both the GPUToNVVM and GPUToROCDL directories.
Moving it to a common location removes the code duplication.

Closes tensorflow/mlir#163

COPYBARA_INTEGRATE_REVIEW=tensorflow/mlir#163 from deven-amd:deven-refactor-gpu-index-ops-lowering b8dc2a5f5353df196039b6ff2ad42106028693ed
PiperOrigin-RevId: 272863297
deven-amd authored and tensorflower-gardener committed Oct 4, 2019
1 parent 85dcaf1 commit d064469
Showing 3 changed files with 96 additions and 139 deletions.
94 changes: 94 additions & 0 deletions mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
@@ -0,0 +1,94 @@
//===- IndexIntrinsicsOpLowering.h - GPU IndexOps Lowering class *- C++ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_

#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

#include "llvm/ADT/StringSwitch.h"

namespace mlir {

// Rewriting that replaces Op with XOp, YOp, or ZOp depending on the dimension
// that Op operates on. Op is assumed to return an `std.index` value and
// XOp, YOp and ZOp are assumed to return an `llvm.i32` value. Depending on
// `indexBitwidth`, sign-extend or truncate the resulting value to match the
// bitwidth expected by the consumers of the value.
template <typename Op, typename XOp, typename YOp, typename ZOp>
struct GPUIndexIntrinsicOpLowering : public LLVMOpLowering {
private:
  enum dimension { X = 0, Y = 1, Z = 2, invalid };
  unsigned indexBitwidth;

  static dimension dimensionToIndex(Op op) {
    return llvm::StringSwitch<dimension>(op.dimension())
        .Case("x", X)
        .Case("y", Y)
        .Case("z", Z)
        .Default(invalid);
  }

  static unsigned getIndexBitWidth(LLVMTypeConverter &type_converter) {
    auto dialect = type_converter.getDialect();
    return dialect->getLLVMModule().getDataLayout().getPointerSizeInBits();
  }

public:
  explicit GPUIndexIntrinsicOpLowering(LLVMTypeConverter &lowering_)
      : LLVMOpLowering(Op::getOperationName(),
                       lowering_.getDialect()->getContext(), lowering_),
        indexBitwidth(getIndexBitWidth(lowering_)) {}

  // Replace the op with the X/Y/Z intrinsic for the dimension it names, then
  // sign-extend or truncate the i32 result to `indexBitwidth`.
  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    auto dialect = lowering.getDialect();
    Value *newOp;
    switch (dimensionToIndex(cast<Op>(op))) {
    case X:
      newOp = rewriter.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
      break;
    case Y:
      newOp = rewriter.create<YOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
      break;
    case Z:
      newOp = rewriter.create<ZOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
      break;
    default:
      return matchFailure();
    }

    if (indexBitwidth > 32) {
      newOp = rewriter.create<LLVM::SExtOp>(
          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
    } else if (indexBitwidth < 32) {
      newOp = rewriter.create<LLVM::TruncOp>(
          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
    }

    rewriter.replaceOp(op, {newOp});
    return matchSuccess();
  }
};

} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_
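
For reference, the sketch below shows how a lowering pass can now instantiate the shared template instead of carrying its own copy. It is illustrative only: the helper name populateGpuIndexIntrinsicPatterns is hypothetical, and the gpu/NVVM/ROCDL op class names are assumed from the dialects of this period rather than taken verbatim from this diff.

#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Transforms/DialectConversion.h"

#include "../GPUCommon/IndexIntrinsicsOpLowering.h"

using namespace mlir;

// Illustrative sketch, not part of this commit: register the index-op
// lowerings for the NVVM backend using the shared template.
static void populateGpuIndexIntrinsicPatterns(LLVMTypeConverter &converter,
                                              OwningRewritePatternList &patterns) {
  patterns.insert<
      GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                  NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
      GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                  NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(converter);
  // The ROCDL pass is analogous, substituting ROCDL::ThreadIdXOp,
  // ROCDL::BlockIdXOp, etc. for the NVVM op classes.
}

The gpu.block_dim and gpu.grid_dim lowerings would be registered the same way, each pairing a gpu op with the matching X/Y/Z intrinsics of the target dialect.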
71 changes: 1 addition & 70 deletions mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -22,87 +22,18 @@

#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"

#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Transforms/DialectConversion.h"

#include "llvm/ADT/StringSwitch.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"

using namespace mlir;

namespace {

// Rewriting that replaces Op with XOp, YOp, or ZOp depending on the dimension
// that Op operates on. Op is assumed to return an `std.index` value and
// XOp, YOp and ZOp are assumed to return an `llvm.i32` value. Depending on
// `indexBitwidth`, sign-extend or truncate the resulting value to match the
// bitwidth expected by the consumers of the value.
template <typename Op, typename XOp, typename YOp, typename ZOp>
struct GPUIndexIntrinsicOpLowering : public LLVMOpLowering {
private:
enum dimension { X = 0, Y = 1, Z = 2, invalid };
unsigned indexBitwidth;

static dimension dimensionToIndex(Op op) {
return llvm::StringSwitch<dimension>(op.dimension())
.Case("x", X)
.Case("y", Y)
.Case("z", Z)
.Default(invalid);
}

static unsigned getIndexBitWidth(LLVMTypeConverter &lowering) {
auto dialect = lowering.getDialect();
return dialect->getLLVMModule().getDataLayout().getPointerSizeInBits();
}

public:
explicit GPUIndexIntrinsicOpLowering(LLVMTypeConverter &lowering_)
: LLVMOpLowering(Op::getOperationName(),
lowering_.getDialect()->getContext(), lowering_),
indexBitwidth(getIndexBitWidth(lowering_)) {}

// Convert the kernel arguments to an LLVM type, preserve the rest.
PatternMatchResult
matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
ConversionPatternRewriter &rewriter) const override {
auto loc = op->getLoc();
auto dialect = lowering.getDialect();
Value *newOp;
switch (dimensionToIndex(cast<Op>(op))) {
case X:
newOp = rewriter.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
case Y:
newOp = rewriter.create<YOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
case Z:
newOp = rewriter.create<ZOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
default:
return matchFailure();
}

if (indexBitwidth > 32) {
newOp = rewriter.create<LLVM::SExtOp>(
loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
} else if (indexBitwidth < 32) {
newOp = rewriter.create<LLVM::TruncOp>(
loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
}

rewriter.replaceOp(op, {newOp});
return matchSuccess();
}
};

// Converts all_reduce op to LLVM/NVVM ops.
struct GPUAllReduceOpLowering : public LLVMOpLowering {
explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_)
70 changes: 1 addition & 69 deletions mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -22,86 +22,18 @@

#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"

#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"

#include "llvm/ADT/StringSwitch.h"
#include "../GPUCommon/IndexIntrinsicsOpLowering.h"

using namespace mlir;

namespace {

// Rewriting that replaces Op with XOp, YOp, or ZOp depending on the dimension
// that Op operates on. Op is assumed to return an `std.index` value and
// XOp, YOp and ZOp are assumed to return an `llvm.i32` value. Depending on
// `indexBitwidth`, sign-extend or truncate the resulting value to match the
// bitwidth expected by the consumers of the value.
template <typename Op, typename XOp, typename YOp, typename ZOp>
struct GPUIndexIntrinsicOpLowering : public LLVMOpLowering {
private:
enum dimension { X = 0, Y = 1, Z = 2, invalid };
unsigned indexBitwidth;

static dimension dimensionToIndex(Op op) {
return llvm::StringSwitch<dimension>(op.dimension())
.Case("x", X)
.Case("y", Y)
.Case("z", Z)
.Default(invalid);
}

static unsigned getIndexBitWidth(LLVMTypeConverter &type_converter) {
auto dialect = type_converter.getDialect();
return dialect->getLLVMModule().getDataLayout().getPointerSizeInBits();
}

public:
explicit GPUIndexIntrinsicOpLowering(LLVMTypeConverter &lowering_)
: LLVMOpLowering(Op::getOperationName(),
lowering_.getDialect()->getContext(), lowering_),
indexBitwidth(getIndexBitWidth(lowering_)) {}

// Convert the kernel arguments to an LLVM type, preserve the rest.
PatternMatchResult
matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
ConversionPatternRewriter &rewriter) const override {
auto loc = op->getLoc();
auto dialect = lowering.getDialect();
Value *newOp;
switch (dimensionToIndex(cast<Op>(op))) {
case X:
newOp = rewriter.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
case Y:
newOp = rewriter.create<YOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
case Z:
newOp = rewriter.create<ZOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
break;
default:
return matchFailure();
}

if (indexBitwidth > 32) {
newOp = rewriter.create<LLVM::SExtOp>(
loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
} else if (indexBitwidth < 32) {
newOp = rewriter.create<LLVM::TruncOp>(
loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
}

rewriter.replaceOp(op, {newOp});
return matchSuccess();
}
};

// A pass that replaces all occurrences of GPU device operations with their
// corresponding ROCDL equivalent.
//
