Skip to content

Commit

Permalink
WIP revise BlockwiseCopyOp so it takes source / dest coordinates.
Browse files Browse the repository at this point in the history
TBD add MovePos op.
TBD revise how "move_source_slice_window" is implemented.
  • Loading branch information
whchung committed Jun 6, 2020
1 parent 555dad6 commit 68f7555
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 25 deletions.
52 changes: 30 additions & 22 deletions mlir/include/mlir/Dialect/MIOpen/LowerMIOpenOps.h
Expand Up @@ -1283,9 +1283,9 @@ struct GridwiseGemmRewritePattern : public OpRewritePattern<miopen::GridwiseGemm
b.create<miopen::FillOp>(op.getLoc(), blockwiseCopyZero, zeroConstantI32Op);

b.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(0),
lds2DMatrixAEvenSubviewOp);
lds2DMatrixAEvenSubviewOp, blockwiseCopyASrc, blockwiseCopyZero);
b.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(1),
lds2DMatrixBEvenSubviewOp);
lds2DMatrixBEvenSubviewOp, blockwiseCopyBSrc, blockwiseCopyZero);

// Emit loop.
// Compute loop iterations from attributes.
Expand All @@ -1299,48 +1299,56 @@ struct GridwiseGemmRewritePattern : public OpRewritePattern<miopen::GridwiseGemm
// LDS barrier.
lb.create<miopen::LdsBarrierOp>(op.getLoc());

// Blockwise copy from global (generic tensor) to register (naive tensor).
auto blockwiseCopyOpAEven = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(0), threadAEvenAllocOp);
// Compute block_slice_copy_steps and set in the attribute.
blockwiseCopyOpAEven.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
auto blockwiseCopyOpBEven = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(1), threadBEvenAllocOp);
// Compute block_slice_copy_steps and set in the attribute.
blockwiseCopyOpBEven.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
// TBD add move_pos op.
// TBD move blockwiseCopyASrc by (KPerBlock, 0).
// TBD move blockwiseCopyBSrc by (KPerBlock, 0).
// TBD revive blockwise_copy commands.
// // Blockwise copy from global (generic tensor) to register (naive tensor).
// auto blockwiseCopyOpAEven = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(0), threadAEvenAllocOp);
// // Compute block_slice_copy_steps and set in the attribute.
// blockwiseCopyOpAEven.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
// auto blockwiseCopyOpBEven = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(1), threadBEvenAllocOp);
// // Compute block_slice_copy_steps and set in the attribute.
// blockwiseCopyOpBEven.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));

// Emit blockwise GEMM.
auto blockwiseGemmEvenOp = lb.create<miopen::BlockwiseGemmOp>(
op.getLoc(), lds2DMatrixAEvenSubviewOp, lds2DMatrixBEvenSubviewOp,
threadCAllocOp);
affixBlockwiseGemmAttributes(blockwiseGemmEvenOp, op);

// Blockwise copy from reigster (naitve tensor) to LDS (naive tensor).
// Blockwise copy from register (naive tensor) to LDS (naive tensor).
lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), threadAEvenAllocOp,
lds2DMatrixAOddSubviewOp);
lds2DMatrixAOddSubviewOp, blockwiseCopyASrc, blockwiseCopyZero);
lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), threadBEvenAllocOp,
lds2DMatrixBOddSubviewOp);
lds2DMatrixBOddSubviewOp, blockwiseCopyBSrc, blockwiseCopyZero);

// LDS barrier.
lb.create<miopen::LdsBarrierOp>(op.getLoc());

// Blockwise copy from global (generic tensor) to register (naive tensor).
auto blockwiseCopyOpAOdd = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(0), threadAOddAllocOp);
// Compute block_slice_copy_steps and set in the attribute.
blockwiseCopyOpAOdd.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
auto blockwiseCopyOpBOdd = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(1), threadBOddAllocOp);
// Compute block_slice_copy_steps and set in the attribute.
blockwiseCopyOpBOdd.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
// TBD add move_pos op.
// TBD move blockwiseCopyASrc by (KPerBlock, 0).
// TBD move blockwiseCopyBSrc by (KPerBlock, 0).
// TBD revive blockwise_copy commands.
// // Blockwise copy from global (generic tensor) to register (naive tensor).
// auto blockwiseCopyOpAOdd = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(0), threadAOddAllocOp);
// // Compute block_slice_copy_steps and set in the attribute.
// blockwiseCopyOpAOdd.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));
// auto blockwiseCopyOpBOdd = lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), op.getOperand(1), threadBOddAllocOp);
// // Compute block_slice_copy_steps and set in the attribute.
// blockwiseCopyOpBOdd.setAttr("move_source_slice_window", b.getI32IntegerAttr(KPerBlock));

// Emit blockwise GEMM.
auto blockwiseGemmOddOp = lb.create<miopen::BlockwiseGemmOp>(
op.getLoc(), lds2DMatrixAOddSubviewOp, lds2DMatrixBOddSubviewOp,
threadCAllocOp);
affixBlockwiseGemmAttributes(blockwiseGemmOddOp, op);

// Blockwise copy from reigster (naitve tensor) to LDS (naive tensor).
// Blockwise copy from register (naive tensor) to LDS (naive tensor).
lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), threadAOddAllocOp,
lds2DMatrixAEvenSubviewOp);
lds2DMatrixAEvenSubviewOp, blockwiseCopyZero, blockwiseCopyZero);
lb.create<miopen::BlockwiseCopyOp>(op.getLoc(), threadBOddAllocOp,
lds2DMatrixBEvenSubviewOp);
lds2DMatrixBEvenSubviewOp, blockwiseCopyZero, blockwiseCopyZero);

// outside the loop.

Expand Down
4 changes: 3 additions & 1 deletion mlir/include/mlir/Dialect/MIOpen/MIOpenOps.td
Expand Up @@ -152,7 +152,9 @@ def MIOpen_LdsBarrierOp:
def MIOpen_BlockwiseCopyOp:
MIOpen_Op<"blockwise_copy">,
Arguments<(ins AnyMemRef:$source,
AnyMemRef:$dest)> {
AnyMemRef:$dest,
AnyMemRef:$sourceCoord,
AnyMemRef:$destCoord)> {
let summary = "Blockwise GPU data movement";
let description = [{
The `miopen.blockwise` op moves data on GPU. Following movements are
Expand Down
4 changes: 2 additions & 2 deletions mlir/lib/Dialect/MIOpen/MIOpenOps.cpp
Expand Up @@ -318,8 +318,8 @@ static LogicalResult verify(ThreadwiseGemmOp op) {
//===----------------------------------------------------------------------===//

static ParseResult parseBlockwiseCopyOp(OpAsmParser &parser, OperationState &result) {
SmallVector<OpAsmParser::OperandType, 2> ops;
SmallVector<Type, 2> types;
SmallVector<OpAsmParser::OperandType, 4> ops;
SmallVector<Type, 4> types;
return failure(
parser.parseOperandList(ops, OpAsmParser::Delimiter::Paren) ||
parser.parseOptionalAttrDict(result.attributes) ||
Expand Down

0 comments on commit 68f7555

Please sign in to comment.