Skip to content

Commit

Permalink
Define miopen.threadwise_copy_v2 for matrix C write out.
Browse files Browse the repository at this point in the history
  • Loading branch information
whchung committed Sep 9, 2020
1 parent ce7508f commit a33ed62
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
17 changes: 17 additions & 0 deletions mlir/include/mlir/Dialect/MIOpen/MIOpenOps.td
Expand Up @@ -214,6 +214,23 @@ def MIOpen_ThreadwiseCopyOp:
}];
}

// threadwise_copy_v2
def MIOpen_ThreadwiseCopyV2Op:
MIOpen_Op<"threadwise_copy_v2">,
Arguments<(ins VectorOfRankAndType<[1], [F32, F16, BF16]>:$source,
AnyMemRef:$dest,
I32: $sourceOffset,
Variadic<I32>:$destCoord)> {
let summary = "Threadwise GPU data movement XDLOPS version";
let description = [{
The `miopen.threadwise_v2` op moves data on GPU. Following movements are
allowed:
- Register (naive tensor) as vector type to global (generic tensor).

The op is used to store C matrices, stored as vectors, back to global VMEM.
}];
}

// blockwise_gemm
def MIOpen_BlockwiseGemmOp:
MIOpen_Op<"blockwise_gemm">,
Expand Down
34 changes: 34 additions & 0 deletions mlir/lib/Dialect/MIOpen/MIOpenOps.cpp
Expand Up @@ -507,6 +507,40 @@ static LogicalResult verify(ThreadwiseCopyOp op) {
return success();
}

//===----------------------------------------------------------------------===//
// ThreadwiseCopyV2Op
//===----------------------------------------------------------------------===//

static ParseResult parseThreadwiseCopyV2Op(OpAsmParser &parser, OperationState &result) {
SmallVector<OpAsmParser::OperandType, 5> ops;
SmallVector<Type, 2> types;

auto ret = parser.parseOperandList(ops, OpAsmParser::Delimiter::Paren) ||
parser.parseOptionalAttrDict(result.attributes) ||
parser.parseColonTypeList(types) ||
parser.resolveOperand(ops[0], types[0], result.operands) ||
parser.resolveOperand(ops[1], types[1], result.operands);

// resolve source offset.
// resolve destination coordinates.
for (unsigned i = 2; i < ops.size(); ++i) {
ret &= succeeded(parser.resolveOperand(
ops[i], parser.getBuilder().getIntegerType(32), result.operands));
}
return failure(ret);
}

static void print(OpAsmPrinter &p, ThreadwiseCopyV2Op op) {
p << op.getOperationName() << "(" << op.getOperands() << ")";
p.printOptionalAttrDict(op.getAttrs());
p << " : " << op.getOperands()[0].getType() << ", "
<< op.getOperands()[1].getType();
}

static LogicalResult verify(ThreadwiseCopyV2Op op) {
return success();
}

//===----------------------------------------------------------------------===//
// MFMAOp
//===----------------------------------------------------------------------===//
Expand Down
35 changes: 35 additions & 0 deletions mlir/test/Dialect/MIOpen/ops_2.mlir
Expand Up @@ -192,6 +192,41 @@ func @miopen_threadwise_copy(%source_coord : memref<2xi32, 5>, %dest_coord : mem
// CHECK-LABEL: func @miopen_threadwise_copy
// CHECK: miopen.threadwise_copy

#map11 = affine_map<(d0, d1) -> (d1, d0, d1, d0)>

#map12 = affine_map<(d0, d1) -> (d1, d0 floordiv 9, (d0 mod 9) floordiv 3, (d0 mod 9) mod 3)>
#map13 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2 * 2 + d3, d4 * 2 + d5)>

func @miopen_threadwise_copy_v2(%source_coord : i32, %dest_coord : memref<2xi32, 5>,
%source : vector<32xf32>, %dest : memref<?x?xf32>,
%dest_with_embedded_affine : memref<?x?xf32, #map11>,
%dest_with_externally_defined_affine : memref<?x?x?x?xf32>) {
%c0 = constant 0 : index
%dest_coord_y = load %dest_coord[%c0] : memref<2xi32, 5>
%dest_coord_x = load %dest_coord[%c0] : memref<2xi32, 5>

// check dest as a vanilla memref.
miopen.threadwise_copy_v2(%source, %dest, %source_coord, %dest_coord_x, %dest_coord_y) : vector<32xf32>, memref<?x?xf32>

// -----

// check dest with embedded affine maps.
miopen.threadwise_copy_v2(%source, %dest_with_embedded_affine, %source_coord, %dest_coord_x, %dest_coord_y) : vector<32xf32>, memref<?x?xf32, #map11>

// -----

// check destination with one externally defined affine map.
miopen.threadwise_copy_v2(%source, %dest_with_externally_defined_affine, %source_coord, %dest_coord_x, %dest_coord_y) { coord_transforms = [ { operand = 1, transforms = [#map12] } ] } : vector<32xf32>, memref<?x?x?x?xf32>

// check destination with multiple externally defined affine map.
miopen.threadwise_copy_v2(%source, %dest_with_externally_defined_affine, %source_coord, %dest_coord_x, %dest_coord_y) { coord_transforms = [ { operand = 1, transforms = [#map12, #map13] } ] } : vector<32xf32>, memref<?x?x?x?xf32>

return
}

// CHECK-LABEL: func @miopen_threadwise_copy_v2
// CHECK: miopen.threadwise_copy_v2

func @miopen_threadwise_gemm(%lhs : memref<4x8xf32>, %rhs : memref<4x8xf32>, %output : memref<8x8xf32>) {
miopen.threadwise_gemm(%lhs, %rhs, %output) : memref<4x8xf32>, memref<4x8xf32>, memref<8x8xf32>
return
Expand Down

0 comments on commit a33ed62

Please sign in to comment.