|
| 1 | +//===-- NVGPU.td - NVGPU dialect operation definitions *- tablegen -*------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This file defines the basic operations for the NVGPU dialect. |
| 10 | +// |
| 11 | +// This NVGPU dialect provides a bridge between the target agnostic GPU and
| 12 | +// Vector dialects and the lower level NVVM dialect. This allows representing
| 13 | +// PTX specific operations while using MLIR high level concepts like memref and 2-D vector.
| 14 | +// |
| 15 | +// Op semantics are based on the vendor-specific PTX definition:
| 16 | +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html |
| 17 | +// |
| 18 | +//===----------------------------------------------------------------------===// |
| 19 | + |
| 20 | +#ifndef NVGPU |
| 21 | +#define NVGPU |
| 22 | + |
| 23 | +include "mlir/Interfaces/SideEffectInterfaces.td" |
| 24 | +include "mlir/IR/OpBase.td" |
| 25 | + |
| 26 | +def NVGPU_Dialect : Dialect {
| 27 | +  let name = "nvgpu";
| 28 | +  let cppNamespace = "::mlir::nvgpu";
| 29 | +  let description = [{
| 30 | +    This `NVGPU` dialect provides a bridge between the target agnostic GPU and
| 31 | +    Vector dialects and the lower level LLVM IR based NVVM dialect. This allows
| 32 | +    representing PTX specific operations while using MLIR high level concepts
| 33 | +    like memref and 2-D vector.
| 34 | +  }];
| 35 | +}
| 36 | + |
| 37 | +//===----------------------------------------------------------------------===// |
| 38 | +// NVGPU Op definitions |
| 39 | +//===----------------------------------------------------------------------===// |
| 40 | + |
| 41 | +// Base class for NVGPU dialect operations: attaches each op to the `nvgpu`
| 41 | +// dialect, with the subclass supplying the mnemonic and trait list.
| 41 | +class NVGPU_Op<string mnemonic, list<Trait> traits = []> :
| 42 | +  Op<NVGPU_Dialect, mnemonic, traits> {}
| 43 | + |
| 44 | +def NVGPU_LdMatrixOp : NVGPU_Op<"ldmatrix",
| 45 | +  [MemoryEffects<[MemRead]>]> {
| 46 | +  let description = [{
| 47 | +    The `nvgpu.ldmatrix` op represents loading a matrix fragment from
| 48 | +    memory. The load source and result type must be compatible with lowering
| 49 | +    to the `nvvm.ldmatrix` instruction. This op is meant to represent
| 50 | +    the distributed version of a `vector.transfer_read` as an intermediate
| 51 | +    step between lowering from `vector.transfer_read` to `nvvm.ldmatrix`.
| 52 | +
| 53 | +    This operation is meant to follow the semantics described here:
| 54 | +    https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-ldmatrix
| 55 | +
| 56 | +    Example:
| 57 | +    ```mlir
| 58 | +    %0 = nvgpu.ldmatrix %sm[%c0, %c0] {numTiles = 4 : i32, transpose = false} :
| 59 | +      memref<?x?xf16, 3> -> vector<4x2xf16>
| 60 | +    ```
| 61 | +  }];
| 62 | +
| 63 | +  let arguments = (ins Arg<AnyMemRef, "", [MemRead]>:$srcMemref,
| 64 | +                       Variadic<Index>:$indices, BoolAttr:$transpose,
| 65 | +                       I32Attr:$numTiles);
| 66 | +  let results = (outs AnyVector:$res);
| 67 | +  let assemblyFormat = [{
| 68 | +    $srcMemref`[` $indices `]` attr-dict `:` type($srcMemref) `->` type($res)
| 69 | +  }];
| 70 | +}
| 71 | + |
| 72 | +#endif // NVGPU |
0 commit comments