diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 5241f9a6f2b43..921fdf36a59b0 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -692,6 +692,40 @@ def ROCDL_GlobalLoadLDSOp :
   }];
 }
 
+//===---------------------------------------------------------------------===//
+// Async load to LDS intrinsic (available in GFX1250)
+//===---------------------------------------------------------------------===//
+
+// One op is generated per supported transfer width (8, 32, 64 or 128 bits),
+// each wrapping the matching `global.load.async.to.lds.b<N>` intrinsic.
+foreach bitsVal = [8, 32, 64, 128] in {
+  defvar bitsStr = "b" # !cast<string>(bitsVal);
+  def ROCDL_GlobalLoadAsyncToLDS # !toupper(bitsStr) # Op :
+    ROCDL_IntrOp<"global.load.async.to.lds." # bitsStr, [], [], [], 0, 0, 1, 0, [2, 3], ["offset", "aux"]> {
+    dag args = (ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
+                Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
+                I32Attr:$offset,
+                I32Attr:$aux);
+    let arguments = !con(args, baseArgs);
+    let assemblyFormat = [{
+      $globalPtr `,` $ldsPtr `,` $offset `,` $aux
+      attr-dict `:` type($globalPtr) `,` type($ldsPtr)
+    }];
+    let description = [{
+      Asynchronously loads }] # !cast<string>(bitsVal) # [{ bits of data from a global memory pointer
+      to a Local Data Share (LDS) pointer.
+
+      Available on gfx1250+.
+    }];
+
+    let extraClassDefinition = [{
+      ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
+        return {getGlobalPtr(), getLdsPtr()};
+      }
+    }];
+  }
+}
+
 //===---------------------------------------------------------------------===//
 // Tensor load/store intrinsics (available in GFX1250)
 //===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index e703600c71c8e..5e857599b65ea 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
   llvm.return
 }
 
+llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+  // CHECK-LABEL: @rocdl.global.load.async.to.lds
+  // CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0
+  // CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
+  // CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
+  // CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
+  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>, <3>
+  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>, <3>
+  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>, <3>
+  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>, <3>
+  llvm.return
+}
+
 // CHECK-LABEL @rocdl.tensor.load.to.lds
 llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
                                     %dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 8a848221a50dd..3fbd9e0567948 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1040,6 +1040,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
   llvm.return
 }
 
+// CHECK-LABEL: rocdl.global.load.async.to.lds
+llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8
+  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32
+  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64
+  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128
+  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+  llvm.return
+}
+
 // CHECK-LABEL: rocdl.tensor.load.to.lds
 llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
                                     %dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {