-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[ROCDL] Added LDS barrier ops to ROCDL (gfx1250) #171810
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: Ravil Dorozhinskii (ravil-mobile) Changes: Added `ds.atomic.barrier.arrive.rtn.b64` and `ds.atomic.async.barrier.arrive.b64` to ROCDL. Full diff: https://github.com/llvm/llvm-project/pull/171810.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 57cb98a1d9be7..8d115c0fa2060 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -1177,25 +1177,82 @@ def ROCDL_RawBufferAtomicCmpSwap :
// Memory prefetch intrinsics
def ROCDL_GlobalPrefetchOp :
- ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
- Arguments<(ins Arg<LLVM_PointerInAddressSpace<1>, "", []>:$ptr, I32Attr:$scope)> {
+ ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<1>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane from global memory into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
}
def ROCDL_FlatPrefetchOp :
- ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
+ ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]>,
Arguments<(ins Arg<LLVM_PointerInAddressSpace<0>, "", []>:$ptr, I32Attr:$scope)> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<0>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane using flat-memory addresses into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
+}
+
+//===---------------------------------------------------------------------===//
+// Atomic barrier intrinsic (LDS memory barriers).
+
+def ROCDL_DsAtomicBarrierArriveRtnOp :
+ ROCDL_IntrOp<"ds.atomic.barrier.arrive.rtn.b64", [], [], [], 1, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemRead, MemWrite]>:$barrierPtr,
+ I64:$val);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements its pending count by a given value. Note, the barrier state
+ is given as a 64-bit structure containing pending count, phase and init count. The op returns the old
+ barrier state. The op is executed as an ordinary LDS operations and it is ordered with other LDS operations.
+ Thus, check DSCNT to determine when this instruction has executed.
+ Available on gfx1250+.
+ }];
+ let results = (outs I64:$res);
+ let assemblyFormat = "$barrierPtr `,` $val attr-dict `:` qualified(type($barrierPtr)) `,` qualified(type($val)) `->` qualified(type($res))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
+}
+
+def ROCDL_DsAtomicAsyncBarrierArriveOp :
+ ROCDL_IntrOp<"ds.atomic.async.barrier.arrive.b64", [], [], [], 0, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemWrite]>:$barrierPtr);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements pending count by -1.
+ Stays in order with ASYNC loads to LDS, and uses ASYNCcnt to track its completion.
+ Available on gfx1250+.
+ }];
+ let results = (outs);
+ let assemblyFormat = "$barrierPtr attr-dict `:` qualified(type($barrierPtr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
}
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index ae25b111ea325..b33d41c0a8f03 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -892,6 +892,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %data : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: rocdl.ds.atomic.async.barrier.arrive.b64 %{{.*}} : !llvm.ptr<3>
+ // CHECK: %{{.*}} = rocdl.ds.atomic.barrier.arrive.rtn.b64 %{{.*}}, %{{.*}} : !llvm.ptr<3>, i64 -> i64
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %data : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
// -----
llvm.func @rocdl.raw.buffer.f32(%rsrc : vector<4xi32>,
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 87faed16aa59c..5e80dd2edecb3 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1355,6 +1355,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %data : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %{{.*}})
+ // CHECK: %{{.*}} = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %{{.*}}, i64 %{{.*}})
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %data : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
llvm.func @rocdl.wmma.scale(%arg0: i32, %arg1: vector<4xf32>, %arg2: vector<8xi32>,
%arg3: vector<12xi32>, %arg5: vector<16xi32>,
%arg8: i64, %arg9: vector<8xf32>) -> vector<4xf32> {
|
648f81c to
5eef2c3
Compare
| Available on gfx1250+. | ||
| }]; | ||
| let results = (outs I64:$res); | ||
| let assemblyFormat = "$barrierPtr `,` $val attr-dict `:` qualified(type($barrierPtr)) `,` qualified(type($val)) `->` qualified(type($res))"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the result type doesn't have to be qualified, no? It's an int type. Also the val operand
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are absolutely right. int64 is a basic built-in type. If we say qualified(...), we are requesting that the printer include the dialect namespace of the type. In this case, qualified only makes sense for the LLVM pointer type.
Adjusted, done.
krzysz00
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aside from Jakub's existing note about qualified, LGTM
5eef2c3 to
810cdca
Compare
This picks up several ROCDL changes: llvm/llvm-project#171810 llvm/llvm-project#171449 llvm/llvm-project#169672
This picks up several ROCDL changes: llvm/llvm-project#171810 llvm/llvm-project#171449 llvm/llvm-project#169672
Added `ds.atomic.barrier.arrive.rtn.b64` and `ds.atomic.async.barrier.arrive.b64` to ROCDL. These are parts of the LDS memory barrier concept in GFX1250. Also added alias analysis to `global`/`flat` data prefetch ops. Extended rocdl tests.