-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[flang][cuda] Add fir.deallocate operation #88839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesAdd the fir.cuda_deallocate operation that perform device deallocation of data hold by a descriptor. This will replace the call to AllocatableDeallocate from the runtime. This is a companion operation to the one added in #88586 Full diff: https://github.com/llvm/llvm-project/pull/88839.diff 4 Files Affected:
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index c181c7ed62dff3..0d7f45eb6255e9 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3222,4 +3222,29 @@ def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments,
let hasVerifier = 1;
}
+def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate",
+ [MemoryEffects<[MemFree<DefaultResource>]>]> {
+ let summary = "Perform the device deallocation of data of an allocatable";
+
+ let description = [{
+ The fir.cuda_deallocate operation performs the deallocation on the device
+ of the data of an allocatable.
+ }];
+
+ let arguments = (ins Arg<AnyRefOrBoxType, "", [MemWrite]>:$box,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+ fir_CUDADataAttributeAttr:$cuda_attr,
+ UnitAttr:$hasStat);
+
+ let results = (outs AnyIntegerType:$stat);
+
+ let assemblyFormat = [{
+ $box `:` qualified(type($box))
+ ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
+ attr-dict `->` type($stat)
+ }];
+
+ let hasVerifier = 1;
+}
+
#endif
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 88710880174d21..927e167a9a8b66 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4012,6 +4012,19 @@ mlir::LogicalResult fir::CUDAAllocateOp::verify() {
return mlir::success();
}
+mlir::LogicalResult fir::CUDADeallocateOp::verify() {
+ if (!fir::unwrapRefType(getBox().getType()).isa<fir::BaseBoxType>())
+ return emitOpError(
+ "expect box to be a reference to/or a class or box type value");
+ if (getErrmsg() &&
+ !fir::unwrapRefType(getErrmsg().getType()).isa<fir::BoxType>())
+ return emitOpError(
+ "expect errmsg to be a reference to/or a box type value");
+ if (getErrmsg() && !getHasStat())
+ return emitOpError("expect stat attribute when errmsg is provided");
+ return mlir::success();
+}
+
//===----------------------------------------------------------------------===//
// FIROpsDialect
//===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index 9c5ffe7176a3bd..ee31515134c469 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -48,3 +48,40 @@ func.func @_QPsub1() {
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
return
}
+
+// -----
+
+func.func @_QPsub1() {
+ %1 = fir.alloca i32
+ // expected-error@+1{{'fir.cuda_deallocate' op expect box to be a reference to/or a class or box type value}}
+ %2 = fir.cuda_deallocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
+ return
+}
+
+// -----
+
+func.func @_QPsub1() {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
+ %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %1 = fir.alloca i32
+ %11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
+ // expected-error@+1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}}
+ %13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+ return
+}
+
+// -----
+
+func.func @_QPsub1() {
+ %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
+ %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %c100 = arith.constant 100 : index
+ %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
+ %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
+ %9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
+ %11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
+ %16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
+ // expected-error@+1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}}
+ %13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
+ return
+}
diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir
index 67eff31b35b2b8..71f0652067facf 100644
--- a/flang/test/Fir/cuf.mlir
+++ b/flang/test/Fir/cuf.mlir
@@ -7,10 +7,12 @@ func.func @_QPsub1() {
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
+ %14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
+// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
// -----
@@ -66,5 +68,9 @@ func.func @_QPsub1() {
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+ %14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
return
}
+
+// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, small nit about the effects inlined.
Add the fir.cuda_deallocate operation that perform device deallocation of data hold by a descriptor. This will replace the call to AllocatableDeallocate from the runtime.
This is a companion operation to the one added in #88586