diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 759e3a65dd24f..8d00272b09f42 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -454,6 +454,9 @@ struct DeclareOpConversion : public mlir::OpRewritePattern { mlir::LogicalResult matchAndRewrite(fir::DeclareOp op, mlir::PatternRewriter &rewriter) const override { + // No users: nothing to rewrite, so report no match (IR is left untouched). + if (op.getResult().use_empty()) + return failure(); if (auto addrOfOp = op.getMemref().getDefiningOp()) { if (auto global = symTab.lookup( addrOfOp.getSymbol().getRootReference().getValue())) { @@ -963,6 +966,9 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase { } target.addDynamicallyLegalOp([&](fir::DeclareOp op) { + // A declare with no users is legal as is: no device address is needed. + if (op.getResult().use_empty()) + return true; if (inDeviceContext(op)) return true; if (auto addrOfOp = op.getMemref().getDefiningOp()) { diff --git a/flang/test/Fir/CUDA/cuda-global-addr.mlir b/flang/test/Fir/CUDA/cuda-global-addr.mlir index 3e50c7a51f49c..6f7816c9163cb 100644 --- a/flang/test/Fir/CUDA/cuda-global-addr.mlir +++ b/flang/test/Fir/CUDA/cuda-global-addr.mlir @@ -63,6 +63,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : // We cannot call _FortranACUFGetDeviceAddress on a constant global. // There is no symbol for it and the call would result into an unresolved reference. +// CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "arraysize"} // CHECK-NOT: fir.call {{.*}}GetDeviceAddress // ----- @@ -90,3 +91,22 @@ func.func @_QQmain() attributes {fir.bindc_name = "test"} { // CHECK-NOT: fir.call {{.*}}GetDeviceAddress } + +// ----- + +// Check that we do not introduce a call to _FortranACUFGetDeviceAddress when +// the value has no users. 
+ +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + func.func @_QQmain() attributes {fir.bindc_name = "T"} { + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.address_of(@_QMcon2Ezzz) : !fir.ref + %2 = fir.declare %1 {data_attr = #cuf.cuda, uniq_name = "_QMcon2Ezzz"} : (!fir.ref) -> !fir.ref + return + } + fir.global @_QMcon2Ezzz {data_attr = #cuf.cuda} : i32 +} + +// CHECK-LABEL: func.func @_QQmain() +// CHECK: fir.address_of(@_QMcon2Ezzz) : !fir.ref +// CHECK-NOT: fir.call {{.*}}GetDeviceAddress