// Patch summary:
//  - FIRBuilder.h: adds FirOpBuilder::createExternalLinkage(), which returns
//    getStringAttr("external"), alongside the existing createInternalLinkage()
//    and createLinkOnceLinkage() helpers.
//  - CUFComputeSharedMemoryOffsetsAndSize.cpp: the linkage attribute computed
//    for the per-function shared-memory global is no longer unconditionally
//    internal; it is external when nbDynamicSharedVariables > 0 and internal
//    otherwise.
//  - cuda-shared-offset.mlir: the CHECK line for @_QPdynshared__shared_mem now
//    expects `external` instead of `internal`, and a new CHECK line expects an
//    external @_QMmtestsPtestany__shared_mem global.
// NOTE(review): this copy of the patch looks flattened -- hunk line breaks are
// collapsed and some angle-bracketed spans appear to be missing (for example
// `llvm::SmallVector attrs;` and `data_attr = #cuf.cuda`). Confirm against the
// upstream commit before applying.
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index d3af3bafbf279..2ce0d86d0213e 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -372,6 +372,8 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { return createCommonLinkage(getContext()); } + mlir::StringAttr createExternalLinkage() { return getStringAttr("external"); } + mlir::StringAttr createInternalLinkage() { return getStringAttr("internal"); } mlir::StringAttr createLinkOnceLinkage() { return getStringAttr("linkonce"); } diff --git a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp index 6e04c71c41606..09126e047d382 100644 --- a/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp +++ b/flang/lib/Optimizer/Transforms/CUFComputeSharedMemoryOffsetsAndSize.cpp @@ -143,7 +143,11 @@ struct CUFComputeSharedMemoryOffsetsAndSize auto sharedMemType = fir::SequenceType::get(sharedMemSize, i8Ty); std::string sharedMemGlobalName = (funcOp.getName() + llvm::Twine(cudaSharedMemSuffix)).str(); - mlir::StringAttr linkage = builder.createInternalLinkage(); + // Dynamic shared memory needs an external linkage while static shared + // memory needs an internal linkage. + mlir::StringAttr linkage = nbDynamicSharedVariables > 0 + ? 
builder.createExternalLinkage() + : builder.createInternalLinkage(); builder.setInsertionPointToEnd(gpuMod.getBody()); llvm::SmallVector attrs; auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), diff --git a/flang/test/Fir/CUDA/cuda-shared-offset.mlir b/flang/test/Fir/CUDA/cuda-shared-offset.mlir index 29316c90e5281..9c057d024426a 100644 --- a/flang/test/Fir/CUDA/cuda-shared-offset.mlir +++ b/flang/test/Fir/CUDA/cuda-shared-offset.mlir @@ -17,7 +17,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, %c-1 : index {bindc_name = "r", uniq_name = "_QFdynsharedEr"} -> !fir.ref> // CHECK: gpu.return // CHECK: } -// CHECK: fir.global internal @_QPdynshared__shared_mem {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> +// CHECK: fir.global external @_QPdynshared__shared_mem {alignment = 4 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8> // ----- @@ -158,3 +158,5 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, %c-1{{.*}} : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref> // CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array, %c-1{{.*}} : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref> + +// CHECK: fir.global external @_QMmtestsPtestany__shared_mem {alignment = 8 : i64, data_attr = #cuf.cuda} : !fir.array<0xi8>