diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 377f1febf6b8f..6922b29115078 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2224,4 +2224,34 @@ def WorkdistributeOp : OpenMP_Op<"workdistribute"> {
   let assemblyFormat = "$region attr-dict";
 }
 
+//===----------------------------------------------------------------------===//
+// [6.0] groupprivate Directive
+//===----------------------------------------------------------------------===//
+
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+    [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+  let summary = "groupprivate directive";
+  let description = [{
+    The groupprivate directive specifies that variables are replicated, with
+    each group having its own copy.
+
+    This operation takes the address of a symbol that represents the original
+    variable and an optional device_type attribute, and returns the address of
+    its groupprivate copy. All occurrences of a groupprivate variable in a
+    parallel region should use the copy returned by this operation.
+
+    The `sym_addr` operand is the address of the symbol, i.e. a pointer to the
+    original variable.
+  }];
+
+  let arguments = (ins
+    OpenMP_PointerLikeType:$sym_addr,
+    OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
+  );
+  let results = (outs OpenMP_PointerLikeType:$gp_addr);
+  let assemblyFormat = [{
+    $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
+  }];
+}
+
 #endif // OPENMP_OPS
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f28454075f1d3..d7c994cd3ad8a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6030,7 +6030,7 @@ static bool isTargetDeviceOp(Operation *op) {
   // by taking it in as an operand, so we must always lower these in
   // some manner or result in an ICE (whether they end up in a no-op
   // or otherwise).
-  if (mlir::isa<omp::ThreadprivateOp>(op))
+  if (mlir::isa<omp::ThreadprivateOp, omp::GroupprivateOp>(op))
     return true;
 
   if (mlir::isa<omp::TargetAllocMemOp>(op) ||
@@ -6128,6 +6128,74 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Converts an OpenMP Groupprivate operation into LLVM IR.
+static LogicalResult
+convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
+
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
+  auto deviceType = groupprivateOp.getDeviceType();
+
+  // Decide whether to allocate a group-private copy based on device_type.
+  bool shouldAllocate = true;
+  if (deviceType.has_value()) {
+    switch (*deviceType) {
+    case mlir::omp::DeclareTargetDeviceType::host:
+      // Only allocate on the host.
+      shouldAllocate = !isTargetDevice;
+      break;
+    case mlir::omp::DeclareTargetDeviceType::nohost:
+      // Only allocate on the device.
+      shouldAllocate = isTargetDevice;
+      break;
+    case mlir::omp::DeclareTargetDeviceType::any:
+      // Allocate on both host and device.
+      shouldAllocate = true;
+      break;
+    }
+  }
+
+  Value symAddr = groupprivateOp.getSymAddr();
+  auto *symOp = symAddr.getDefiningOp();
+
+  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+    symOp = asCast.getOperand().getDefiningOp();
+
+  if (!isa<LLVM::AddressOfOp>(symOp))
+    return opInst.emitError("Addressing symbol not found");
+  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
+  LLVM::GlobalOp global =
+      addressOfOp.getGlobal(moduleTranslation.symbolTable());
+  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+  llvm::Value *resultPtr;
+
+  if (shouldAllocate) {
+    // Get the size of the variable.
+    llvm::Type *varType = globalValue->getValueType();
+    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+    llvm::DataLayout DL = llvmModule->getDataLayout();
+    uint64_t typeSize = DL.getTypeAllocSize(varType);
+    // Call __kmpc_alloc_shared to allocate memory for the groupprivate copy.
+    llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
+        *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
+    resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
+  } else {
+    // Use the original global address when no group-private storage is
+    // allocated.
+    resultPtr = moduleTranslation.lookupValue(symAddr);
+    if (!resultPtr)
+      resultPtr = globalValue;
+  }
+  moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
+  return success();
+}
+
 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
 /// OpenMP runtime calls).
 static LogicalResult
@@ -6311,6 +6379,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
       .Case([&](omp::TargetFreeMemOp) {
        return convertTargetFreeMemOp(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::GroupprivateOp) {
+        return convertOmpGroupprivate(*op, builder, moduleTranslation);
+      })
       .Default([&](Operation *inst) {
         return inst->emitError()
                << "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index ac29e20907b55..1cdcec3395aa5 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3367,3 +3367,39 @@ func.func @omp_target_map_clause_type_test(%arg0 : memref) -> () {
 
   return
 }
+
+// CHECK-LABEL: func.func @omp_groupprivate_device_type
+func.func @omp_groupprivate_device_type() {
+  %0 = arith.constant 1 : i32
+  %1 = arith.constant 2 : i32
+  // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
+  %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
+  // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
+  %any_addr = llvm.mlir.addressof @any : !llvm.ptr
+  // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
+  %host_addr = llvm.mlir.addressof @host : !llvm.ptr
+  // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
+  %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
+  %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
+  %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %1, %group_private_any : i32, !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
+  %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %1, %group_private_host : i32, !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
+  %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %1, %group_private_nohost : i32, !llvm.ptr
+
+  return
+}
+
+llvm.mlir.global internal @gp() : i32
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
new file mode 100644
index 0000000000000..46e9639adcc06
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
+                   dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
+  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+
+    %ga = llvm.mlir.addressof @global_a : !llvm.ptr
+    %map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
+    omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
+      %loaded = llvm.load %arg1 : !llvm.ptr -> i32
+
+      %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
+      %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+      llvm.store %loaded, %any_gp : i32, !llvm.ptr
+
+      %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
+      %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+      llvm.store %loaded, %host_gp : i32, !llvm.ptr
+
+      %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
+      %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+      llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
+
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @global_a() : i32
+  llvm.mlir.global internal @global_any() : i32
+  llvm.mlir.global internal @global_host() : i32
+  llvm.mlir.global internal @global_nohost() : i32
+}
+
+// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
+// CHECK-LABEL: omp.target:
+// CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
+// CHECK-NEXT: %[[ALLOC_ANY:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT: store i32 %[[LOAD]], ptr %[[ALLOC_ANY]], align 4
+// CHECK-NEXT: store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT: %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT: store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 8bd33a382197e..339b1d18942c2 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3449,3 +3449,104 @@ llvm.func @nested_task_with_deps() {
 
 // CHECK: ret void
 // CHECK: }
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_host() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr @nohost, align 4
+
+// -----
+
+module attributes {omp.is_target_device = true} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_device() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_device
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: store i32 1, ptr @host, align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any1() : i32
+llvm.mlir.global internal @host1() : i32
+llvm.mlir.global internal @nohost1() : i32
+llvm.func @omp_groupprivate_host() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any1 : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host1 : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any1 = internal global i32 undef
+// CHECK: @host1 = internal global i32 undef
+// CHECK: @nohost1 = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr @nohost1, align 4
+
+// -----
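
Usage note (not part of the patch): a minimal sketch of how the new op is meant to appear in input IR, following the syntax exercised by the tests above. The global @counter and the function @use_counter are illustrative names only.

  llvm.mlir.global internal @counter() : i32

  llvm.func @use_counter() {
    %c1 = llvm.mlir.constant(1 : i32) : i32
    %addr = llvm.mlir.addressof @counter : !llvm.ptr
    // Each group gets its own copy of @counter on the device; on the host the
    // original global is used directly, because device_type(nohost) suppresses
    // the allocation there.
    %gp = omp.groupprivate %addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
    llvm.store %c1, %gp : i32, !llvm.ptr
    llvm.return
  }

When translated for a target device, the omp.groupprivate result lowers to a __kmpc_alloc_shared call sized by the global's type; on the host it falls back to the address of @counter itself, matching the shouldAllocate logic in convertOmpGroupprivate.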