WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

kiranchandramohan · 2023-12-18T17:56:13Z

Not for merge.

This patch is a WIP to delay privatisation of index variables.

github-actions · 2023-12-18T17:59:31Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff decf0277a7f2c69684a44e80f7791038cfaed82d 2d6e1c49fa9687c2ac649aa3416ce04e9f6fb7ba -- flang/lib/Lower/OpenMP.cpp mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

View the diff from clang-format here.

diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index c5c9859dfd..57983d60e5 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -2139,11 +2139,11 @@ static void createBodyOfLoopOp(
   int argIndex = 0;
   for (const Fortran::semantics::Symbol *arg : args) {
     mlir::Value addrVal =
-          fir::getBase(op.getRegion().front().getArgument(argIndex+offset));
+        fir::getBase(op.getRegion().front().getArgument(argIndex + offset));
     converter.bindSymbol(*arg, addrVal);
     mlir::Type symType = converter.genType(*arg);
     mlir::Value indexVal =
-          fir::getBase(op.getRegion().front().getArgument(argIndex));
+        fir::getBase(op.getRegion().front().getArgument(argIndex));
     mlir::Value cvtVal = firOpBuilder.createConvert(loc, symType, indexVal);
     addrVal = converter.getSymbolAddress(*arg);
     storeOp = firOpBuilder.create<fir::StoreOp>(loc, cvtVal, addrVal);
@@ -3121,7 +3121,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
     return;
   }
 
-    // Collect the loops to collapse.
+  // Collect the loops to collapse.
   Fortran::lower::pft::Evaluation *doConstructEval =
       &eval.getFirstNestedEvaluation();
   Fortran::lower::pft::Evaluation *doLoop =
@@ -3174,8 +3174,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
   }
 
   createBodyOfLoopOp<mlir::omp::WsLoopOp>(wsLoopOp, converter, currentLocation,
-                                      eval, &loopOpClauseList, iv,
-                                      /*outer=*/false, &dsp);
+                                          eval, &loopOpClauseList, iv,
+                                          /*outer=*/false, &dsp);
 }
 
 static void
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3da37231e3..18dbbd572e 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -178,10 +178,10 @@ void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
   p << stringifyEnum(attr.getValue());
 }
 
-static ParseResult
-parsePrivateEntries(OpAsmParser &parser,
-                SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateOperands,
-                SmallVectorImpl<Type> &privateOperandTypes) {
+static ParseResult parsePrivateEntries(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateOperands,
+    SmallVectorImpl<Type> &privateOperandTypes) {
   OpAsmParser::UnresolvedOperand arg;
   OpAsmParser::UnresolvedOperand blockArg;
   Type argType;
@@ -213,8 +213,8 @@ parsePrivateEntries(OpAsmParser &parser,
 }
 
 static void printPrivateEntries(OpAsmPrinter &p, Operation *op,
-                            OperandRange privateOperands,
-                            TypeRange privateOperandTypes) {
+                                OperandRange privateOperands,
+                                TypeRange privateOperandTypes) {
   auto &region = op->getRegion(0);
 
   unsigned argIndex = 0;
@@ -222,7 +222,7 @@ static void printPrivateEntries(OpAsmPrinter &p, Operation *op,
   if (auto wsLoop = dyn_cast<WsLoopOp>(op))
     offset = wsLoop.getNumLoops();
   for (const auto &privOperand : privateOperands) {
-    const auto &blockArg = region.front().getArgument(argIndex+offset);
+    const auto &blockArg = region.front().getArgument(argIndex + offset);
     p << privOperand << " -> " << blockArg;
     argIndex++;
     if (argIndex < privateOperands.size())
@@ -1149,13 +1149,13 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
   auto args = region.front().getArguments();
   p << " (";
   unsigned numLoops = steps.size();
-  for (unsigned i=0; i<numLoops; i++) {
+  for (unsigned i = 0; i < numLoops; i++) {
     if (i != 0)
       p << ", ";
     p << args[i];
   }
-  p << ") : " << args[0].getType() << " = (" << lowerBound
-    << ") to (" << upperBound << ") ";
+  p << ") : " << args[0].getType() << " = (" << lowerBound << ") to ("
+    << upperBound << ") ";
   if (inclusive)
     p << "inclusive ";
   p << "step (" << steps << ") ";
@@ -1338,7 +1338,7 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, lowerBound, upperBound, step,
         /*linear_vars=*/ValueRange(),
         /*linear_step_vars=*/ValueRange(), /*private_vars=*/ValueRange(),
-	/*reduction_vars=*/ValueRange(),
+        /*reduction_vars=*/ValueRange(),
         /*reductions=*/nullptr, /*schedule_val=*/nullptr,
         /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
         /*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 26ec12e427..3c3a5278df 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -834,17 +834,18 @@ static void collectReductionInfo(
 }
 
 /// Allocate space for privatized reduction variables.
-void
-allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation,
-                   llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+void allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder,
+                            LLVM::ModuleTranslation &moduleTranslation,
+                            llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
   unsigned offset = loop.getNumLoops();
   unsigned numArgs = loop.getRegion().front().getNumArguments();
   llvm::IRBuilderBase::InsertPointGuard guard(builder);
   builder.restoreIP(allocaIP);
   for (unsigned i = offset; i < numArgs; ++i) {
-    if (auto op = loop.getPrivates()[i-offset].getDefiningOp<LLVM::AllocaOp>()) {
-      llvm::Value *var = builder.CreateAlloca(moduleTranslation.convertType(op.getResultPtrElementType()));
+    if (auto op =
+            loop.getPrivates()[i - offset].getDefiningOp<LLVM::AllocaOp>()) {
+      llvm::Value *var = builder.CreateAlloca(
+          moduleTranslation.convertType(op.getResultPtrElementType()));
       //    moduleTranslation.convertType(loop.getPrivates()[i-offset].getType()));
       moduleTranslation.mapValue(loop.getRegion().front().getArgument(i), var);
     }

clementval · 2023-12-18T18:48:15Z

Nice to see some work towards more information carried out in the dialect!

kiranchandramohan · 2023-12-21T16:04:58Z

This is an alternative to #74843.

But looks like this patch alone is not sufficient. But we can change the codegen of worksharing loop to emit the alloca in the parallel body function. Alternatively, since the alloca for the index variable is being materialized late, we can add the lifetime start and end at the point. @skatrak

skatrak · 2024-01-04T13:08:17Z

I think this approach makes sense. If I understand it correctly, this would add an explicit dialect representation for the private clause of the OpenMP for/do loop and use it internally to privatize the loop index, which allows us only materializing the copy later during MLIR to LLVM IR translation. In that case, I think the best option is to create the alloca inside of the outlined function for the parallel region and then add lifetime markers similarly to what I proposed in #74843, but more simply since we would already have access to the relevant allocas there, and there would be no need of analyzing the MLIR loop body.

Ideally, if we were to outline the loop body into its own function, the best would be to create the alloca there and not have to deal with lifetime markers. But the process we have for this relies on using the code extractor on a loop that has already been created inside of the outlined function for the parallel section, so we either mark the allocation's lifetime so it can be sunk into the new outlined function or we generate it inside of the loop. I think the second case would result in a very slow execution or possibly even memory leaks when this second outlining is not done, so the first one seems like the safer option. Let me know if you'd like to explore that approach and I can update my patch / create another one to build on top of this.

I guess the idea would be to add the private, firstprivate and lastprivate args to all applicable OpenMP MLIR ops, and potentially modify the DataSharingProcessor to make it update these mappings as well?

[Flang][MLIR][OpenMP] WIP: Privatisation for index variables

2d6e1c4

kiranchandramohan force-pushed the privat_arg branch from f6c8748 to 2d6e1c4 Compare December 18, 2023 17:57

kiranchandramohan changed the title ~~WIP : Delayed privatisation of index variables~~ WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables Dec 18, 2023

clementval mentioned this pull request Jan 22, 2024

[flang][OpenMP] Add support for target ... private #78968

Closed

kiranchandramohan mentioned this pull request Feb 5, 2024

[WIP] Delayed privatization. #79862

Closed

kiranchandramohan closed this Apr 18, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

kiranchandramohan commented Dec 18, 2023

github-actions bot commented Dec 18, 2023

clementval commented Dec 18, 2023

kiranchandramohan commented Dec 21, 2023

skatrak commented Jan 4, 2024

WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

Conversation

kiranchandramohan commented Dec 18, 2023

github-actions bot commented Dec 18, 2023

clementval commented Dec 18, 2023

kiranchandramohan commented Dec 21, 2023

skatrak commented Jan 4, 2024