16 changes: 10 additions & 6 deletions flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Transforms/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
Expand Down Expand Up @@ -482,8 +483,9 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
flangomp::shouldUseWorkshareLowering(elemental));
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
hlfir::Entity elementValue{yield.getElementValue()};
Expand Down Expand Up @@ -553,8 +555,9 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
builder.create<hlfir::AssignOp>(loc, rhs, arrayElement);
Expand Down Expand Up @@ -648,8 +651,9 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/OpenMP/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_flang_library(FlangOpenMPTransforms
FunctionFiltering.cpp
MapInfoFinalization.cpp
MarkDeclareTarget.cpp
LowerWorkshare.cpp

DEPENDS
FIRDialect
Expand Down
527 changes: 527 additions & 0 deletions flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
/// \param pm - MLIR pass manager that will hold the pipeline definition
/// \param optLevel - optimization level used for creating FIR optimization
/// passes pipeline
void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
llvm::OptimizationLevel optLevel) {
if (optLevel.isOptimizingForSpeed()) {
addCanonicalizerPassWithoutRegionSimplification(pm);
Expand All @@ -230,6 +230,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
pm.addPass(hlfir::createBufferizeHLFIR());
pm.addPass(hlfir::createConvertHLFIRtoFIR());
if (enableOpenMP)
pm.addPass(flangomp::createLowerWorkshare());
}

/// Create a pass pipeline for handling certain OpenMP transformations needed
Expand Down Expand Up @@ -302,7 +304,7 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig &config,
llvm::StringRef inputFilename) {
fir::createHLFIRToFIRPassPipeline(pm, config.OptLevel);
fir::createHLFIRToFIRPassPipeline(pm, config.EnableOpenMP, config.OptLevel);

// Add default optimizer pass pipeline.
fir::createDefaultFIROptimizerPassPipeline(pm, config);
Expand Down
1 change: 1 addition & 0 deletions flang/test/Fir/basic-program.fir
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func.func @_QQmain() {
// PASSES-NEXT: LowerHLFIRIntrinsics
// PASSES-NEXT: BufferizeHLFIR
// PASSES-NEXT: ConvertHLFIRtoFIR
// PASSES-NEXT: LowerWorkshare
// PASSES-NEXT: CSE
// PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
// PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
Expand Down
57 changes: 57 additions & 0 deletions flang/test/HLFIR/bufferize-workshare.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s

// CHECK-LABEL: func.func @simple(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
// CHECK: omp.parallel {
// CHECK: omp.workshare {
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
// CHECK: %[[VAL_7:.*]] = arith.constant true
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
// CHECK: omp.workshare.loop_wrapper {
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// Test input: an unordered hlfir.elemental (array(i) - 1 over 42 elements)
// assigned back to the same array, nested in omp.parallel / omp.workshare.
func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
omp.parallel {
omp.workshare {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// Element-wise body: load array(i) and subtract 1.
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
%ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
%val = fir.load %ref : !fir.ref<i32>
%sub = arith.subi %val, %c1_i32 : i32
hlfir.yield_element %sub : i32
}
// The elemental result is stored back into the array it was read from.
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}
34 changes: 34 additions & 0 deletions flang/test/Integration/OpenMP/workshare-array-array-assign.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Whole-array assignment between two assumed-shape integer arrays inside a
! combined parallel workshare construct.
subroutine sb1(x, y)
integer :: x(:)
integer :: y(:)
!$omp parallel workshare
x = y
!$omp end parallel workshare
end subroutine

! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: hlfir.assign
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: omp.terminator
! HLFIR: }

! FIR: omp.parallel {
! FIR: omp.wsloop nowait {
! FIR: omp.loop_nest
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
! FIR: }
57 changes: 57 additions & 0 deletions flang/test/Integration/OpenMP/workshare-axpy.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Computes z = a * x + y on assumed-shape integer arrays inside a combined
! parallel workshare construct.
subroutine sb1(a, x, y, z)
integer :: a
integer :: x(:)
integer :: y(:)
integer :: z(:)
!$omp parallel workshare
z = a * x + y
!$omp end parallel workshare
end subroutine

! HLFIR: func.func @_QPsb1
! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
! HLFIR: hlfir.assign
! HLFIR: hlfir.destroy
! HLFIR: hlfir.destroy
! HLFIR-NOT: omp.barrier
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR-NOT: omp.barrier
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: return
! HLFIR: }
! HLFIR:}


! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>

! NOTE: the copyprivate operands are matched with patterns rather than literal
! SSA numbers so the test does not break when value numbering changes.
! FIR: func.func @_QPsb1
! FIR: omp.parallel {
! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
! FIR: fir.allocmem
! FIR: omp.wsloop {
! FIR: omp.loop_nest
! FIR: omp.single nowait {
! FIR: fir.call @_FortranAAssign
! FIR: fir.freemem
! FIR: omp.terminator
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
! FIR: }
45 changes: 45 additions & 0 deletions flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Broadcast assignment of scalar a to every element of assumed-shape array x
! inside a combined parallel workshare construct.
subroutine sb1(a, x)
integer :: a
integer :: x(:)
!$omp parallel workshare
x = a
!$omp end parallel workshare
end subroutine

! NOTE: SSA value names below are matched with patterns or captured variables
! instead of literal names so the test does not break when value numbering
! changes.
! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: %[[SCALAR:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! HLFIR: hlfir.assign %[[SCALAR]] to
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: omp.terminator
! HLFIR: }

! FIR: omp.parallel {
! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
! FIR: omp.terminator
! FIR: }
! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
! FIR: %[[DIMS:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
! FIR: omp.wsloop nowait {
! FIR: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%[[DIMS]]#1) inclusive step (%{{.*}}) {
! FIR: fir.store %[[SCALAR_RELOAD]]
! FIR: omp.yield
! FIR: }
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
65 changes: 65 additions & 0 deletions flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0

! Multiplies a 10-element real array by 2 in place inside a combined
! parallel workshare construct. The array is read and written by the same
! statement.
program test
real :: arr_01(10)
!$omp parallel workshare
arr_01 = arr_01*2
!$omp end parallel workshare
end program

! HLFIR-O3: omp.parallel {
! HLFIR-O3: omp.workshare {
! HLFIR-O3: hlfir.elemental
! HLFIR-O3: hlfir.assign
! HLFIR-O3: hlfir.destroy
! HLFIR-O3: omp.terminator
! HLFIR-O3: omp.terminator

! FIR-O3: omp.parallel {
! FIR-O3: omp.wsloop nowait {
! FIR-O3: omp.loop_nest
! FIR-O3: omp.barrier
! FIR-O3: omp.terminator

! HLFIR-O0: omp.parallel {
! HLFIR-O0: omp.workshare {
! HLFIR-O0: hlfir.elemental
! HLFIR-O0: hlfir.assign
! HLFIR-O0: hlfir.destroy
! HLFIR-O0: omp.terminator
! HLFIR-O0: omp.terminator

! Check the copyprivate copy function
! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
! FIR-O0: fir.load %[[SRC]]
! FIR-O0: fir.store {{.*}} to %[[DST]]

! Check that we properly handle the temporary array
! FIR-O0: omp.parallel {
! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
! FIR-O0: fir.allocmem
! FIR-O0: fir.store
! FIR-O0: omp.terminator
! FIR-O0: fir.load %[[CP]]
! FIR-O0: omp.wsloop {
! FIR-O0: omp.loop_nest
! FIR-O0: omp.yield
! FIR-O0: omp.single nowait {
! FIR-O0: fir.call @_FortranAAssign
! FIR-O0: fir.freemem
! FIR-O0: omp.terminator
! FIR-O0: omp.barrier
! FIR-O0: omp.terminator
6 changes: 3 additions & 3 deletions flang/test/Lower/OpenMP/workshare.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ subroutine sb1(arr)
integer :: arr(:)
!CHECK: omp.parallel {
!$omp parallel
!CHECK: omp.single {
!CHECK: omp.workshare {
!$omp workshare
arr = 0
!$omp end workshare
Expand All @@ -20,7 +20,7 @@ subroutine sb2(arr)
integer :: arr(:)
!CHECK: omp.parallel {
!$omp parallel
!CHECK: omp.single nowait {
!CHECK: omp.workshare nowait {
!$omp workshare
arr = 0
!$omp end workshare nowait
Expand All @@ -33,7 +33,7 @@ subroutine sb2(arr)
subroutine sb3(arr)
integer :: arr(:)
!CHECK: omp.parallel {
!CHECK: omp.single {
!CHECK: omp.workshare {
!$omp parallel workshare
arr = 0
!$omp end parallel workshare
Expand Down
53 changes: 53 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-alloca.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Checks that fir.alloca is hoisted out and copyprivate'd
// Test input: %alloc is allocated and initialized before the loop wrapper,
// then used both inside the loop nest (test.test1) and after it (test.test2).
func.func @wsfunc() {
omp.workshare {
%c1 = arith.constant 1 : index
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%alloc = fir.alloca i32
fir.store %c1_i32 to %alloc : !fir.ref<i32>
omp.workshare.loop_wrapper {
omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
"test.test1"(%alloc) : (!fir.ref<i32>) -> ()
omp.yield
}
}
"test.test2"(%alloc) : (!fir.ref<i32>) -> ()
omp.terminator
}
return
}

// CHECK-LABEL: func.func private @_workshare_copy_i32(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32>) {
// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
// CHECK: fir.store %[[VAL_2]] to %[[VAL_0]] : !fir.ref<i32>
// CHECK: return
// CHECK: }

// CHECK-LABEL: func.func @wsfunc() {
// CHECK: %[[VAL_0:.*]] = fir.alloca i32
// CHECK: omp.single copyprivate(%[[VAL_0]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
// CHECK: fir.store %[[VAL_1]] to %[[VAL_0]] : !fir.ref<i32>
// CHECK: omp.terminator
// CHECK: }
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 42 : index
// CHECK: omp.wsloop {
// CHECK: omp.loop_nest (%[[VAL_4:.*]]) : index = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_2]]) {
// CHECK: "test.test1"(%[[VAL_0]]) : (!fir.ref<i32>) -> ()
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: omp.single nowait {
// CHECK: "test.test2"(%[[VAL_0]]) : (!fir.ref<i32>) -> ()
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.barrier
// CHECK: return
// CHECK: }

49 changes: 49 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-binding.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Checks that the omp.workshare.loop_wrapper binds to the correct omp.workshare

// Test input: two nested levels of omp.parallel / omp.workshare nowait. The
// loop wrapper sits directly in the innermost omp.workshare; the outer
// omp.workshare contains only the inner omp.parallel.
func.func @wsfunc() {
%c1 = arith.constant 1 : index
%c42 = arith.constant 42 : index
omp.parallel {
omp.workshare nowait {
omp.parallel {
omp.workshare nowait {
omp.workshare.loop_wrapper {
omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
"test.test2"() : () -> ()
omp.yield
}
}
omp.terminator
}
omp.terminator
}
omp.terminator
}
omp.terminator
}
return
}

// CHECK-LABEL: func.func @wsfunc() {
// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
// CHECK: omp.parallel {
// CHECK: omp.single nowait {
// CHECK: omp.parallel {
// CHECK: omp.wsloop nowait {
// CHECK: omp.loop_nest (%[[VAL_2:.*]]) : index = (%[[VAL_0]]) to (%[[VAL_1]]) inclusive step (%[[VAL_0]]) {
// CHECK: "test.test2"() : () -> ()
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: return
// CHECK: }

57 changes: 57 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-cleanup.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Check that we clean up unused pure operations from the parallel and single
// regions

// CHECK-LABEL: func.func @wsfunc() {
// CHECK: %[[VAL_0:.*]] = fir.alloca i32
// CHECK: omp.parallel {
// CHECK: omp.single {
// CHECK: %[[VAL_1:.*]] = "test.test1"() : () -> i32
// CHECK: %[[VAL_2:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_4:.*]] = arith.addi %[[VAL_2]], %[[VAL_3]] : index
// CHECK: "test.test3"(%[[VAL_4]]) : (index) -> ()
// CHECK: omp.terminator
// CHECK: }
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_6:.*]] = arith.constant 42 : index
// CHECK: omp.wsloop nowait {
// CHECK: omp.loop_nest (%[[VAL_7:.*]]) : index = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_5]]) {
// CHECK: "test.test2"() : () -> ()
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: omp.barrier
// CHECK: omp.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// Test input: a workshare region mixing an opaque op with an unused result,
// constants used only as loop bounds, and a constant computation that feeds
// an opaque op.
func.func @wsfunc() {
%a = fir.alloca i32
omp.parallel {
omp.workshare {
// Result is never used.
%t1 = "test.test1"() : () -> i32

// Used only as bounds/step of the loop nest below.
%c1 = arith.constant 1 : index
%c42 = arith.constant 42 : index

// Pure computation whose result is consumed by the opaque test.test3 op.
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%add = arith.addi %c2, %c3 : index
"test.test3"(%add) : (index) -> ()

omp.workshare.loop_wrapper {
omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
"test.test2"() : () -> ()
omp.yield
}
}
omp.terminator
}
omp.terminator
}
return
}


73 changes: 73 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-copyprivate.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s


// Check if we store the correct values

// Test input: six opaque-op results (%t1..%t6) with different kinds of uses.
//   %t1 - used before the loop wrapper and inside the loop nest
//   %t2 - used before the loop wrapper and after it
//   %t3 - used only before the loop wrapper (directly and inside a fir.if)
//   %t4 - used only inside a fir.if after the loop wrapper
//   %t5 - used by a pure arith.addi whose result is used inside the loop nest
//   %t6 - used by an opaque op whose result is used inside the loop nest
// The directives interleaved below state which values are expected to be
// followed by a fir.store and which are not.
func.func @wsfunc() {
omp.parallel {
// CHECK: fir.alloca
// CHECK: fir.alloca
// CHECK: fir.alloca
// CHECK: fir.alloca
// CHECK: fir.alloca
// CHECK-NOT: fir.alloca
omp.workshare {

%t1 = "test.test1"() : () -> i32
// CHECK: %[[T1:.*]] = "test.test1"
// CHECK: fir.store %[[T1]]
%t2 = "test.test2"() : () -> i32
// CHECK: %[[T2:.*]] = "test.test2"
// CHECK: fir.store %[[T2]]
%t3 = "test.test3"() : () -> i32
// CHECK: %[[T3:.*]] = "test.test3"
// CHECK-NOT: fir.store %[[T3]]
%t4 = "test.test4"() : () -> i32
// CHECK: %[[T4:.*]] = "test.test4"
// CHECK: fir.store %[[T4]]
%t5 = "test.test5"() : () -> i32
// CHECK: %[[T5:.*]] = "test.test5"
// CHECK: fir.store %[[T5]]
%t6 = "test.test6"() : () -> i32
// CHECK: %[[T6:.*]] = "test.test6"
// CHECK-NOT: fir.store %[[T6]]


// Uses that appear before the loop wrapper.
"test.test1"(%t1) : (i32) -> ()
"test.test1"(%t2) : (i32) -> ()
"test.test1"(%t3) : (i32) -> ()

%true = arith.constant true
fir.if %true {
"test.test2"(%t3) : (i32) -> ()
}

%c1_i32 = arith.constant 1 : i32

%t5_pure_use = arith.addi %t5, %c1_i32 : i32

%t6_mem_effect_use = "test.test8"(%t6) : (i32) -> i32
// CHECK: %[[T6_USE:.*]] = "test.test8"
// CHECK: fir.store %[[T6_USE]]

%c42 = arith.constant 42 : index
%c1 = arith.constant 1 : index
// Uses inside the loop nest.
omp.workshare.loop_wrapper {
omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) {
"test.test10"(%t1) : (i32) -> ()
"test.test10"(%t5_pure_use) : (i32) -> ()
"test.test10"(%t6_mem_effect_use) : (i32) -> ()
omp.yield
}
}

// Uses that appear after the loop wrapper.
"test.test10"(%t2) : (i32) -> ()
fir.if %true {
"test.test10"(%t4) : (i32) -> ()
}
omp.terminator
}
omp.terminator
}
return
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Check that the `fir.declare` op, although safe to parallelize on its own,
// is not parallelized because its operand %alloc is not reloaded outside the
// omp.single.

// Test input: a workshare region with no loop wrapper that allocates a
// temporary, declares it, and frees it. %c0 is defined outside the region.
func.func @foo() {
%c0 = arith.constant 0 : index
omp.workshare {
%alloc = fir.allocmem !fir.array<?xf32>, %c0 {bindc_name = ".tmp.forall", uniq_name = ""}
%shape = fir.shape %c0 : (index) -> !fir.shape<1>
// fir.declare takes the freshly allocated %alloc as its operand.
%declare = fir.declare %alloc(%shape) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>>
fir.freemem %alloc : !fir.heap<!fir.array<?xf32>>
omp.terminator
}
return
}

// CHECK: omp.single nowait
// CHECK: fir.allocmem
// CHECK: fir.shape
// CHECK: fir.declare
// CHECK: fir.freemem
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.barrier
19 changes: 19 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-no-single.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Check that we do not emit an omp.single for the constant operation

// Test input: the only op outside the loop wrapper is an arith.constant, which
// supplies the lower bound, upper bound, and step of the loop nest.
func.func @foo() {
omp.workshare {
%c1 = arith.constant 1 : index
omp.workshare.loop_wrapper {
omp.loop_nest (%arg1) : index = (%c1) to (%c1) inclusive step (%c1) {
"test.test0"() : () -> ()
omp.yield
}
}
omp.terminator
}
return
}

// CHECK-NOT: omp.single
23 changes: 23 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-nowait.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: fir-opt --split-input-file --lower-workshare --allow-unregistered-dialect %s | FileCheck %s

// Check that we correctly handle nowait

// CHECK-LABEL: func.func @nonowait
// Test input: an empty omp.workshare without the nowait clause.
func.func @nonowait(%arg0: !fir.ref<!fir.array<42xi32>>) {
// CHECK: omp.barrier
omp.workshare {
omp.terminator
}
return
}

// -----

// CHECK-LABEL: func.func @nowait
// Test input: an empty omp.workshare carrying the nowait clause.
func.func @nowait(%arg0: !fir.ref<!fir.array<42xi32>>) {
// CHECK-NOT: omp.barrier
omp.workshare nowait {
omp.terminator
}
return
}
26 changes: 26 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s

// CHECK: warning: omp workshare with unstructured control flow is currently unsupported and will be serialized.

// CHECK: omp.parallel
// CHECK-NEXT: omp.single

// TODO Check that the definition of %r dominates its use post-transform
// Test input: a workshare region with three blocks. Control flows
// ^bb1 -> ^bb3 -> ^bb2, so %r (defined in ^bb3) is used in ^bb2, a block that
// appears textually before its definition.
func.func @wsfunc() {
%a = fir.alloca i32
omp.parallel {
omp.workshare {
^bb1:
%c1 = arith.constant 1 : i32
cf.br ^bb3(%c1: i32)
^bb2:
"test.test2"(%r) : (i32) -> ()
omp.terminator
^bb3(%arg1: i32):
%r = "test.test2"(%arg1) : (i32) -> i32
cf.br ^bb2
}
omp.terminator
}
return
}
23 changes: 23 additions & 0 deletions flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | FileCheck %s

// CHECK: warning: omp workshare with unstructured control flow is currently unsupported and will be serialized.

// CHECK: omp.parallel
// CHECK-NEXT: omp.single

// TODO Check transforming a simple CFG
// Test input: a workshare region with two blocks; ^bb1 branches to ^bb3 and
// passes a constant as the block argument.
func.func @wsfunc() {
%a = fir.alloca i32
omp.parallel {
omp.workshare {
^bb1:
%c1 = arith.constant 1 : i32
cf.br ^bb3(%c1: i32)
^bb3(%arg1: i32):
"test.test2"(%arg1) : (i32) -> ()
omp.terminator
}
omp.terminator
}
return
}
162 changes: 162 additions & 0 deletions flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s

// Checks that we correctly identify when to use the lowering to
// omp.workshare.loop_wrapper

// CHECK-LABEL: @should_parallelize_0
// CHECK: omp.workshare.loop_wrapper
// Test input: the elemental + assign sit directly inside an omp.workshare that
// has no enclosing omp.parallel in this function.
func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
return
}

// CHECK-LABEL: @should_parallelize_1
// CHECK: omp.workshare.loop_wrapper
// Test input: the elemental + assign sit directly inside an omp.workshare that
// is itself nested in an omp.parallel.
func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.parallel {
omp.workshare {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}


// CHECK-LABEL: @should_not_parallelize_0
// CHECK-NOT: omp.workshare.loop_wrapper
// Test input: the elemental + assign are wrapped in an omp.single nested
// inside the omp.workshare.
func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
omp.single {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}

// CHECK-LABEL: @should_not_parallelize_1
// CHECK-NOT: omp.workshare.loop_wrapper
// Test input: the elemental + assign are wrapped in an omp.critical nested
// inside the omp.workshare.
func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
omp.critical {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}

// CHECK-LABEL: @should_not_parallelize_2
// CHECK-NOT: omp.workshare.loop_wrapper
// Test input: the elemental + assign are inside an omp.parallel that is
// nested in the omp.workshare, so they are not directly in the workshare
// region.
func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
omp.parallel {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}

// Negative test with deeper nesting: workshare -> parallel -> workshare ->
// parallel, with the hlfir.elemental / hlfir.assign pair in the innermost
// omp.parallel. The expected output for this function must not contain any
// omp.workshare.loop_wrapper.
// NOTE(review): presumably this guards against matching an outer workshare
// ancestor when the nearest enclosing construct is omp.parallel - confirm
// against flangomp::shouldUseWorkshareLowering.
// CHECK-LABEL: @should_not_parallelize_3
// CHECK-NOT: omp.workshare.loop_wrapper
func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
omp.parallel {
omp.workshare {
omp.parallel {
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// Unordered elemental that yields the constant %c1_i32 for every index.
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
// Assign the elemental result to the declared array, then release it.
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
omp.terminator
}
omp.terminator
}
return
}

// Negative test: the ops are direct children of omp.workshare, but the
// workshare region consists of two blocks (^bb1 branches to ^bb2 via cf.br).
// The expected output for this function must not contain any
// omp.workshare.loop_wrapper.
// NOTE(review): presumably multi-block workshare regions are not eligible for
// the workshare loop lowering - confirm against
// flangomp::shouldUseWorkshareLowering.
// CHECK-LABEL: @should_not_parallelize_4
// CHECK-NOT: omp.workshare.loop_wrapper
func.func @should_not_parallelize_4(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
omp.workshare {
^bb1:
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// Unordered elemental that yields the constant %c1_i32 for every index.
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
hlfir.yield_element %c1_i32 : i32
}
// Assign the elemental result to the declared array, then release it.
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
// Explicit branch to a second block so the region is not single-block.
cf.br ^bb2
^bb2:
omp.terminator
}
return
}
5 changes: 4 additions & 1 deletion flang/tools/bbc/bbc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ static llvm::LogicalResult convertFortranSourceToMLIR(

if (emitFIR && useHLFIR) {
// lower HLFIR to FIR
fir::createHLFIRToFIRPassPipeline(pm, llvm::OptimizationLevel::O2);
fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP,
llvm::OptimizationLevel::O2);
if (mlir::failed(pm.run(mlirModule))) {
llvm::errs() << "FATAL: lowering from HLFIR to FIR failed";
return mlir::failure();
Expand All @@ -467,6 +468,8 @@ static llvm::LogicalResult convertFortranSourceToMLIR(

// Add O2 optimizer pass pipeline.
MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
if (enableOpenMP)
config.EnableOpenMP = true;
config.NSWOnLoopVarInc = setNSW;
fir::registerDefaultInlinerPass(config);
fir::createDefaultFIROptimizerPassPipeline(pm, config);
Expand Down
1 change: 1 addition & 0 deletions flang/tools/tco/tco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
return mlir::failure();
} else {
MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
config.EnableOpenMP = true; // assume the input contains OpenMP
config.AliasAnalysis = true; // enabled when optimizing for speed
if (codeGenLLVM) {
// Run only CodeGen passes.
Expand Down
2 changes: 2 additions & 0 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1937,6 +1937,8 @@ void WorkshareOp::build(OpBuilder &builder, OperationState &state,
/// Verifier for omp.workshare.loop_wrapper.
///
/// Emits an error when the op has no enclosing omp.workshare ancestor, or
/// when getNestedWrapper() reports a nested wrapper (diagnosed as
/// "composite"). Returns success otherwise.
LogicalResult WorkshareLoopWrapperOp::verify() {
  // Look up the closest omp.workshare ancestor; a null op means there is none.
  WorkshareOp enclosingWorkshare = (*this)->getParentOfType<WorkshareOp>();
  if (!enclosingWorkshare)
    return emitError() << "must be nested in an omp.workshare";

  // This wrapper is not allowed to contain another wrapper.
  if (getNestedWrapper())
    return emitError() << "cannot be composite";

  return success();
}

Expand Down
11 changes: 4 additions & 7 deletions mlir/test/Dialect/OpenMP/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2581,15 +2581,13 @@ func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index)
// -----
func.func @nested_wrapper(%idx : index) {
omp.workshare {
// expected-error @below {{nested wrappers not supported}}
// expected-error @below {{cannot be composite}}
omp.workshare.loop_wrapper {
omp.simd {
omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
omp.yield
}
omp.terminator
}
omp.terminator
} {omp.composite}
}
omp.terminator
}
Expand All @@ -2599,9 +2597,9 @@ func.func @nested_wrapper(%idx : index) {
// -----
func.func @not_wrapper() {
omp.workshare {
// expected-error @below {{must be a loop wrapper}}
// expected-error @below {{op nested in loop wrapper is not another loop wrapper or `omp.loop_nest`}}
omp.workshare.loop_wrapper {
omp.terminator
%0 = arith.constant 0 : index
}
omp.terminator
}
Expand All @@ -2615,7 +2613,6 @@ func.func @missing_workshare(%idx : index) {
omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
omp.yield
}
omp.terminator
}
return
}